From af87ce08fc45ba509ad0f1d5ca0c65aaf0fc3198 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Wed, 1 Jul 2026 18:12:09 +0200 Subject: [PATCH 1/2] feat(core): model type variations from simple extension YAML files Type variations declared in the `type_variations` section of simple extension YAML files were silently ignored because there was no model object to hold them. This adds a `TypeVariation` model (name, parent type class, description, function behavior, deprecation) along with a `TypeVariationAnchor`, parses the `type_variations` section in `ExtensionSignatures`, and exposes lookup by anchor from `ExtensionCollection` (`getTypeVariation`), mirroring how types are handled. `parent` is modeled as a raw String rather than a parsed type: the schema references `$defs/type`, but the spec's own example files use bare type-class names such as `struct`, which are not valid Substrait type expressions (see substrait-io/substrait#1115). Fixes #847 --- .../substrait/extension/SimpleExtension.java | 152 +++++++++++++++++- .../extension/TypeVariationExtensionTest.java | 79 +++++++++ .../extensions/type_variation_extensions.yaml | 26 +++ 3 files changed, 256 insertions(+), 1 deletion(-) create mode 100644 core/src/test/java/io/substrait/extension/TypeVariationExtensionTest.java create mode 100644 core/src/test/resources/extensions/type_variation_extensions.yaml diff --git a/core/src/main/java/io/substrait/extension/SimpleExtension.java b/core/src/main/java/io/substrait/extension/SimpleExtension.java index c2c2298b0..13a2ea521 100644 --- a/core/src/main/java/io/substrait/extension/SimpleExtension.java +++ b/core/src/main/java/io/substrait/extension/SimpleExtension.java @@ -105,6 +105,18 @@ public enum WindowType { STREAMING } + /** + * Enumerates how functions supporting the system-preferred variation relate to a type variation. 
+ */ + public enum TypeVariationFunctionBehavior { + /** + * Functions that support the system-preferred variation implicitly also support this variation. + */ + INHERITS, + /** Functions must be resolved independently for this variation. */ + SEPARATE + } + private SimpleExtension() {} /** Describes an argument provided by a simple extension. */ @@ -372,6 +384,21 @@ static TypeAnchor of(String urn, String name) { } } + /** Describes a type variation anchor provided by a simple extension. */ + @Value.Immutable + public interface TypeVariationAnchor extends Anchor { + /** + * Creates a type variation anchor for the given URN and name. + * + * @param urn the extension URN + * @param name the type variation name, used as the anchor key + * @return the type variation anchor + */ + static TypeVariationAnchor of(String urn, String name) { + return ImmutableSimpleExtension.TypeVariationAnchor.builder().urn(urn).key(name).build(); + } + } + /** Describes a variadic behavior provided by a simple extension. */ @JsonDeserialize(as = ImmutableSimpleExtension.VariadicBehavior.class) @JsonSerialize(as = ImmutableSimpleExtension.VariadicBehavior.class) @@ -1061,6 +1088,82 @@ public TypeAnchor getAnchor() { } } + /** + * Describes a type variation declared by a simple extension. + * + *

<p>A type variation represents an alternative representation of a base type class (e.g. a + * dictionary-encoded string), as defined by the {@code type_variations} section of the simple + * extension schema. + */ + @JsonDeserialize(as = ImmutableSimpleExtension.TypeVariation.class) + @JsonSerialize(as = ImmutableSimpleExtension.TypeVariation.class) + @Value.Immutable + public abstract static class TypeVariation { + private final Supplier<TypeVariationAnchor> anchorSupplier = + Util.memoize(() -> TypeVariationAnchor.of(urn(), name())); + + /** + * Returns the name. + * + * @return the name + */ + public abstract String name(); + + /** + * Returns the base type class of this variation as written in the extension file (e.g. {@code + * string} or {@code struct}). + * + * @return the parent type class + */ + @JsonProperty("parent") + public abstract String parent(); + + /** + * Returns the description. + * + * @return the description + */ + public abstract Optional<String> description(); + + /** + * Returns the function behavior, i.e. whether functions supporting the system-preferred + * variation implicitly support this variation ({@link TypeVariationFunctionBehavior#INHERITS}) + * or must be resolved independently ({@link TypeVariationFunctionBehavior#SEPARATE}). Defaults + * to {@link TypeVariationFunctionBehavior#INHERITS}. + * + * @return the function behavior + */ + @Value.Default + @JsonProperty("functions") + public TypeVariationFunctionBehavior functions() { + return TypeVariationFunctionBehavior.INHERITS; + } + + /** + * Returns the deprecation status, if any. + * + * @return the deprecation status, or empty if this variation is not deprecated + */ + public abstract Optional<DeprecationStatus> deprecated(); + + /** + * Returns the urn. + * + * @return the urn + */ + @JacksonInject(SimpleExtension.URN_LOCATOR_KEY) + public abstract String urn(); + + /** + * Returns the anchor. + * + * @return the anchor + */ + public TypeVariationAnchor getAnchor() { + return anchorSupplier.get(); + } + } + /** Describes an extension signatures provided by a simple extension. 
*/ @JsonDeserialize(as = ImmutableSimpleExtension.ExtensionSignatures.class) @JsonSerialize(as = ImmutableSimpleExtension.ExtensionSignatures.class) @@ -1075,6 +1178,14 @@ public abstract static class ExtensionSignatures { @JsonProperty("types") public abstract List<Type> types(); + /** + * Returns the type variations. + * + * @return the type variations + */ + @JsonProperty("type_variations") + public abstract List<TypeVariation> typeVariations(); + /** * Returns the urn. * @@ -1170,6 +1281,14 @@ public abstract static class ExtensionCollection { types().stream() .collect( Collectors.toMap(Type::getAnchor, java.util.function.Function.identity()))); + + private final Supplier<Map<TypeVariationAnchor, TypeVariation>> typeVariationLookup = + Util.memoize( + () -> + typeVariations().stream() + .collect( + Collectors.toMap( + TypeVariation::getAnchor, java.util.function.Function.identity()))); private final Supplier> scalarFunctionsLookup = Util.memoize( () -> { @@ -1214,6 +1333,13 @@ public Map> extensionMetadata() { */ public abstract List<Type> types(); + /** + * Returns the type variations. + * + * @return the type variations + */ + public abstract List<TypeVariation> typeVariations(); + /** * Returns the scalar Functions. * @@ -1272,6 +1398,25 @@ public Type getType(TypeAnchor anchor) { anchor.key(), anchor.urn())); } + /** + * Returns the type variation for the given anchor. + * + * @param anchor the anchor + * @return the type variation + */ + public TypeVariation getTypeVariation(TypeVariationAnchor anchor) { + TypeVariation typeVariation = typeVariationLookup.get().get(anchor); + if (typeVariation != null) { + return typeVariation; + } + checkUrn(anchor.urn()); + throw new IllegalArgumentException( + String.format( + "Unexpected type variation with name %s. The URN %s is loaded but no type variation " + + "with this name found.", + anchor.key(), anchor.urn())); + } + /** * Returns the scalar Function for the given arguments. 
* @@ -1299,7 +1444,9 @@ public ScalarFunctionVariant getScalarFunction(FunctionAnchor anchor) { * @return the contains Urn */ public boolean containsUrn(String urn) { - return urnSupplier.get().contains(urn) || types().stream().anyMatch(t -> t.urn().equals(urn)); + return urnSupplier.get().contains(urn) + || types().stream().anyMatch(t -> t.urn().equals(urn)) + || typeVariations().stream().anyMatch(tv -> tv.urn().equals(urn)); } private void checkUrn(String name) { @@ -1374,6 +1521,8 @@ public ExtensionCollection merge(ExtensionCollection extensionCollection) { .addAllWindowFunctions(extensionCollection.windowFunctions()) .addAllTypes(types()) .addAllTypes(extensionCollection.types()) + .addAllTypeVariations(typeVariations()) + .addAllTypeVariations(extensionCollection.typeVariations()) .extensionMetadata(mergedExtensionMetadata) .build(); } @@ -1499,6 +1648,7 @@ public static ExtensionCollection buildExtensionCollection( .aggregateFunctions(aggregateFunctionVariants) .windowFunctions(allWindowFunctionVariants) .addAllTypes(extensionSignatures.types()) + .addAllTypeVariations(extensionSignatures.typeVariations()) .extensionMetadata(extMetadata) .build(); diff --git a/core/src/test/java/io/substrait/extension/TypeVariationExtensionTest.java b/core/src/test/java/io/substrait/extension/TypeVariationExtensionTest.java new file mode 100644 index 000000000..e46f5bfec --- /dev/null +++ b/core/src/test/java/io/substrait/extension/TypeVariationExtensionTest.java @@ -0,0 +1,79 @@ +package io.substrait.extension; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import io.substrait.TestBase; +import java.io.IOException; +import java.io.UncheckedIOException; +import org.junit.jupiter.api.Test; + +/** + * Verifies that type variations declared in the {@code type_variations} section of an extension + * YAML file can be read and looked up by 
anchor, including their parent type class, description, + * function behavior, and deprecation information. + */ +class TypeVariationExtensionTest extends TestBase { + + static final String URN = "extension:test:type_variation_extensions"; + static final SimpleExtension.ExtensionCollection TYPE_VARIATION_EXTENSION; + + static { + try { + String extensionStr = asString("extensions/type_variation_extensions.yaml"); + TYPE_VARIATION_EXTENSION = SimpleExtension.load(extensionStr); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + TypeVariationExtensionTest() { + super(TYPE_VARIATION_EXTENSION); + } + + @Test + void parsesInheritsVariation() { + SimpleExtension.TypeVariation variation = + extensions.getTypeVariation(SimpleExtension.TypeVariationAnchor.of(URN, "dict4")); + assertEquals("dict4", variation.name()); + assertEquals("string", variation.parent()); + assertEquals("a four-byte dictionary encoded string", variation.description().orElseThrow()); + assertEquals(SimpleExtension.TypeVariationFunctionBehavior.INHERITS, variation.functions()); + assertTrue(variation.deprecated().isEmpty()); + } + + @Test + void parsesSeparateVariation() { + SimpleExtension.TypeVariation variation = + extensions.getTypeVariation(SimpleExtension.TypeVariationAnchor.of(URN, "avro")); + assertEquals("struct", variation.parent()); + assertEquals(SimpleExtension.TypeVariationFunctionBehavior.SEPARATE, variation.functions()); + } + + @Test + void functionBehaviorDefaultsToInherits() { + SimpleExtension.TypeVariation variation = + extensions.getTypeVariation( + SimpleExtension.TypeVariationAnchor.of(URN, "inheritsByDefault")); + assertEquals(SimpleExtension.TypeVariationFunctionBehavior.INHERITS, variation.functions()); + } + + @Test + void parsesDeprecation() { + SimpleExtension.DeprecationStatus deprecation = + extensions + .getTypeVariation(SimpleExtension.TypeVariationAnchor.of(URN, "deprecatedVariation")) + .deprecated() + .orElseThrow(); + 
assertEquals("0.86.0", deprecation.since()); + assertEquals("Replaced by avro", deprecation.reason().orElseThrow()); + } + + @Test + void unknownVariationThrows() { + assertThrows( + IllegalArgumentException.class, + () -> extensions.getTypeVariation(SimpleExtension.TypeVariationAnchor.of(URN, "missing"))); + } +} diff --git a/core/src/test/resources/extensions/type_variation_extensions.yaml b/core/src/test/resources/extensions/type_variation_extensions.yaml new file mode 100644 index 000000000..250250515 --- /dev/null +++ b/core/src/test/resources/extensions/type_variation_extensions.yaml @@ -0,0 +1,26 @@ +%YAML 1.2 +--- +urn: extension:test:type_variation_extensions +type_variations: + # Explicit INHERITS behavior with a description. + - parent: string + name: dict4 + description: a four-byte dictionary encoded string + functions: INHERITS + # Explicit SEPARATE behavior. + - parent: struct + name: avro + description: an avro encoded struct + functions: SEPARATE + # functions omitted -> defaults to INHERITS. + - parent: string + name: inheritsByDefault + description: a variation without an explicit functions behavior + # Deprecated variation. + - parent: struct + name: deprecatedVariation + description: a deprecated struct variation + functions: SEPARATE + deprecated: + since: "0.86.0" + reason: "Replaced by avro" From ee526efbf383de7febafbc5259d19eb031120a22 Mon Sep 17 00:00:00 2001 From: Niels Pardon Date: Wed, 1 Jul 2026 18:23:05 +0200 Subject: [PATCH 2/2] docs(core): use {@code} instead of {@link} for TypeVariation function behavior The {@link TypeVariationFunctionBehavior#...} references failed javadoc generation: Immutables copies the doc onto the generated ImmutableSimpleExtension methods, where the nested enum name is not in scope, producing "reference not found" errors. 
--- .../main/java/io/substrait/extension/SimpleExtension.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/io/substrait/extension/SimpleExtension.java b/core/src/main/java/io/substrait/extension/SimpleExtension.java index 13a2ea521..e494aa1a8 100644 --- a/core/src/main/java/io/substrait/extension/SimpleExtension.java +++ b/core/src/main/java/io/substrait/extension/SimpleExtension.java @@ -1127,9 +1127,8 @@ public abstract static class TypeVariation { /** * Returns the function behavior, i.e. whether functions supporting the system-preferred - * variation implicitly support this variation ({@link TypeVariationFunctionBehavior#INHERITS}) - * or must be resolved independently ({@link TypeVariationFunctionBehavior#SEPARATE}). Defaults - * to {@link TypeVariationFunctionBehavior#INHERITS}. + * variation implicitly support this variation ({@code INHERITS}) or must be resolved + * independently ({@code SEPARATE}). Defaults to {@code INHERITS}. * * @return the function behavior */