From 7727757efe95f8db36b579e6f14290dfcb9dd6d4 Mon Sep 17 00:00:00 2001 From: Cedrick Lunven Date: Thu, 11 Jun 2026 15:10:39 +0200 Subject: [PATCH 1/2] feat: rerank override improvements --- .../cursor/CollectionFindAndRerankCursor.java | 15 ++++ .../CollectionFindAndRerankOptions.java | 21 +++++ .../AbstractCollectionFindAndRerankIT.java | 87 +++++++++++++++++++ .../tool/copy/CollectionCloneSettings.java | 8 ++ 4 files changed, 131 insertions(+) diff --git a/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java b/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java index 64e8996e..d0aa5d18 100644 --- a/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java +++ b/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/cursor/CollectionFindAndRerankCursor.java @@ -26,6 +26,7 @@ import com.datastax.astra.client.core.query.Filter; import com.datastax.astra.client.core.query.Projection; import com.datastax.astra.client.core.query.Sort; +import com.datastax.astra.client.core.rerank.RerankServiceOptions; import com.datastax.astra.client.core.rerank.RerankedResult; import com.datastax.astra.client.core.vector.DataAPIVector; import com.datastax.astra.internal.command.AbstractCursor; @@ -155,6 +156,20 @@ public CollectionFindAndRerankCursor project(Projection... 
newProjection) return newCursor; } + /** + * Creates a new {@link CollectionFindAndRerankCursor} with an updated reranking service. + * + * @param rerankService + * the new reranking service to apply + * @return a new {@link CollectionFindAndRerankCursor} instance with the specified reranking service + */ + public CollectionFindAndRerankCursor rerankService(RerankServiceOptions rerankService) { + checkIdleState(); + CollectionFindAndRerankCursor newCursor = this.clone(); + newCursor.options.rerankService(rerankService); + return newCursor; + } + /** * Creates a new {@link CollectionFindAndRerankCursor} with a specified sort order. * diff --git a/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java b/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java index 6e188be6..1c7d96b9 100644 --- a/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java +++ b/astra-db-java/src/main/java/com/datastax/astra/client/collections/commands/options/CollectionFindAndRerankOptions.java @@ -26,6 +26,8 @@ import com.datastax.astra.client.core.options.BaseOptions; import com.datastax.astra.client.core.query.Projection; import com.datastax.astra.client.core.query.Sort; +import com.datastax.astra.client.core.rerank.RerankProvider; +import com.datastax.astra.client.core.rerank.RerankServiceOptions; import lombok.Getter; import lombok.Setter; import lombok.experimental.Accessors; @@ -71,6 +73,11 @@ public class CollectionFindAndRerankOptions extends BaseOptions col = getDatabase().createCollection(collectionName, initialDef); + assertThat(col).isNotNull(); + assertThat(col.getDefinition().getRerank()).isNotNull(); + assertThat(col.getDefinition().getRerank().getService().getModelName()) + .isEqualTo("nvidia/llama-3.2-nv-rerankqa-1b-v2"); + + // Insert test documents + col.insertMany(List.of( + new 
Document().id("m1").put("text", "Artificial intelligence transforms technology").vectorize("Artificial intelligence transforms technology").lexical("Artificial intelligence transforms technology"), + new Document().id("m2").put("text", "Machine learning enables predictions").vectorize("Machine learning enables predictions").lexical("Machine learning enables predictions"), + new Document().id("m3").put("text", "Deep learning powers neural networks").vectorize("Deep learning powers neural networks").lexical("Deep learning powers neural networks")), + new CollectionInsertManyOptions().chunkSize(3)); + + // Perform findAndRerank with initial configuration + CollectionFindAndRerankOptions options = baseFindAndRerankOptions() + .sort(Sort.hybrid(new Hybrid("artificial intelligence and machine learning"))) + .hybridLimits(10) + .limit(3); + + List> initialResults = col.findAndRerank(options).toList(); + assertThat(initialResults).isNotEmpty(); + log.info("Initial rerank results count: {}", initialResults.size()); + + // Override the rerank service for this request through CollectionFindAndRerankOptions + // Note: the collection definition itself is not modified by this test + // First use a provider/model pair that is expected to be accepted + RerankServiceOptions validUpdatedRerankService = new RerankServiceOptions() + .modelName("nvidia/llama-3.2-nv-rerankqa-1b-v2") + .provider("nvidia"); // provider/model pair expected to be accepted + + CollectionFindAndRerankOptions mutatedRerankOptions = new CollectionFindAndRerankOptions() + .rerankService(validUpdatedRerankService); + + List> updatedResults = col.findAndRerank(mutatedRerankOptions).toList(); + assertThat(updatedResults).isNotEmpty(); + log.info("Updated rerank results count: {}", updatedResults.size()); + + CollectionFindAndRerankOptions mutatedInvalidRerankOptions = new CollectionFindAndRerankOptions() + .rerankService(new RerankServiceOptions().modelName("bla").provider("ble")); + + try { + 
col.findAndRerank(mutatedInvalidRerankOptions).toList(); + fail(); + } catch (DataAPIException dataAPIException) { + // Expected: findAndRerank with the unknown rerank provider/model must throw + } + + // Cleanup + getDatabase().dropCollection(collectionName); + } + } diff --git a/tools/data-api-tools/src/main/java/com/datastax/astra/tool/copy/CollectionCloneSettings.java b/tools/data-api-tools/src/main/java/com/datastax/astra/tool/copy/CollectionCloneSettings.java index b114c3db..af0a8be4 100644 --- a/tools/data-api-tools/src/main/java/com/datastax/astra/tool/copy/CollectionCloneSettings.java +++ b/tools/data-api-tools/src/main/java/com/datastax/astra/tool/copy/CollectionCloneSettings.java @@ -29,6 +29,14 @@ public class CollectionCloneSettings { @Builder.Default private final int insertThreadPoolSize = 10; + /** + * Number of parallel threads used to read from the source collection. + * More threads = faster reading through parallel skip/limit queries. + * Default: 5 + */ + @Builder.Default + private final int readThreadPoolSize = 5; + /** * Maximum time in seconds to wait for all insertions to complete. 
* Default: 300 seconds (5 minutes) From dbbcd67c369e0a0af550e94a87b7dcbce549e251 Mon Sep 17 00:00:00 2001 From: Cedrick Lunven Date: Thu, 11 Jun 2026 15:28:15 +0200 Subject: [PATCH 2/2] Add overriding method in CollectionDefinition --- .../definition/CollectionDefinition.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/astra-db-java/src/main/java/com/datastax/astra/client/collections/definition/CollectionDefinition.java b/astra-db-java/src/main/java/com/datastax/astra/client/collections/definition/CollectionDefinition.java index 219a1f48..610b4715 100644 --- a/astra-db-java/src/main/java/com/datastax/astra/client/collections/definition/CollectionDefinition.java +++ b/astra-db-java/src/main/java/com/datastax/astra/client/collections/definition/CollectionDefinition.java @@ -286,7 +286,7 @@ public CollectionDefinition vector(int dimension, @NonNull SimilarityMetric func * @return self reference */ public CollectionDefinition vectorize(String provider, String modeName) { - return vectorize(provider, modeName, null); + return vectorize(provider, modeName, (String) null); } /** @@ -313,6 +313,18 @@ public CollectionDefinition vectorize(String provider, String modeName, String s return this; } + /** + * Enable Vectorization within the collection with provider parameters; delegates to the four-argument overload, passing {@code null} as its third argument. + * + * @param provider provider Name (LLM) + * @param modeName model name + * @param parameters expected parameters for vectorize + * @return self reference + */ + public CollectionDefinition vectorize(String provider, String modeName, Map parameters) { + return vectorize(provider, modeName, null, parameters); + } + /** * Enable Vectorization within the collection. * @@ -328,6 +340,8 @@ public CollectionDefinition vectorize(String provider, String modeName, String s return this; } + + // --------------------- // Lexical options // ---------------------