diff --git a/.gitignore b/.gitignore
index 5f96167..aa49fe5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,7 +103,7 @@ ipython_config.py
 #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
 #   This is especially recommended for binary packages to ensure reproducibility, and is more
 #   commonly ignored for libraries.
-#uv.lock
+uv.lock
 
 # poetry
 #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
@@ -187,4 +187,4 @@ examples/checkpoints/
 examples/outputs/
 
 .codex/
-openspec/
\ No newline at end of file
+.agents/
\ No newline at end of file
diff --git a/README.md b/README.md
index 6521a97..38b2deb 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Datafast is a python library for synthetic data generation using llms.
 The old dataset-class API has been removed. The canonical package is `datafast`, and the primary model is:
 
 - create records with `Source` or `Seed`
-- transform them with composable steps
+- transform them with composable steps such as `AddUUID`, `Map`, and `Filter`
 - call LLMs with `LLMStep`, `Classify`, `Score`, `Compare`, `Rewrite`, or `Extract`
 - persist results with `Sink`
 
@@ -53,7 +53,7 @@ pipeline.run(batch_size=4)
 
 - `Source`: load records from Python lists, files, or Hugging Face datasets
 - `Seed`: generate record combinations declaratively
-- `Map`, `FlatMap`, `Filter`, `Group`, `Pair`, `Concat`, `Join`: data operations
+- `AddUUID`, `Map`, `FlatMap`, `Filter`, `Group`, `Pair`, `Concat`, `Join`: data operations
 - `LLMStep`: free-form generation
 - `Classify`, `Score`, `Compare`, `Rewrite`, `Extract`: higher-level LLM transforms
 - `Branch` and `JoinBranches`: multi-path pipelines
diff --git a/datafast/__init__.py b/datafast/__init__.py
index 19bd452..6576370 100644
--- a/datafast/__init__.py
+++ b/datafast/__init__.py
@@ -31,7 +31,7 @@
     is_langfuse_tracing_enabled,
 )
 from datafast.transforms.branch import Branch, JoinBranches
-from datafast.transforms.data_ops import Map, FlatMap, Filter, Group, Pair, Concat, Join
+from datafast.transforms.data_ops import AddUUID, Map, FlatMap, Filter, Group, Pair, Concat, Join
 from datafast.transforms.llm_eval import Classify, Score, Compare
 from datafast.transforms.llm_extract import Extract
 from datafast.transforms.llm_step import LLMStep
@@ -64,6 +64,7 @@ def get_version() -> str:
     "Seed",
     "SeedDimension",
     "Sample",
+    "AddUUID",
     "Map",
     "FlatMap",
     "Filter",
diff --git a/datafast/core/runner.py b/datafast/core/runner.py
index 0a28ba2..3497605 100644
--- a/datafast/core/runner.py
+++ b/datafast/core/runner.py
@@ -233,7 +233,7 @@ def _execute_llm_step(
 
                 try:
                     result = model.generate(
-                        call.messages,
+                        messages=call.messages,
                         metadata=build_trace_metadata(
                             model=model,
                             component="pipeline.step",
diff --git a/datafast/llms.py b/datafast/llms.py
index 092346a..754e8b8 100644
--- a/datafast/llms.py
+++ b/datafast/llms.py
@@ -18,7 +18,6 @@
 # LiteLLM
 import litellm
 from litellm.exceptions import RateLimitError
-from litellm.utils import ModelResponse
 
 # Internal imports
 from .llm_utils import get_messages
@@ -292,17 +291,23 @@ def generate(
             if response_format is not None:
                 completion_params["response_format"] = response_format
 
-            # Call LiteLLM completion with batch messages - retry on rate limit
+            # Call LiteLLM completion with retry on rate limit.
+            # OpenRouter accepts single message requests via completion(), but
+            # rejects the same payload when wrapped in batch_completion().
             max_retries = 3
             retry_delay = 5  # Start with 5 seconds
             response = None
-            
+
             for attempt in range(max_retries):
                 try:
-                    response: list[ModelResponse] = litellm.batch_completion(
-                        **completion_params)
+                    if len(batch_to_send) == 1:
+                        response = [litellm.completion(
+                            **{**completion_params, "messages": batch_to_send[0]}
+                        )]
+                    else:
+                        response = litellm.batch_completion(**completion_params)
                     break  # Success, exit retry loop
-                except RateLimitError as e:
+                except RateLimitError:
                     if attempt < max_retries - 1:
                         wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
                         logger.warning(
@@ -316,7 +321,7 @@ def generate(
                             f"Provider: {self.provider_name} | Model: {self.model_id}"
                         )
                         raise
-            
+
             if response is None:
                 raise RuntimeError("Failed to get response after retries")
 
diff --git a/datafast/transforms/__init__.py b/datafast/transforms/__init__.py
index 025ea3f..f7a88d2 100644
--- a/datafast/transforms/__init__.py
+++ b/datafast/transforms/__init__.py
@@ -1,7 +1,7 @@
 """Transform steps for datafast v2."""
 
 from datafast.transforms.sample import Sample
-from datafast.transforms.data_ops import Map, FlatMap, Filter, Group, Pair, Concat, Join
+from datafast.transforms.data_ops import AddUUID, Map, FlatMap, Filter, Group, Pair, Concat, Join
 from datafast.transforms.llm_step import LLMStep
 from datafast.transforms.llm_eval import Classify, Score, Compare
 from datafast.transforms.llm_transform import Rewrite
@@ -9,7 +9,7 @@
 from datafast.transforms.branch import Branch, JoinBranches
 
 __all__ = [
-    "Sample", "Map", "FlatMap", "Filter", "Group", "Pair", "Concat", "Join",
+    "Sample", "AddUUID", "Map", "FlatMap", "Filter", "Group", "Pair", "Concat", "Join",
     "LLMStep", "Classify", "Score", "Compare", "Rewrite", "Extract",
     "Branch", "JoinBranches",
 ]
diff --git a/datafast/transforms/data_ops.py b/datafast/transforms/data_ops.py
index 3887460..fafb5cf 100644
--- a/datafast/transforms/data_ops.py
+++ b/datafast/transforms/data_ops.py
@@ -3,6 +3,7 @@
 import itertools
 import random
 import re
+import uuid
 from collections import defaultdict
 from collections.abc import Callable, Iterable
 from typing import Any
@@ -62,6 +63,34 @@ def process(self, records: Iterable[Record]) -> Iterable[Record]:
             yield from self._fn(record)
 
 
+class AddUUID(Step):
+    """Add a UUID field to each record."""
+
+    def __init__(self, column: str = "id", overwrite: bool = False) -> None:
+        """
+        Initialize an AddUUID step.
+
+        Args:
+            column: Field name to write the UUID into.
+            overwrite: If True, replace existing values in the target column.
+
+        Examples:
+            >>> AddUUID()
+            >>> AddUUID(column="example_id", overwrite=True)
+        """
+        super().__init__()
+        self._column = column
+        self._overwrite = overwrite
+
+    def process(self, records: Iterable[Record]) -> Iterable[Record]:
+        """Add UUIDs while preserving all other fields."""
+        for record in records:
+            if self._column in record and not self._overwrite:
+                yield record
+            else:
+                yield {**record, self._column: str(uuid.uuid4())}
+
+
 class Filter(Step):
     """Keep or drop records based on conditions."""
 
diff --git a/docs/api.md b/docs/api.md
index edef161..45857e2 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -36,6 +36,7 @@ from datafast import Source, LLMStep, Sink, openrouter
 ## Data Operations
 
 - `Sample`
+- `AddUUID`
 - `Map`
 - `FlatMap`
 - `Filter`
diff --git a/docs/cookbook/assets/index.md b/docs/cookbook/assets/index.md
new file mode 100644
index 0000000..7f69923
--- /dev/null
+++ b/docs/cookbook/assets/index.md
@@ -0,0 +1,80 @@
+# Cookbook Assets
+
+Prompt files and dataset details used by cookbook examples.
+
+## Text Classification
+
+### Dataset
+
+- **Source:** seed dimensions created with `Seed.product`
+- **Dimensions:** label, trail type, style, language, and model
+- **Local output:** `examples/outputs/45_text_classification_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/45_text_classification_cookbook`
+- **Hub output:** optional, controlled by `DATAFAST_PUSH_TO_HUB=1`
+
+This cookbook models variation directly as seed dimensions so the label, trail
+type, style, language, and model are all explicit in the
+pipeline.
+
+### Prompt
+
+| File | Style |
+| --- | --- |
+| [text_classification_generation.txt](text_classification_generation.txt) | One short trail report per call, with label, trail type, style, and language injected |
+
+## Persona Generation
+
+### Dataset
+
+- **Source:** `xsum` (Hugging Face), `validation` split
+- **Fields used:** `id`, `document`, `summary`
+- **Filter:** 300–500 words, first 100 matches
+- **Local output:** `examples/outputs/43_persona_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/43_persona_cookbook`
+- **Hub output:** set `HF_REPO_ID` and the `repo_id` in `push_records_to_hub()` to repos under your own Hugging Face username or organization
+
+The example keeps first-match sampling for reproducibility. For local JSONL corpora with metadata such as `document_filename`, stratified sampling is usually a better fit.
+
+### Prompt Variants
+
+Each LLM step picks one prompt at random per record. The script also assigns random `life_stage` and `related_life_stage` values before the corresponding LLM steps. Multiple variants add diversity.
+
+#### Text-to-Persona
+
+| File | Style |
+| --- | --- |
+| [text_to_persona_v1.txt](text_to_persona_v1.txt) | Direct inference of a reader persona |
+| [text_to_persona_v2.txt](text_to_persona_v2.txt) | XML-tagged source text, writer/reader framing |
+| [text_to_persona_v3.txt](text_to_persona_v3.txt) | System-role preamble, search-interest angle |
+
+#### Persona-to-Persona
+
+| File | Style |
+| --- | --- |
+| [persona_to_persona_v1.txt](persona_to_persona_v1.txt) | Close relationship, standalone description |
+| [persona_to_persona_v2.txt](persona_to_persona_v2.txt) | Rule-list format, explicit separation of description and relationship |
+| [persona_to_persona_v3.txt](persona_to_persona_v3.txt) | XML-tagged input, concise vivid output |
+
+### Provenance
+
+- Text-to-Persona and Persona-to-Persona prompts are paper-aligned adaptations. The Persona Hub paper states its published prompts are simplified, not exact.
+- No Persona Hub code is reused. The workflow is built with datafast primitives.
+
+## Space Engineering Text Generation
+
+### Dataset
+
+- **Source:** seed dimensions created with `Seed.product`
+- **Dimensions:** document type, topic, expertise level, and language
+- **Local output:** `examples/outputs/44_space_text_generation_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/44_space_text_generation_cookbook`
+- **Hub output:** optional, controlled by `DATAFAST_PUSH_TO_HUB=1`
+
+### Prompt
+
+The text-generation cookbook uses one compact prompt and relies on seed
+dimensions for variation.
+
+| File | Style |
+| --- | --- |
+| [space_text_generation.txt](space_text_generation.txt) | Minimal variable-driven request |
diff --git a/docs/cookbook/assets/persona_to_persona_v1.txt b/docs/cookbook/assets/persona_to_persona_v1.txt
new file mode 100644
index 0000000..eabb6d6
--- /dev/null
+++ b/docs/cookbook/assets/persona_to_persona_v1.txt
@@ -0,0 +1,11 @@
+Given the following persona, infer one other specific persona who is in a close relationship with them.
+
+Persona:
+{persona_description}
+
+Requirements:
+1. Use one clear relationship such as family member, colleague, friend, neighbor, coach, teacher, or married partner.
+2. Choose a related persona that adds a meaningfully different life perspective but is still likely to be in close contact with the original persona.
+3. Keep the related persona realistic and specific.
+4. Don't talk about the original person in the description of the related persona, as it should be a self-contained description.
+5. The related persona must be {related_life_stage}. Do not state a precise age, just reflect this life stage naturally.
diff --git a/docs/cookbook/assets/persona_to_persona_v2.txt b/docs/cookbook/assets/persona_to_persona_v2.txt
new file mode 100644
index 0000000..b4e4adf
--- /dev/null
+++ b/docs/cookbook/assets/persona_to_persona_v2.txt
@@ -0,0 +1,14 @@
+Think of a person who regularly interacts with the following persona in a meaningful way.
+
+Rules:
+- Do not mention the original persona in the description of the related persona.
+- Do not mention the relationship between the two personas in the description, only in the relationship_type field.
+- Pick a single, concrete relationship type such as mentor-mentee, colleague, neighbor, supervisor-report, or service provider-client.
+- The related person should bring a distinctly different viewpoint or expertise, and some uniqueness.
+- Keep the description realistic and standalone without mentioning the original persona.
+- The related persona must be {related_life_stage}. Do not state a precise age, just reflect this life stage naturally.
+
+Original Persona:
+{persona_description}
+
+Now generate a related persona.
\ No newline at end of file
diff --git a/docs/cookbook/assets/persona_to_persona_v3.txt b/docs/cookbook/assets/persona_to_persona_v3.txt
new file mode 100644
index 0000000..9652161
--- /dev/null
+++ b/docs/cookbook/assets/persona_to_persona_v3.txt
@@ -0,0 +1,16 @@
+Here is the description of someone:
+<description>
+{persona_description}
+</description>
+
+Come up with one other description of an individual who could be part of this persona's life.
+We want the description to be detailed but super concise (max 2 sentences) and vivid.
+But we want a standalone description of that new persona, without mentioning the original persona or a reason in the description.
+
+Requirements:
+1. Define a clear interpersonal link such as friend, advisor, competitor, family member, or collaborator.
+2. The new persona should offer a complementary or contrasting perspective.
+3. Make the related persona vivid and believable, avoid generic archetypes.
+4. Describe the relation in relationship_type field, not in the description.
+5. The related persona must be {related_life_stage}. Do not state a precise age, just reflect this life stage naturally.
+
diff --git a/docs/cookbook/assets/space_text_generation.txt b/docs/cookbook/assets/space_text_generation.txt
new file mode 100644
index 0000000..ca5af4b
--- /dev/null
+++ b/docs/cookbook/assets/space_text_generation.txt
@@ -0,0 +1 @@
+Write one {document_type} excerpt about {topic} for {expertise_level} in {language_name}.
diff --git a/docs/cookbook/assets/text_classification_generation.txt b/docs/cookbook/assets/text_classification_generation.txt
new file mode 100644
index 0000000..24d28c0
--- /dev/null
+++ b/docs/cookbook/assets/text_classification_generation.txt
@@ -0,0 +1,13 @@
+Write one realistic hiker report in {language_name}.
+
+Target category: {label}
+Category definition: {label_description}
+
+Constraints:
+- The report must clearly match the target category.
+- The setting must be a {trail_type}.
+- The writing style must be {style}.
+- Keep it to 1 or 2 sentences.
+- Do not mention the category name directly.
+- Do not use bullets, numbering, or explanations.
+- Make the report concrete and varied.
diff --git a/docs/cookbook/assets/text_to_persona_v1.txt b/docs/cookbook/assets/text_to_persona_v1.txt
new file mode 100644
index 0000000..cd09909
--- /dev/null
+++ b/docs/cookbook/assets/text_to_persona_v1.txt
@@ -0,0 +1,17 @@
+Infer one specific persona who is likely to read the source text below.
+
+Source text:
+{document}
+
+Requirements:
+1. Return a single persona, not a group.
+2. Make the persona specific and fine-grained rather than generic.
+3. Ground the persona in signals from the text such as domain, expertise, context, or likely motivation.
+4. Do not quote the source text in the persona field.
+5. Only write 1 or 2 sentences maximum.
+6. The persona is not the subject of the text, but rather someone who would be reading it.
+7. Do not refer to the source text, article, or its content in the persona description. The persona must be self-contained.
+8. The persona must be {life_stage}. Do not mention a precise age, just reflect this life stage naturally.
+
+Now write a description of a persona who would be reading this text.
+
diff --git a/docs/cookbook/assets/text_to_persona_v2.txt b/docs/cookbook/assets/text_to_persona_v2.txt
new file mode 100644
index 0000000..294577d
--- /dev/null
+++ b/docs/cookbook/assets/text_to_persona_v2.txt
@@ -0,0 +1,16 @@
+<source_text>
+{document}
+</source_text>
+
+Identify one precise individual who would naturally encounter or write the <source_text>.
+
+Requirements:
+1. Describe exactly one person.
+2. Be as specific as possible: mention plausible occupation and/or life situation.
+3. Derive the persona strictly from cues in the text such as topic, jargon, tone, or implied audience, treating the persona as a potential writer or reader of this text.
+4. Do not copy or paraphrase the source text in the persona field.
+5. Only return 1 or 2 sentences maximum.
+6. The described person is not the subject of the text, but rather someone who would be encountering or writing such text as part of their life.
+7. Do not reference the source text, article, or its content in the persona description. The persona must stand on its own.
+8. The persona must be {life_stage}. Do not state a precise age, just reflect this life stage naturally.
+
diff --git a/docs/cookbook/assets/text_to_persona_v3.txt b/docs/cookbook/assets/text_to_persona_v3.txt
new file mode 100644
index 0000000..3ccb077
--- /dev/null
+++ b/docs/cookbook/assets/text_to_persona_v3.txt
@@ -0,0 +1,17 @@
+You are a persona inference assistant.
+
+Based on the text content below, imagine one real person who would be interested in searching for information about the topic of this content.
+
+Rules:
+- Output a single, concrete persona rather than a broad demographic.
+- Include details like professional background, interests, or situational context that make the persona feel authentic.
+- Don't mention the person's search or information-retrieval action in the persona description; just describe the persona in a way that could explain their interest in the topic.
+- Keep it super short and concise.
+- Do not mention or refer to the source text, article, or its content in the persona description. The persona must be self-contained.
+- The persona must be {life_stage}. Do not state a precise age, just reflect this life stage naturally.
+
+Source text:
+{document}
+
+
+
diff --git a/docs/cookbook/index.md b/docs/cookbook/index.md
new file mode 100644
index 0000000..1b745ec
--- /dev/null
+++ b/docs/cookbook/index.md
@@ -0,0 +1,16 @@
+# Cookbook
+
+Cookbooks connect a runnable script to a documentation walkthrough.
+
+The Python script is the source of truth. Each cookbook page explains:
+
+- where the executable example lives
+- what inputs it uses
+- which prompt assets it depends on
+- where it writes its output artifacts
+
+## Available Cookbooks
+
+- [Text Classification](text_classification.md): generate a multilingual trail-conditions classification dataset from explicit seed dimensions.
+- [Persona Generation](persona_generation.md): infer personas from real articles and expand them through relationships using randomized prompt variants.
+- [Space Engineering Text Generation](space_text_generation.md): generate a raw multilingual technical text corpus from seed dimensions.
diff --git a/docs/cookbook/persona_generation.md b/docs/cookbook/persona_generation.md
new file mode 100644
index 0000000..f314a39
--- /dev/null
+++ b/docs/cookbook/persona_generation.md
@@ -0,0 +1,89 @@
+# Persona Generation
+
+Build personas from real articles and expand them through relationships. Inspired by the Persona Hub paper, implemented entirely with datafast.
+
+## Source
+
+- **Script:** `examples/scripts/43_cookbook_persona_generation.py`
+- **Prompt assets:** [asset index](assets/index.md)
+- **Local output:** `examples/outputs/43_persona_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/43_persona_cookbook`
+- **Hub output:** pushed to the Hugging Face Hub repo IDs configured in the script
+
+## Pipeline
+
+1. Load `xsum` articles (`validation` split), preserving the dataset `id`.
+2. Filter to documents between 300 and 500 words. Keep the first 100 matches.
+3. Assign a random life stage to the source persona.
+4. **Text-to-Persona** — infer one persona from each article and life stage.
+5. Assign a random life stage to the related persona.
+6. **Persona-to-Persona** — expand that persona into a related individual.
+7. Keep the final output fields, add a row UUID, write JSONL, checkpoint progress, and push results to Hugging Face Hub.
+
+Each LLM step randomly picks one prompt variant per record using `Sample(prompts, n=1)`. This adds diversity across generations.
+
+The cookbook keeps `Sample(n=100, strategy="first")` so runs are deterministic and easy to compare. For local corpora with source metadata, use stratified sampling, for example `Sample(n=250, strategy="stratified", by="document_filename")`, to avoid over-representing one source file.
+
+```text
+xsum article
+    │
+    ▼
+life_stage  (random from configured stages)
+    │
+    ▼
+Text-to-Persona  (random prompt from 3 variants)
+    │
+    ▼
+related_life_stage  (random from configured stages)
+    │
+    ▼
+Persona-to-Persona  (random prompt from 3 variants)
+    │
+    ▼
+Hugging Face Hub
+```
+
+## Run
+
+Prerequisites:
+
+- `OPENROUTER_API_KEY` set in a `.env` file
+- Hugging Face authentication via `HF_TOKEN` in `.env` or a cached `huggingface_hub` login
+- Base dependencies from `pyproject.toml` installed
+
+Before running, replace the example Hugging Face namespaces in the script with your own username or organization:
+
+- `HF_REPO_ID = "<your-username-or-org>/new-persona-cookbook-dataset"` controls the private pipeline sink.
+- `repo_id = "<your-username-or-org>/datafast-persona-cookbook"` inside `push_records_to_hub()` controls the public publish step.
+
+```bash
+python examples/scripts/43_cookbook_persona_generation.py
+```
+
+The run uses `checkpoint_dir` and `resume=True`, which is useful for paid or rate-limited LLM calls. If a run is interrupted, re-run the same command to continue from the saved checkpoints.
+
+The main example reads from Hugging Face. For a local JSONL corpus, replace `Source.huggingface(...)` with `Source.file(...)` and map your text column to `document` before `add_word_count`.
+
+## Prompt Variants
+
+Each step draws from multiple prompt files stored under `docs/cookbook/assets/`. See the [asset index](assets/index.md) for the full list.
+
+- **Text-to-Persona:** 3 variants (`text_to_persona_v1.txt`, `v2`, `v3`)
+- **Persona-to-Persona:** 3 variants (`persona_to_persona_v1.txt`, `v2`, `v3`)
+
+## Research Basis
+
+The Persona Hub paper introduces Text-to-Persona and Persona-to-Persona as scalable methods for building personas from web text. The paper states that its published prompts are simplified, not the exact experiment strings. This cookbook treats them as paper-aligned adaptations. It does not reuse any Persona Hub code.
+
+## Output Fields
+
+- `id` — generated row UUID
+- `source_id` — original XSum record identifier
+- `summary` — original article summary
+- `document` — source article text
+- `word_count` — whitespace token count
+- `life_stage` — randomly selected life stage for the inferred persona
+- `persona_description` — inferred persona
+- `relationship_type` — link between the two personas
+- `related_life_stage` — randomly selected life stage for the expanded persona
+- `related_persona_description` — the expanded related persona
diff --git a/docs/cookbook/space_text_generation.md b/docs/cookbook/space_text_generation.md
new file mode 100644
index 0000000..92c55dc
--- /dev/null
+++ b/docs/cookbook/space_text_generation.md
@@ -0,0 +1,103 @@
+# Space Engineering Text Generation
+
+Build a raw technical text corpus across document types, topics, expertise levels,
+languages, and model choices.
+
+## Source
+
+- **Script:** `examples/scripts/44_cookbook_space_text_generation.py`
+- **Prompt assets:** [asset index](assets/index.md)
+- **Local output:** `examples/outputs/44_space_text_generation_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/44_space_text_generation_cookbook`
+- **Hub output:** optional, controlled by `DATAFAST_PUSH_TO_HUB=1`
+
+## Pipeline
+
+1. Create a seed grid with `Seed.product`.
+2. Cross document types, topics, and expertise levels explicitly.
+3. Generate one section per seed and language with `LLMStep`.
+4. Let the prompt variables drive the corpus variation.
+5. Parse `title` and `text` from JSON mode.
+6. Keep publication fields, add a row UUID, write JSONL, checkpoint progress,
+   and optionally push to Hugging Face Hub.
+
+The default model is `nvidia/nemotron-3-super-120b-a12b:nitro` through
+OpenRouter.
+
+```text
+document_type x topic x expertise_level
+    |
+    v
+LLMStep language expansion: English and French
+    |
+    v
+JSON fields: title, text
+    |
+    v
+examples/outputs/44_space_text_generation_cookbook.jsonl
+```
+
+## Row Count
+
+The default script generates:
+
+```text
+3 document types x 8 topics x 3 expertise levels x 2 languages
+x 1 generated output x 1 model = 144 rows
+```
+
+To use several models, add provider IDs to `MODEL_IDS`. `LLMStep` will run each
+seed-language combination through every model and the row count will multiply by
+the number of models.
+
+## Run
+
+Prerequisites:
+
+- `OPENROUTER_API_KEY` set in a `.env` file
+- Base dependencies from `pyproject.toml` installed
+- Hugging Face authentication only if publishing
+
+```bash
+python examples/scripts/44_cookbook_space_text_generation.py
+```
+
+To publish, replace `HF_REPO_ID` in the script with a repository under your own
+Hugging Face username or organization, then run:
+
+```bash
+DATAFAST_PUSH_TO_HUB=1 python examples/scripts/44_cookbook_space_text_generation.py
+```
+
+The run uses `checkpoint_dir` and `resume=True`. If generation is interrupted,
+run the command again to continue from saved checkpoints.
+
+## Prompt
+
+The script uses one compact prompt file:
+
+```text
+Write one {document_type} excerpt about {topic} for {expertise_level} in {language_name}.
+```
+
+## Generation Controls
+
+- `MODEL_IDS` controls which models generate each record.
+- `LANGUAGES` controls language expansion and writes the emitted language code to
+  the `language` field.
+- `NUM_OUTPUTS` controls how many generated rows are created for each
+  seed, language, and model combination.
+- `PROMPT_PATH` controls the prompt file used for generation.
+- `SEED` controls deterministic dataset splitting when publishing.
+- `HF_REPO_ID` controls the optional Hugging Face Hub destination.
+
+## Output Fields
+
+- `id` - generated row UUID
+- `document_type` - requested document style
+- `topic` - space engineering topic
+- `expertise_level` - intended reader level
+- `language` - language code emitted by `LLMStep`
+- `model` - model ID emitted by `LLMStep`
+- `title` - generated section title
+- `text` - generated corpus text
diff --git a/docs/cookbook/text_classification.md b/docs/cookbook/text_classification.md
new file mode 100644
index 0000000..819523d
--- /dev/null
+++ b/docs/cookbook/text_classification.md
@@ -0,0 +1,118 @@
+# Text Classification
+
+Build a multilingual trail-conditions classification dataset with `datafast`.
+
+## Source
+
+- **Script:** `examples/scripts/45_cookbook_text_classification.py`
+- **Prompt assets:** [asset index](assets/index.md)
+- **Local output:** `examples/outputs/45_text_classification_cookbook.jsonl`
+- **Checkpoints:** `examples/checkpoints/45_text_classification_cookbook`
+- **Hub output:** optional, controlled by `DATAFAST_PUSH_TO_HUB=1`
+
+## Use Case
+
+This cookbook generates short hiker reports across four trail-condition labels
+so teams can monitor trail quality and surface issues quickly.
+
+The default setup is:
+
+- multi-class: 4 trail-condition labels
+- multi-lingual: English and French
+- multi-model: two generation models by default
+- publishable: optional push to Hugging Face Hub
+
+## Pipeline
+
+1. Create a seed grid from labels, trail types, and writing styles.
+2. Generate one short hiker report for each seed across all configured models
+   and languages.
+3. Keep the label and prompt-variation provenance in flat output columns.
+4. Add a UUID, write JSONL locally, and optionally push to Hugging Face Hub.
+
+Variation is modeled explicitly through `Seed.product(...)`, which keeps the
+generation axes inspectable and easy to count.
+
+```text
+label x trail_type x style
+    |
+    v
+LLMStep language expansion: English and French
+    |
+    v
+LLMStep model expansion
+    |
+    v
+examples/outputs/45_text_classification_cookbook.jsonl
+```
+
+## Row Count
+
+The default script generates:
+
+```text
+4 labels x 3 trail types x 2 styles x 2 languages
+x 2 models = 96 rows
+```
+
+Each extra model in `MODEL_IDS` multiplies the total row count.
+
+## Run
+
+Prerequisites:
+
+- `OPENROUTER_API_KEY` set in a `.env` file
+- Base dependencies from `pyproject.toml` installed
+- Hugging Face authentication only if publishing
+
+```bash
+python examples/scripts/45_cookbook_text_classification.py
+```
+
+To publish, replace `HF_REPO_ID` in the script with a repository under your own
+Hugging Face username or organization, then run:
+
+```bash
+DATAFAST_PUSH_TO_HUB=1 python examples/scripts/45_cookbook_text_classification.py
+```
+
+The run uses `checkpoint_dir` and `resume=True`. If generation is interrupted,
+run the command again to continue from saved checkpoints.
+
+If you want to use provider-specific clients directly, replace `MODEL_IDS` or
+the `model=MODELS` argument in `LLMStep` with providers such as `openai(...)`
+or `anthropic(...)`. The default setup uses multiple OpenRouter-backed models
+so it works with one API key.
+
+## Prompt
+
+The cookbook uses one prompt file and drives diversity through seed dimensions:
+
+```text
+Write one realistic hiker report in {language_name}.
+```
+
+See [text_classification_generation.txt](assets/text_classification_generation.txt)
+for the full prompt.
+
+## Generation Controls
+
+- `LABELS` defines the target classes and their prompt descriptions.
+- `TRAIL_TYPES` controls the trail settings used in generation.
+- `STYLES` controls the voice and format of each report.
+- `LANGUAGES` controls language expansion.
+- `MODEL_IDS` controls which models generate records.
+- `HF_REPO_ID` controls the optional Hugging Face Hub destination.
+
+If you want an extra quality-control pass, add a downstream `Classify` and
+`Filter` stage to verify that generated reports match their intended label.
+
+## Output Fields
+
+- `id` - generated row UUID
+- `label` - target trail-condition label
+- `trail_type` - prompt expansion axis for the trail setting
+- `style` - prompt expansion axis for the report style
+- `language` - language code emitted by `LLMStep`
+- `model` - model ID emitted by `LLMStep`
+- `text` - generated hiker report
diff --git a/docs/guides/building_pipelines.md b/docs/guides/building_pipelines.md
index 64aaaf2..b755410 100644
--- a/docs/guides/building_pipelines.md
+++ b/docs/guides/building_pipelines.md
@@ -3,11 +3,12 @@
 ## Minimal Pipeline
 
 ```python
-from datafast import Map, Sink, Source
+from datafast import AddUUID, Map, Sink, Source
 
 pipeline = (
     Source.list([{"text": "hello"}])
     >> Map(lambda r: {**r, "length": len(r["text"])})
+    >> AddUUID()
     >> Sink.list()
 )
 
@@ -38,6 +39,7 @@ seed = Seed.product(
 
 ## Core Data Operations
 
+- `AddUUID`: add a UUID field to each record
 - `Map`: one record in, one record out
 - `FlatMap`: one record in, many records out
 - `Filter`: keep or drop records
diff --git a/examples/scripts/43_cookbook_persona_generation.py b/examples/scripts/43_cookbook_persona_generation.py
new file mode 100644
index 0000000..ac4f718
--- /dev/null
+++ b/examples/scripts/43_cookbook_persona_generation.py
@@ -0,0 +1,137 @@
+"""Persona-generation cookbook: XSum article -> personas -> related personas.
+
+Demonstrates: Source.huggingface, Map, Filter, Sample, JSON-mode LLMSteps,
+and prompt assets stored under docs/cookbook/assets.
+
+Requires:
+- OPENROUTER_API_KEY
+- Hugging Face authentication via HF_TOKEN or a cached `huggingface_hub` login
+- network access to Hugging Face and OpenRouter
+"""
+
+import random
+
+from dotenv import load_dotenv
+
+from datafast import AddUUID, Filter, LLMStep, Map, Sample, Sink, Source, openrouter
+
+import litellm
+
+load_dotenv()
+
+litellm.suppress_debug_info = True
+
+
+MODEL_ID = "nvidia/nemotron-3-super-120b-a12b:nitro"
+OUTPUT_PATH = "examples/outputs/43_persona_cookbook.jsonl"
+CHECKPOINT_DIR = "examples/checkpoints/43_persona_cookbook"
+HF_REPO_ID = "patrickfleith/new-persona-cookbook-dataset"
+TEXT_TO_PERSONA_PROMPTS = [
+    "docs/cookbook/assets/text_to_persona_v1.txt",
+    "docs/cookbook/assets/text_to_persona_v2.txt",
+    "docs/cookbook/assets/text_to_persona_v3.txt",
+]
+PERSONA_TO_PERSONA_PROMPTS = [
+    "docs/cookbook/assets/persona_to_persona_v1.txt",
+    "docs/cookbook/assets/persona_to_persona_v2.txt",
+    "docs/cookbook/assets/persona_to_persona_v3.txt",
+]
+LIFE_STAGES = [
+    "a teenager",
+    "a young adult",
+    "an adult (30s/40s)",
+    "a middle-aged person (in their 50s/60s)",
+    "a senior person (in their 70s/80s)",
+]
+
+
+def add_word_count(record: dict) -> dict:
+    return dict(record, word_count=len(record["document"].split()))  # whitespace-delimited word count
+
+
+def assign_life_stage(record: dict) -> dict:
+    return dict(record, life_stage=random.choice(LIFE_STAGES))  # uniform pick; no seed is set in this script
+
+
+def assign_related_life_stage(record: dict) -> dict:
+    return dict(record, related_life_stage=random.choice(LIFE_STAGES))  # drawn independently of life_stage
+
+
+def keep_output_fields(record: dict) -> dict:
+    """Keep only the published columns; the incoming `id` is exposed as `source_id`."""
+    # Columns copied through under their own names, in output order.
+    passthrough = (
+        "summary", "document", "word_count", "life_stage",
+        "persona_description", "relationship_type",
+        "related_life_stage", "related_persona_description",
+    )
+    projected = {"source_id": record["id"]}
+    for column in passthrough:
+        projected[column] = record[column]
+    return projected
+
+
+def build_pipeline():
+    """Build the XSum -> persona -> related-persona pipeline ending in JSONL and Hub sinks."""
+    model = openrouter(MODEL_ID, temperature=0.7)
+    return (
+        Source.huggingface(
+            "xsum",
+            split="validation",
+            columns=["id", "document", "summary"],
+        )
+        # For a local JSONL corpus, replace the Hugging Face source with something
+        # like Source.file("data/articles.jsonl") and map your text field to
+        # "document" before add_word_count.
+        >> Map(add_word_count).as_step("add_word_count")
+        >> Filter(fn=lambda r: 300 <= r["word_count"] <= 500).as_step("filter_word_count")
+        >> Sample(n=10, strategy="first").as_step("take_first_10")  # step label kept in sync with n
+        >> Map(assign_life_stage).as_step("assign_life_stage")
+        >> LLMStep(
+            prompt=Sample(TEXT_TO_PERSONA_PROMPTS, n=1),
+            input_columns=["document", "life_stage"],
+            output_columns=["persona_description"],
+            model=model,
+            parse_mode="json",
+            on_parse_error="raise",
+        ).as_step("text_to_persona")
+        >> Map(assign_related_life_stage).as_step("assign_related_life_stage")
+        >> LLMStep(
+            prompt=Sample(PERSONA_TO_PERSONA_PROMPTS, n=1),
+            input_columns=["persona_description", "related_life_stage"],
+            output_columns=["relationship_type", "related_persona_description"],
+            model=model,
+            parse_mode="json",
+            on_parse_error="raise",
+        ).as_step("persona_to_persona")
+        >> Map(keep_output_fields).as_step("keep_output_fields")
+        >> AddUUID(column="id", overwrite=True).as_step("add_uuid")
+        >> Sink.jsonl(OUTPUT_PATH)
+        >> Sink.hub(HF_REPO_ID, private=True)  # NOTE(review): main() uploads again via push_records_to_hub() - confirm both are intended
+    )
+
+
+def push_records_to_hub(records: list[dict]) -> None:
+    """Send `records` through a standalone `Sink.hub` step, discarding its output."""
+    # NOTE(review): this repo and visibility differ from the HF_REPO_ID, private=True
+    # sink in build_pipeline() - confirm which destination is intended.
+    hub_sink = Sink.hub(
+        repo_id="patrickfleith/datafast-persona-cookbook",
+        private=False,
+        commit_message=f"Publish cookbook 43 persona dataset with {MODEL_ID}",
+    )
+    for _ in hub_sink.process(records):  # iterate to completion; results are not needed
+        pass
+
+
+def main() -> None:
+    """Run the persona pipeline with resume=False and publish the returned records."""
+    pipeline = build_pipeline()
+    # NOTE(review): resume=False presumably ignores earlier checkpoints in CHECKPOINT_DIR - verify.
+    records = pipeline.run(batch_size=1, checkpoint_dir=CHECKPOINT_DIR, resume=False)
+    # Publishing here is unconditional; there is no opt-in environment flag.
+    push_records_to_hub(records)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/scripts/44_cookbook_space_text_generation.py b/examples/scripts/44_cookbook_space_text_generation.py
new file mode 100644
index 0000000..6c5d2cb
--- /dev/null
+++ b/examples/scripts/44_cookbook_space_text_generation.py
@@ -0,0 +1,143 @@
+"""Space text-generation cookbook: seed grid -> technical text corpus.
+
+Demonstrates: Seed.product, LLMStep JSON mode, multi-language generation,
+num_outputs, checkpointing, JSONL output, and optional Hub push.
+
+Requires:
+- OPENROUTER_API_KEY
+- Hugging Face authentication only if DATAFAST_PUSH_TO_HUB=1
+- network access to OpenRouter, and to Hugging Face when publishing
+"""
+
+from __future__ import annotations
+
+import os
+
+import litellm
+from dotenv import load_dotenv
+
+from datafast import AddUUID, LLMStep, Map, Seed, Sink, openrouter
+
+load_dotenv()
+litellm.suppress_debug_info = True
+
+
+SEED = 20250304
+MODEL_IDS = ["nvidia/nemotron-3-super-120b-a12b:nitro"]
+OUTPUT_PATH = "examples/outputs/44_space_text_generation_cookbook.jsonl"
+CHECKPOINT_DIR = "examples/checkpoints/44_space_text_generation_cookbook"
+HF_REPO_ID = "patrickfleith/datafast-space-text-generation-cookbook"
+NUM_OUTPUTS = 1
+PROMPT_PATH = "docs/cookbook/assets/space_text_generation.txt"
+
+DOCUMENT_TYPES = [
+    "space engineering textbook",
+    "spacecraft design justification document",
+    "personal blog of a space engineer",
+]
+
+TOPICS = [
+    "Microgravity",
+    "Vacuum",
+    "Heavy Ions",
+    "Thermal Extremes",
+    "Atomic Oxygen",
+    "Debris Impact",
+    "Electrostatic Charging",
+    "Propellant Boil-off",
+]
+
+EXPERTISE_LEVELS = [
+    "executives",
+    "senior engineers",
+    "PhD candidates",
+]
+
+LANGUAGES = {
+    "en": "English",
+    "fr": "French",
+}
+
+
+def make_models():
+    return [openrouter(name, temperature=0.7) for name in MODEL_IDS]  # one OpenRouter model per configured ID
+
+
+def expected_row_count(model_count: int | None = None) -> int:
+    """Return the number of rows this configuration should generate.
+
+    `model_count` replaces `len(MODEL_IDS)` whenever it is not None.
+    """
+    if model_count is None:
+        model_count = len(MODEL_IDS)
+    seed_rows = len(DOCUMENT_TYPES) * len(TOPICS) * len(EXPERTISE_LEVELS)
+    # Multiplier applied to every seed row: languages x outputs x models.
+    fan_out = len(LANGUAGES) * NUM_OUTPUTS * model_count
+    return seed_rows * fan_out
+
+
+def finalize_record(record: dict) -> dict:
+    """Keep the publication columns; `_language`/`_model` become `language`/`model`."""
+    seed_columns = ("document_type", "topic", "expertise_level")
+    published = {name: record[name] for name in seed_columns}
+    # Metadata columns may be absent, in which case an empty string is stored.
+    published["language"] = record.get("_language", "")
+    published["model"] = record.get("_model", "")
+    # Generated columns are mandatory: a missing one raises KeyError.
+    published["title"] = record["title"]
+    published["text"] = record["text"]
+    return published
+
+
+def build_pipeline():
+    """Assemble seed grid -> LLM generation -> column cleanup -> UUID -> JSONL sink."""
+    seed_grid = Seed.product(
+        Seed.values("document_type", DOCUMENT_TYPES),
+        Seed.values("topic", TOPICS),
+        Seed.values("expertise_level", EXPERTISE_LEVELS),
+    ).as_step("seed_space_text_grid")
+    generate = LLMStep(
+        prompt=PROMPT_PATH,
+        input_columns=["document_type", "topic", "expertise_level"],
+        output_columns=["title", "text"],
+        parse_mode="json",
+        model=make_models(),
+        language=LANGUAGES,
+        num_outputs=NUM_OUTPUTS,
+        on_parse_error="raise",
+    ).as_step("generate_space_text")
+    finalize = Map(finalize_record).as_step("finalize_record")
+    add_uuid = AddUUID(column="id", overwrite=True).as_step("add_uuid")
+    # Chain the steps in execution order; the JSONL sink comes last.
+    return seed_grid >> generate >> finalize >> add_uuid >> Sink.jsonl(OUTPUT_PATH)
+
+
+def push_records_to_hub(records: list[dict]) -> None:
+    """Send `records` through a standalone private `Sink.hub` step targeting HF_REPO_ID."""
+    hub_sink = Sink.hub(
+        repo_id=HF_REPO_ID,
+        private=True,
+        train_size=0.8, seed=SEED, shuffle=True,  # split and shuffle options handed to Sink.hub
+        commit_message=f"Publish cookbook 44 text dataset with {', '.join(MODEL_IDS)}",
+    )
+    # Iterate process() to completion; its yielded items are not needed here.
+    for _ in hub_sink.process(records):
+        pass
+
+
+def main() -> None:
+    """Run the pipeline with resume=True; publish only when DATAFAST_PUSH_TO_HUB is "1"."""
+    print(f"Expected rows: {expected_row_count()}")
+    pipeline = build_pipeline()
+    records = pipeline.run(batch_size=4, checkpoint_dir=CHECKPOINT_DIR, resume=True)
+
+    # Publishing is opt-in through the environment.
+    publish_requested = os.getenv("DATAFAST_PUSH_TO_HUB") == "1"
+    if publish_requested:
+        push_records_to_hub(records)
+
+    print(f"Wrote {len(records)} records to {OUTPUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/scripts/45_cookbook_text_classification.py b/examples/scripts/45_cookbook_text_classification.py
new file mode 100644
index 0000000..01b7e5a
--- /dev/null
+++ b/examples/scripts/45_cookbook_text_classification.py
@@ -0,0 +1,158 @@
+"""Text-classification cookbook: seed grid -> multilingual trail reports.
+
+Demonstrates: Seed.product, prompt expansion via seed dimensions, multi-model
+generation, multi-language generation, checkpointing, JSONL output, and
+optional Hugging Face Hub publishing.
+
+Requires:
+- OPENROUTER_API_KEY
+- Hugging Face authentication only if DATAFAST_PUSH_TO_HUB=1
+- network access to OpenRouter, and to Hugging Face when publishing
+"""
+
+from __future__ import annotations
+
+import os
+
+import litellm
+from dotenv import load_dotenv
+
+from datafast import AddUUID, LLMStep, Map, Seed, SeedDimension, Sink, openrouter
+
+load_dotenv()
+litellm.suppress_debug_info = True
+
+
+SEED = 20250611
+MODEL_IDS = [
+    "nvidia/nemotron-3-super-120b-a12b:nitro",
+    "mistralai/ministral-14b-2512",
+]
+OUTPUT_PATH = "examples/outputs/45_text_classification_cookbook.jsonl"
+CHECKPOINT_DIR = "examples/checkpoints/45_text_classification_cookbook"
+HF_REPO_ID = "patrickfleith/datafast-text-classification-cookbook"
+PROMPT_PATH = "docs/cookbook/assets/text_classification_generation.txt"
+
+LABELS = [
+    {
+        "label": "trail_obstruction",
+        "label_description": (
+            "The trail is partially or fully blocked by obstacles such as "
+            "fallen trees, landslides, snow, flooding, erosion, or dense "
+            "vegetation."
+        ),
+    },
+    {
+        "label": "infrastructure_issues",
+        "label_description": (
+            "The report is about damaged or missing bridges, signs, stairs, "
+            "handrails, markers, boardwalks, or similar trail infrastructure."
+        ),
+    },
+    {
+        "label": "hazards",
+        "label_description": (
+            "The trail has immediate safety risks such as slippery surfaces, "
+            "dangerous crossings, unstable terrain, wildlife threats, or "
+            "other hazardous conditions."
+        ),
+    },
+    {
+        "label": "positive_conditions",
+        "label_description": (
+            "The report highlights clear, safe, enjoyable trail conditions "
+            "such as good maintenance, solid infrastructure, clear signage, "
+            "or scenic features."
+        ),
+    },
+]
+
+TRAIL_TYPES = [
+    "mountain trail",
+    "coastal path",
+    "forest walk",
+]
+
+STYLES = [
+    "a brief social media post",
+    "a hiking review",
+]
+
+LANGUAGES = {
+    "en": "English",
+    "fr": "French",
+}
+
+MODELS = [openrouter(model_id, temperature=0.8) for model_id in MODEL_IDS]
+EXPECTED_ROWS = (
+    len(LABELS)
+    * len(TRAIL_TYPES)
+    * len(STYLES)
+    * len(LANGUAGES)
+    * len(MODELS)
+)
+
+
+def keep_output_fields(record: dict) -> dict:
+    """Keep only the fields meant for publication, dropping `label_description`."""
+    seed_columns = ("label", "trail_type", "style")
+    published = {name: record[name] for name in seed_columns}
+    # `_language` / `_model` are optional; an empty string is stored when absent.
+    published["language"] = record.get("_language", "")
+    published["model"] = record.get("_model", "")
+    # The generated text is mandatory: a missing value raises KeyError.
+    published["text"] = record["text"]
+    return published
+
+
+pipeline = (  # built once at import time; main() only calls pipeline.run()
+    Seed.product(
+        SeedDimension(  # two-column dimension: each LABELS entry supplies label + label_description
+            columns=["label", "label_description"],
+            values=LABELS,
+        ),
+        Seed.values("trail_type", TRAIL_TYPES),
+        Seed.values("style", STYLES),
+    ).as_step("seed_trail_report_grid")
+    >> LLMStep(
+        prompt=PROMPT_PATH,
+        input_columns=["label", "label_description", "trail_type", "style"],
+        output_column="text",
+        parse_mode="text",
+        model=MODELS,  # list of models; EXPECTED_ROWS counts one output per model
+        language=LANGUAGES,  # code -> language name; EXPECTED_ROWS counts one output per language
+    ).as_step("generate_trail_reports")
+    >> Map(keep_output_fields).as_step("keep_output_fields")  # drops label_description, renames _language/_model
+    >> AddUUID(column="id", overwrite=True).as_step("add_uuid")  # keep_output_fields emits no "id" of its own
+    >> Sink.jsonl(OUTPUT_PATH)
+)
+
+
+def main() -> None:
+    """Run the module-level pipeline; publish only when DATAFAST_PUSH_TO_HUB is "1"."""
+    print(f"Expected rows: {EXPECTED_ROWS}")
+    records = pipeline.run(batch_size=4, checkpoint_dir=CHECKPOINT_DIR, resume=True)
+
+    publish_requested = os.getenv("DATAFAST_PUSH_TO_HUB") == "1"
+    if publish_requested:
+        commit_message = (
+            "Publish cookbook 45 classification dataset with "
+            f"{', '.join(MODEL_IDS)}"
+        )
+        hub_sink = Sink.hub(
+            repo_id=HF_REPO_ID,
+            private=False,
+            train_size=0.8,
+            seed=SEED,
+            shuffle=True,
+            commit_message=commit_message,
+        )
+        # Iterate process() to completion; its yielded items are not needed here.
+        for _ in hub_sink.process(records):
+            pass
+
+    print(f"Wrote {len(records)} records to {OUTPUT_PATH}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mkdocs.yml b/mkdocs.yml
index 87e795a..131400c 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -43,6 +43,11 @@ nav:
       - LLM Steps: guides/llm_steps.md
       - Checkpointing: guides/checkpointing.md
       - Langfuse Tracing: guides/langfuse_tracing.md
+  - Cookbook:
+      - cookbook/index.md
+      - Text Classification: cookbook/text_classification.md
+      - Persona Generation: cookbook/persona_generation.md
+      - Space Engineering Text Generation: cookbook/space_text_generation.md
   - Providers: llms.md
   - Models: models.md
   - API: api.md
diff --git a/tests/test_add_uuid.py b/tests/test_add_uuid.py
new file mode 100644
index 0000000..e89f837
--- /dev/null
+++ b/tests/test_add_uuid.py
@@ -0,0 +1,78 @@
+import uuid
+
+from datafast import AddUUID, LLMStep, Sink, Source
+
+
+def assert_valid_uuid(value: str) -> None:
+    # Parsing rejects malformed input; the round-trip also demands the canonical string form.
+    assert value == str(uuid.UUID(value))
+
+
+def test_add_uuid_adds_id_when_missing():
+    records = list(AddUUID().process([{"text": "hello"}]))
+    # With no arguments the UUID lands in "id" and the existing field is untouched.
+    assert records[0]["text"] == "hello"
+    assert_valid_uuid(records[0]["id"])
+
+
+def test_add_uuid_preserves_existing_id_by_default():
+    records = list(AddUUID().process([{"id": "source-1", "text": "hello"}]))
+    # Default behaviour: a record that already carries "id" passes through unchanged.
+    assert records == [{"id": "source-1", "text": "hello"}]
+
+
+def test_add_uuid_overwrites_existing_id_when_requested():
+    records = list(
+        AddUUID(overwrite=True).process([{"id": "source-1", "text": "hello"}])
+    )
+    # overwrite=True replaces the incoming id with a valid UUID; other fields stay.
+    assert records[0]["text"] == "hello"
+    assert records[0]["id"] != "source-1"
+    assert_valid_uuid(records[0]["id"])
+
+
+def test_add_uuid_generates_distinct_ids_for_multiple_records():
+    records = list(AddUUID().process([{"text": "a"}, {"text": "b"}]))
+    ids = [record["id"] for record in records]
+    # Each record must receive its own UUID rather than a shared one.
+    assert len(set(ids)) == 2
+    for value in ids:
+        assert_valid_uuid(value)
+
+
+def test_add_uuid_supports_custom_column_name():
+    records = list(AddUUID(column="example_id").process([{"text": "hello"}]))
+    # A custom column receives the UUID and the default "id" column is not created.
+    assert "id" not in records[0]
+    assert_valid_uuid(records[0]["example_id"])
+
+
+def test_add_uuid_assigns_unique_ids_to_llm_num_outputs_pipeline():
+    class FakeModel:  # minimal model stub: every call returns the same JSON payload
+        model_id = "fake-model"
+        provider_name = "fake"
+
+        def generate(self, messages, metadata=None):
+            return '{"title": "Generated", "text": "Body"}'
+
+    pipeline = (
+        Source.list([{"topic": "vacuum"}])
+        >> LLMStep(
+            prompt="Write about {topic}.",
+            input_columns=["topic"],
+            output_columns=["title", "text"],
+            parse_mode="json",
+            model=FakeModel(),
+            num_outputs=2,
+        )
+        >> AddUUID()
+        >> Sink.list()
+    )
+
+    records = pipeline.run()
+    ids = [record["id"] for record in records]
+    # One source row with num_outputs=2 must yield two records with distinct UUIDs.
+    assert len(records) == 2
+    assert len(set(ids)) == 2
+    for value in ids:
+        assert_valid_uuid(value)
diff --git a/tests/test_llms_unit.py b/tests/test_llms_unit.py
new file mode 100644
index 0000000..a6e5674
--- /dev/null
+++ b/tests/test_llms_unit.py
@@ -0,0 +1,80 @@
+import datafast.llms as llms_module
+from datafast.llms import OpenRouterProvider
+
+
+class _DummyMessage:  # innermost layer of the fake response: exposes only .content
+    def __init__(self, content: str) -> None:
+        self.content = content
+
+
+class _DummyChoice:  # middle layer of the fake response: wraps a _DummyMessage as .message
+    def __init__(self, content: str) -> None:
+        self.message = _DummyMessage(content)
+
+
+class _DummyResponse:  # outer layer: a one-element .choices list, read as choices[0].message.content
+    def __init__(self, content: str) -> None:
+        self.choices = [_DummyChoice(content)]
+
+
+def test_openrouter_single_messages_use_completion(monkeypatch):
+    monkeypatch.setattr(llms_module, "load_env_once", lambda: None)  # replace env loading with a no-op
+    monkeypatch.setattr(
+        llms_module,
+        "maybe_configure_langfuse_tracing",
+        lambda load_env=False: False,
+    )
+    # Count which litellm entry point the provider reaches.
+    calls = {"completion": 0, "batch_completion": 0}
+
+    def fake_completion(**kwargs):
+        calls["completion"] += 1
+        assert kwargs["messages"] == [{"role": "user", "content": "ping"}]
+        return _DummyResponse("ok")
+
+    def fake_batch_completion(**kwargs):
+        calls["batch_completion"] += 1
+        raise AssertionError("single-message requests should not use batch_completion")
+
+    monkeypatch.setattr(llms_module.litellm, "completion", fake_completion)
+    monkeypatch.setattr(llms_module.litellm, "batch_completion", fake_batch_completion)
+
+    provider = OpenRouterProvider(model_id="demo-model", api_key="test-key")
+    # A flat list of message dicts is a single conversation, not a batch.
+    response = provider.generate(messages=[{"role": "user", "content": "ping"}])
+
+    assert response == "ok"
+    assert calls == {"completion": 1, "batch_completion": 0}
+
+
+def test_openrouter_batch_messages_use_batch_completion(monkeypatch):
+    monkeypatch.setattr(llms_module, "load_env_once", lambda: None)  # replace env loading with a no-op
+    monkeypatch.setattr(
+        llms_module,
+        "maybe_configure_langfuse_tracing",
+        lambda load_env=False: False,
+    )
+    # Count which litellm entry point the provider reaches.
+    calls = {"completion": 0, "batch_completion": 0}
+
+    def fake_completion(**kwargs):
+        calls["completion"] += 1
+        raise AssertionError("batched requests should not use completion")
+
+    def fake_batch_completion(**kwargs):
+        calls["batch_completion"] += 1
+        assert len(kwargs["messages"]) == 2
+        return [_DummyResponse("first"), _DummyResponse("second")]
+
+    monkeypatch.setattr(llms_module.litellm, "completion", fake_completion)
+    monkeypatch.setattr(llms_module.litellm, "batch_completion", fake_batch_completion)
+
+    provider = OpenRouterProvider(model_id="demo-model", api_key="test-key")
+    # A list of message lists is a batch: one response string is expected per conversation.
+    response = provider.generate(messages=[
+        [{"role": "user", "content": "one"}],
+        [{"role": "user", "content": "two"}],
+    ])
+
+    assert response == ["first", "second"]
+    assert calls == {"completion": 0, "batch_completion": 1}
diff --git a/tests/test_public_api.py b/tests/test_public_api.py
index 7eaf787..ac56477 100644
--- a/tests/test_public_api.py
+++ b/tests/test_public_api.py
@@ -1,4 +1,5 @@
 from datafast import (
+    AddUUID,
     Branch,
     Classify,
     Compare,
@@ -70,6 +71,7 @@ def test_factory_exports_are_available(monkeypatch):
     assert Sink is not None
     assert Seed is not None
     assert Sample is not None
+    assert AddUUID is not None
     assert Map is not None
     assert FlatMap is not None
     assert Filter is not None
diff --git a/tests/test_runner_llm_messages.py b/tests/test_runner_llm_messages.py
new file mode 100644
index 0000000..d870093
--- /dev/null
+++ b/tests/test_runner_llm_messages.py
@@ -0,0 +1,47 @@
+from datafast import LLMStep, ListSink, Source
+
+
+def test_runner_passes_llm_messages_by_keyword():
+    class FakeModel:  # records every generate() call so the arguments can be inspected
+        provider_name = "fake"
+        model_id = "fake-model"
+
+        def __init__(self) -> None:
+            self.calls: list[dict] = []
+
+        def generate(
+            self,
+            prompt=None,
+            messages=None,
+            metadata=None,
+            response_format=None,
+        ):
+            self.calls.append({
+                "prompt": prompt,
+                "messages": messages,
+                "metadata": metadata,
+            })
+            return "done"
+
+    model = FakeModel()
+    sink = ListSink()
+
+    pipeline = (
+        Source.list([{"topic": "robotics"}])
+        >> LLMStep(
+            prompt="Write one short line about {topic}.",
+            input_columns=["topic"],
+            output_column="result",
+            model=model,
+        ).as_step("generate_copy")
+        >> sink
+    )
+
+    output = pipeline.run()
+    # The output row gains the result column plus the model id under "_model".
+    assert output == [{"topic": "robotics", "result": "done", "_model": "fake-model"}]
+    assert len(model.calls) == 1
+    assert model.calls[0]["prompt"] is None  # the rendered prompt must arrive as messages=, not as prompt
+    assert model.calls[0]["messages"] == [
+        {"role": "user", "content": "Write one short line about robotics."}
+    ]