From 3d7d10edd130c182ad733db7d0fdb6af8fe2309a Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 06:34:56 +0000 Subject: [PATCH 1/2] [fern-generated] Update SDK Generated by Fern CLI Version: unknown Generators: - fernapi/fern-python-sdk: 4.37.0 --- PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md | 27 ------ scripts/check_release_workflow.py | 54 ------------ .../start_agents_request_properties_avatar.py | 1 + ...agents_request_properties_avatar_vendor.py | 2 +- ...gents_request_properties_turn_detection.py | 6 -- src/agora_agent/core/client_wrapper.py | 4 +- src/agora_agent/types/asr.py | 84 ++++++++++++++++++ src/agora_agent/types/bytedance_duplex_tts.py | 29 +++++++ .../types/bytedance_duplex_tts_params.py | 37 ++++++++ src/agora_agent/types/bytedance_tts.py | 29 +++++++ src/agora_agent/types/bytedance_tts_params.py | 62 ++++++++++++++ src/agora_agent/types/cosyvoice_tts.py | 29 +++++++ src/agora_agent/types/cosyvoice_tts_params.py | 42 +++++++++ src/agora_agent/types/fengming_asr.py | 29 +++++++ src/agora_agent/types/minimax_tts_params.py | 29 ++++++- .../types/minimax_tts_params_audio_setting.py | 27 ++++++ .../minimax_tts_params_pronunciation_dict.py | 27 ++++++ .../minimax_tts_params_timber_weights_item.py | 28 ++++++ .../types/minimax_tts_params_voice_setting.py | 30 +++++++ .../types/sensetime_avatar_params.py | 52 ++++++++++++ ...sensetime_avatar_params_scene_list_item.py | 21 +++++ ...tar_params_scene_list_item_digital_role.py | 32 +++++++ ...s_scene_list_item_digital_role_position.py | 28 ++++++ src/agora_agent/types/stepfun_tts.py | 29 +++++++ src/agora_agent/types/stepfun_tts_params.py | 37 ++++++++ src/agora_agent/types/tencent_asr.py | 27 ++++++ src/agora_agent/types/tencent_asr_params.py | 47 ++++++++++ src/agora_agent/types/tencent_tts.py | 29 +++++++ src/agora_agent/types/tencent_tts_params.py | 62 ++++++++++++++ src/agora_agent/types/tts.py | 85 +++++++++++++++++++ src/agora_agent/types/xfyun_asr.py | 27 ++++++ src/agora_agent/types/xfyun_asr_params.py | 42 +++++++++ src/agora_agent/types/xfyun_bigmodel_asr.py | 27 ++++++ .../types/xfyun_bigmodel_asr_params.py | 47 ++++++++++ src/agora_agent/types/xfyun_dialect_asr.py | 27 ++++++ .../types/xfyun_dialect_asr_params.py | 42 +++++++++ 36 files changed, 1144 insertions(+), 93 deletions(-) delete mode 100644 PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md delete mode 100644 scripts/check_release_workflow.py create mode 100644 src/agora_agent/types/bytedance_duplex_tts.py create mode 100644 src/agora_agent/types/bytedance_duplex_tts_params.py create mode 100644 src/agora_agent/types/bytedance_tts.py create mode 100644 src/agora_agent/types/bytedance_tts_params.py create mode 100644 src/agora_agent/types/cosyvoice_tts.py create mode 100644 src/agora_agent/types/cosyvoice_tts_params.py create mode 100644 src/agora_agent/types/fengming_asr.py create mode 100644 src/agora_agent/types/minimax_tts_params_audio_setting.py create mode 100644 src/agora_agent/types/minimax_tts_params_pronunciation_dict.py create mode 100644 src/agora_agent/types/minimax_tts_params_timber_weights_item.py create mode 100644 src/agora_agent/types/sensetime_avatar_params.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py create mode 100644 src/agora_agent/types/stepfun_tts.py create mode 100644 src/agora_agent/types/stepfun_tts_params.py create mode 100644 src/agora_agent/types/tencent_asr.py create mode 100644 src/agora_agent/types/tencent_asr_params.py create mode 100644 src/agora_agent/types/tencent_tts.py create mode 100644 src/agora_agent/types/tencent_tts_params.py create mode 100644 src/agora_agent/types/xfyun_asr.py create mode 100644 src/agora_agent/types/xfyun_asr_params.py create mode 100644 src/agora_agent/types/xfyun_bigmodel_asr.py create mode 100644 src/agora_agent/types/xfyun_bigmodel_asr_params.py create mode 100644 src/agora_agent/types/xfyun_dialect_asr.py create mode 100644 src/agora_agent/types/xfyun_dialect_asr_params.py diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md deleted file mode 100644 index f3cd64a..0000000 --- a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md +++ /dev/null @@ -1,27 +0,0 @@ -# Python AgentKit Snake Case API Audit - -Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. - -Search terms: - -```bash -rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python -``` - -## Result - -No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. - -| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | -|---|---|---|---|---|---|---|---|---|---| -| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | -| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | -| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | -| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | -| `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | -| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | -| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | - -## Guardrail Added - -`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. diff --git a/scripts/check_release_workflow.py b/scripts/check_release_workflow.py deleted file mode 100644 index 1a6e065..0000000 --- a/scripts/check_release_workflow.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -import re -import sys -from pathlib import Path -from typing import NoReturn - - -def fail(message: str) -> NoReturn: - print(message, file=sys.stderr) - raise SystemExit(1) - - -def read_version(path: str) -> str: - text = Path(path).read_text() - match = re.search(r'^version\s*=\s*"v?([^"]+)"', text, re.M) - if not match: - fail(f"version not found in {path}") - return match.group(1) - - -def read_compat_dependency(path: str) -> str: - text = Path(path).read_text() - match = re.search(r'^agora-agents\s*=\s*"([^"]+)"', text, re.M) - if not match: - fail(f"agora-agents dependency not found in {path}") - return match.group(1) - - -root_version = read_version("pyproject.toml") -compat_pyproject = "compat/agora-agent-server-sdk/pyproject.toml" -compat_version = read_version(compat_pyproject) -compat_dependency = read_compat_dependency(compat_pyproject) - -if compat_version != root_version: - fail(f"Compat package version ({compat_version}) must match root package version ({root_version}).") - -expected_dependency = f">={root_version},<3.0.0" -if compat_dependency != expected_dependency: - fail(f"Compat package dependency on agora-agents ({compat_dependency}) must be {expected_dependency}.") - -release_workflow = Path(".github/workflows/release.yml").read_text() -required_workflow_markers = [ - ("contents: write", "release workflow must have contents: write so it can create GitHub releases"), - ("gh release create", "release workflow must create a GitHub release when one does not exist"), - ("gh release edit", "release workflow must update an existing GitHub release"), - ("release_notes.md", "release workflow must generate and use a release notes file"), -] - -for marker, message in required_workflow_markers: - if marker not in release_workflow: - fail(message) - -print("Release metadata and workflow checks passed.") diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py index 8993b2c..b9b668e 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py @@ -25,6 +25,7 @@ class StartAgentsRequestPropertiesAvatar(UncheckedBaseModel): - `liveavatar`: LiveAvatar (Beta) - `anam`: Anam (Beta) - `generic`: Generic (Beta) + - `sensetime`: SenseTime Avatar """ params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py index e5bcec5..9a2d0c7 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py @@ -3,5 +3,5 @@ import typing StartAgentsRequestPropertiesAvatarVendor = typing.Union[ - typing.Literal["akool", "liveavatar", "anam", "generic", "heygen"], typing.Any + typing.Literal["akool", "liveavatar", "anam", "generic", "sensetime", "heygen"], typing.Any ] diff --git a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py index fb58a36..40dbb02 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py @@ -5,7 +5,6 @@ import pydantic from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel -from ...types.asr_language import AsrLanguage from .start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig from .start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness from .start_agents_request_properties_turn_detection_interrupt_mode import ( @@ -19,11 +18,6 @@ class StartAgentsRequestPropertiesTurnDetection(UncheckedBaseModel): Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination. This object has no effect when `mllm.enable` is true; use `mllm.turn_detection` instead. """ - language: typing.Optional[AsrLanguage] = pydantic.Field(default=None) - """ - BCP-47 language tag identifying the primary language used for agent interaction. - """ - mode: typing.Optional[typing.Literal["default"]] = pydantic.Field(default=None) """ Conversation turn detection mode: diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py index ba5e462..47029d6 100644 --- a/src/agora_agent/core/client_wrapper.py +++ b/src/agora_agent/core/client_wrapper.py @@ -26,10 +26,10 @@ def __init__( def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { - "User-Agent": "agora-agents/v2.2.0", + "User-Agent": "agora-agents/v2.2.1", "X-Fern-Language": "Python", "X-Fern-SDK-Name": "agora-agents", - "X-Fern-SDK-Version": "v2.2.0", + "X-Fern-SDK-Version": "v2.2.1", **(self.get_custom_headers() or {}), } headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header diff --git a/src/agora_agent/types/asr.py b/src/agora_agent/types/asr.py index 1f2225d..e334a8d 100644 --- a/src/agora_agent/types/asr.py +++ b/src/agora_agent/types/asr.py @@ -18,6 +18,10 @@ from .open_ai_asr_params import OpenAiAsrParams from .sarvam_asr_params import SarvamAsrParams from .speechmatics_asr_params import SpeechmaticsAsrParams +from .tencent_asr_params import TencentAsrParams +from .xfyun_asr_params import XfyunAsrParams +from .xfyun_bigmodel_asr_params import XfyunBigmodelAsrParams +from .xfyun_dialect_asr_params import XfyunDialectAsrParams class Asr_Ares(UncheckedBaseModel): @@ -35,6 +39,36 @@ class Config: extra = pydantic.Extra.allow +class Asr_Fengming(UncheckedBaseModel): + vendor: typing.Literal["fengming"] = "fengming" + language: typing.Optional[AsrLanguage] = None + params: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_Tencent(UncheckedBaseModel): + vendor: typing.Literal["tencent"] = "tencent" + language: typing.Optional[AsrLanguage] = None + params: TencentAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + class Asr_Microsoft(UncheckedBaseModel): vendor: typing.Literal["microsoft"] = "microsoft" language: typing.Optional[AsrLanguage] = None @@ -155,9 +189,56 @@ class Config: extra = pydantic.Extra.allow +class Asr_Xfyun(UncheckedBaseModel): + vendor: typing.Literal["xfyun"] = "xfyun" + language: typing.Optional[AsrLanguage] = None + params: XfyunAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_XfyunBigmodel(UncheckedBaseModel): + vendor: typing.Literal["xfyun_bigmodel"] = "xfyun_bigmodel" + language: typing.Optional[AsrLanguage] = None + params: XfyunBigmodelAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_XfyunDialect(UncheckedBaseModel): + vendor: typing.Literal["xfyun_dialect"] = "xfyun_dialect" + language: typing.Optional[AsrLanguage] = None + params: XfyunDialectAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + Asr = typing_extensions.Annotated[ typing.Union[ Asr_Ares, + Asr_Fengming, + Asr_Tencent, Asr_Microsoft, Asr_Deepgram, Asr_Openai, @@ -166,6 +247,9 @@ class Config: Asr_Assemblyai, Asr_Speechmatics, Asr_Sarvam, + Asr_Xfyun, + Asr_XfyunBigmodel, + Asr_XfyunDialect, ], UnionMetadata(discriminant="vendor"), ] diff --git a/src/agora_agent/types/bytedance_duplex_tts.py b/src/agora_agent/types/bytedance_duplex_tts.py new file mode 100644 index 0000000..76c5313 --- /dev/null +++ b/src/agora_agent/types/bytedance_duplex_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .bytedance_duplex_tts_params import BytedanceDuplexTtsParams + + +class BytedanceDuplexTts(UncheckedBaseModel): + """ + Bytedance duplex streaming Text-to-Speech configuration. + """ + + params: BytedanceDuplexTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_duplex_tts_params.py b/src/agora_agent/types/bytedance_duplex_tts_params.py new file mode 100644 index 0000000..3aa48d4 --- /dev/null +++ b/src/agora_agent/types/bytedance_duplex_tts_params.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class BytedanceDuplexTtsParams(UncheckedBaseModel): + """ + Bytedance duplex streaming TTS configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + Bytedance application ID. + """ + + token: str = pydantic.Field() + """ + Bytedance API token. + """ + + speaker: str = pydantic.Field() + """ + Duplex TTS speaker identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_tts.py b/src/agora_agent/types/bytedance_tts.py new file mode 100644 index 0000000..32342f7 --- /dev/null +++ b/src/agora_agent/types/bytedance_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .bytedance_tts_params import BytedanceTtsParams + + +class BytedanceTts(UncheckedBaseModel): + """ + Bytedance Volcano Engine Text-to-Speech configuration. + """ + + params: BytedanceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_tts_params.py b/src/agora_agent/types/bytedance_tts_params.py new file mode 100644 index 0000000..f4d9c7a --- /dev/null +++ b/src/agora_agent/types/bytedance_tts_params.py @@ -0,0 +1,62 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class BytedanceTtsParams(UncheckedBaseModel): + """ + Bytedance Volcano Engine TTS configuration parameters. + """ + + token: str = pydantic.Field() + """ + Bytedance API token. + """ + + app_id: str = pydantic.Field() + """ + Bytedance application ID. + """ + + cluster: str = pydantic.Field() + """ + Bytedance cluster name. + """ + + voice_type: str = pydantic.Field() + """ + Bytedance voice type. + """ + + speed_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed ratio. + """ + + volume_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Volume ratio. + """ + + pitch_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Pitch ratio. + """ + + emotion: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion preset. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/cosyvoice_tts.py b/src/agora_agent/types/cosyvoice_tts.py new file mode 100644 index 0000000..2519163 --- /dev/null +++ b/src/agora_agent/types/cosyvoice_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .cosyvoice_tts_params import CosyvoiceTtsParams + + +class CosyvoiceTts(UncheckedBaseModel): + """ + Alibaba Cloud CosyVoice Text-to-Speech configuration. + """ + + params: CosyvoiceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/cosyvoice_tts_params.py b/src/agora_agent/types/cosyvoice_tts_params.py new file mode 100644 index 0000000..3c48bb2 --- /dev/null +++ b/src/agora_agent/types/cosyvoice_tts_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class CosyvoiceTtsParams(UncheckedBaseModel): + """ + CosyVoice TTS configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + CosyVoice API key. + """ + + model: str = pydantic.Field() + """ + CosyVoice model identifier. + """ + + sample_rate: int = pydantic.Field() + """ + Audio sample rate in Hz. + """ + + voice: str = pydantic.Field() + """ + CosyVoice speaker voice. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/fengming_asr.py b/src/agora_agent/types/fengming_asr.py new file mode 100644 index 0000000..84fe7e0 --- /dev/null +++ b/src/agora_agent/types/fengming_asr.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage + + +class FengmingAsr(UncheckedBaseModel): + """ + Agora Fengming ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Agora Fengming ASR configuration parameters. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params.py b/src/agora_agent/types/minimax_tts_params.py index 6442a71..076167d 100644 --- a/src/agora_agent/types/minimax_tts_params.py +++ b/src/agora_agent/types/minimax_tts_params.py @@ -5,6 +5,9 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.unchecked_base_model import UncheckedBaseModel +from .minimax_tts_params_audio_setting import MinimaxTtsParamsAudioSetting +from .minimax_tts_params_pronunciation_dict import MinimaxTtsParamsPronunciationDict +from .minimax_tts_params_timber_weights_item import MinimaxTtsParamsTimberWeightsItem from .minimax_tts_params_voice_setting import MinimaxTtsParamsVoiceSetting @@ -18,18 +21,38 @@ class MinimaxTtsParams(UncheckedBaseModel): MiniMax API key """ - group_id: str = pydantic.Field() + group_id: typing.Optional[str] = pydantic.Field(default=None) """ MiniMax group identifier """ model: str = pydantic.Field() """ - TTS model (e.g., speech-02-turbo) + BYOK TTS model. Managed MiniMax preset models are selected through the top-level preset field instead. """ voice_setting: MinimaxTtsParamsVoiceSetting - url: str = pydantic.Field() + audio_setting: typing.Optional[MinimaxTtsParamsAudioSetting] = pydantic.Field(default=None) + """ + Audio output settings. + """ + + pronunciation_dict: typing.Optional[MinimaxTtsParamsPronunciationDict] = pydantic.Field(default=None) + """ + Custom pronunciation dictionary settings. + """ + + timber_weights: typing.Optional[typing.List[MinimaxTtsParamsTimberWeightsItem]] = pydantic.Field(default=None) + """ + Weighted voice blending configuration. + """ + + language_boost: typing.Optional[str] = pydantic.Field(default=None) + """ + Language boost mode. + """ + + url: typing.Optional[str] = pydantic.Field(default=None) """ WebSocket endpoint (e.g., wss://api-uw.minimax.io/ws/v1/t2a_v2) """ diff --git a/src/agora_agent/types/minimax_tts_params_audio_setting.py b/src/agora_agent/types/minimax_tts_params_audio_setting.py new file mode 100644 index 0000000..e9dac4b --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_audio_setting.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsAudioSetting(UncheckedBaseModel): + """ + Audio output settings. + """ + + sample_rate: typing.Optional[int] = pydantic.Field(default=None) + """ + Audio sample rate in Hz. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py b/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py new file mode 100644 index 0000000..e8321fb --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsPronunciationDict(UncheckedBaseModel): + """ + Custom pronunciation dictionary settings. + """ + + tone: typing.Optional[typing.List[str]] = pydantic.Field(default=None) + """ + Tone override list. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_timber_weights_item.py b/src/agora_agent/types/minimax_tts_params_timber_weights_item.py new file mode 100644 index 0000000..2384427 --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_timber_weights_item.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsTimberWeightsItem(UncheckedBaseModel): + voice_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Voice identifier for blending. + """ + + weight: typing.Optional[float] = pydantic.Field(default=None) + """ + Relative blend weight. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_voice_setting.py b/src/agora_agent/types/minimax_tts_params_voice_setting.py index 95b48e7..f409814 100644 --- a/src/agora_agent/types/minimax_tts_params_voice_setting.py +++ b/src/agora_agent/types/minimax_tts_params_voice_setting.py @@ -13,6 +13,36 @@ class MinimaxTtsParamsVoiceSetting(UncheckedBaseModel): Voice style identifier (e.g., English_captivating_female1) """ + speed: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed multiplier. + """ + + vol: typing.Optional[float] = pydantic.Field(default=None) + """ + Voice volume multiplier. + """ + + pitch: typing.Optional[float] = pydantic.Field(default=None) + """ + Voice pitch adjustment. + """ + + emotion: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion preset. + """ + + latex_read: typing.Optional[bool] = pydantic.Field(default=None) + """ + Whether to read LaTeX expressions. + """ + + english_normalization: typing.Optional[bool] = pydantic.Field(default=None) + """ + Whether to normalize English text before synthesis. + """ + if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 else: diff --git a/src/agora_agent/types/sensetime_avatar_params.py b/src/agora_agent/types/sensetime_avatar_params.py new file mode 100644 index 0000000..d777132 --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params.py @@ -0,0 +1,52 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.serialization import FieldMetadata +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item import SensetimeAvatarParamsSceneListItem + + +class SensetimeAvatarParams(UncheckedBaseModel): + """ + SenseTime Avatar configuration parameters. + """ + + agora_token: str = pydantic.Field() + """ + Agora token used by the avatar service. + """ + + agora_uid: str = pydantic.Field() + """ + Numeric Agora UID string used by the avatar service. + """ + + app_id: typing_extensions.Annotated[str, FieldMetadata(alias="appId")] = pydantic.Field() + """ + SenseTime application ID. + """ + + app_key: str = pydantic.Field() + """ + SenseTime application key. + """ + + scene_list: typing_extensions.Annotated[ + typing.List[SensetimeAvatarParamsSceneListItem], FieldMetadata(alias="sceneList") + ] = pydantic.Field() + """ + SenseTime scene configuration list. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py new file mode 100644 index 0000000..82edf8a --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item_digital_role import SensetimeAvatarParamsSceneListItemDigitalRole + + +class SensetimeAvatarParamsSceneListItem(UncheckedBaseModel): + digital_role: typing.Optional[SensetimeAvatarParamsSceneListItemDigitalRole] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py new file mode 100644 index 0000000..798ed59 --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py @@ -0,0 +1,32 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item_digital_role_position import ( + SensetimeAvatarParamsSceneListItemDigitalRolePosition, +) + + +class SensetimeAvatarParamsSceneListItemDigitalRole(UncheckedBaseModel): + face_feature_id: typing.Optional[str] = pydantic.Field(default=None) + """ + SenseTime face feature identifier. + """ + + position: typing.Optional[SensetimeAvatarParamsSceneListItemDigitalRolePosition] = None + url: typing.Optional[str] = pydantic.Field(default=None) + """ + Avatar model package URL. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py new file mode 100644 index 0000000..c2ac79d --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class SensetimeAvatarParamsSceneListItemDigitalRolePosition(UncheckedBaseModel): + x: typing.Optional[float] = pydantic.Field(default=None) + """ + Avatar x position. + """ + + y: typing.Optional[float] = pydantic.Field(default=None) + """ + Avatar y position. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/stepfun_tts.py b/src/agora_agent/types/stepfun_tts.py new file mode 100644 index 0000000..708a5c2 --- /dev/null +++ b/src/agora_agent/types/stepfun_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .stepfun_tts_params import StepfunTtsParams + + +class StepfunTts(UncheckedBaseModel): + """ + StepFun Text-to-Speech configuration. + """ + + params: StepfunTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/stepfun_tts_params.py b/src/agora_agent/types/stepfun_tts_params.py new file mode 100644 index 0000000..5636e2c --- /dev/null +++ b/src/agora_agent/types/stepfun_tts_params.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class StepfunTtsParams(UncheckedBaseModel): + """ + StepFun TTS configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + StepFun API key. + """ + + model: str = pydantic.Field() + """ + StepFun model identifier. + """ + + voice_id: str = pydantic.Field() + """ + StepFun voice identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_asr.py b/src/agora_agent/types/tencent_asr.py new file mode 100644 index 0000000..75ff988 --- /dev/null +++ b/src/agora_agent/types/tencent_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .tencent_asr_params import TencentAsrParams + + +class TencentAsr(UncheckedBaseModel): + """ + Tencent ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: TencentAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_asr_params.py b/src/agora_agent/types/tencent_asr_params.py new file mode 100644 index 0000000..3fa75ef --- /dev/null +++ b/src/agora_agent/types/tencent_asr_params.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class TencentAsrParams(UncheckedBaseModel): + """ + Tencent ASR configuration parameters. + """ + + key: str = pydantic.Field() + """ + Tencent ASR secret key. + """ + + app_id: str = pydantic.Field() + """ + Tencent Cloud application ID. + """ + + secret: str = pydantic.Field() + """ + Tencent ASR secret. + """ + + engine_model_type: str = pydantic.Field() + """ + Tencent ASR engine model type. + """ + + voice_id: str = pydantic.Field() + """ + Tencent ASR voice session identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_tts.py b/src/agora_agent/types/tencent_tts.py new file mode 100644 index 0000000..ad8a937 --- /dev/null +++ b/src/agora_agent/types/tencent_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .tencent_tts_params import TencentTtsParams + + +class TencentTts(UncheckedBaseModel): + """ + Tencent Text-to-Speech configuration. + """ + + params: TencentTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_tts_params.py b/src/agora_agent/types/tencent_tts_params.py new file mode 100644 index 0000000..5fcf6f5 --- /dev/null +++ b/src/agora_agent/types/tencent_tts_params.py @@ -0,0 +1,62 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class TencentTtsParams(UncheckedBaseModel): + """ + Tencent TTS configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + Tencent Cloud application ID. + """ + + secret_id: str = pydantic.Field() + """ + Tencent Cloud secret ID. + """ + + secret_key: str = pydantic.Field() + """ + Tencent Cloud secret key. + """ + + voice_type: int = pydantic.Field() + """ + Tencent voice type identifier. + """ + + volume: typing.Optional[float] = pydantic.Field(default=None) + """ + Volume setting. + """ + + speed: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed setting. + """ + + emotion_category: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion category. + """ + + emotion_intensity: typing.Optional[int] = pydantic.Field(default=None) + """ + Emotion intensity. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tts.py b/src/agora_agent/types/tts.py index 85761fd..f41cce3 100644 --- a/src/agora_agent/types/tts.py +++ b/src/agora_agent/types/tts.py @@ -9,7 +9,10 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata from .amazon_tts_params import AmazonTtsParams +from .bytedance_duplex_tts_params import BytedanceDuplexTtsParams +from .bytedance_tts_params import BytedanceTtsParams from .cartesia_tts_params import CartesiaTtsParams +from .cosyvoice_tts_params import CosyvoiceTtsParams from .deepgram_tts_params import DeepgramTtsParams from .eleven_labs_tts_params import ElevenLabsTtsParams from .fish_audio_tts_params import FishAudioTtsParams @@ -21,6 +24,38 @@ from .open_ai_tts_params import OpenAiTtsParams from .rime_tts_params import RimeTtsParams from .sarvam_tts_params import SarvamTtsParams +from .stepfun_tts_params import StepfunTtsParams +from .tencent_tts_params import TencentTtsParams + + +class Tts_Tencent(UncheckedBaseModel): + vendor: typing.Literal["tencent"] = "tencent" + params: TencentTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_Bytedance(UncheckedBaseModel): + vendor: typing.Literal["bytedance"] = "bytedance" + params: BytedanceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow class Tts_Microsoft(UncheckedBaseModel): @@ -218,8 +253,55 @@ class Config: extra = pydantic.Extra.allow +class Tts_Cosyvoice(UncheckedBaseModel): + vendor: typing.Literal["cosyvoice"] = "cosyvoice" + params: CosyvoiceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_BytedanceDuplex(UncheckedBaseModel): + vendor: typing.Literal["bytedance_duplex"] = "bytedance_duplex" + params: BytedanceDuplexTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_Stepfun(UncheckedBaseModel): + vendor: typing.Literal["stepfun"] = "stepfun" + params: StepfunTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + Tts = typing_extensions.Annotated[ typing.Union[ + Tts_Tencent, + Tts_Bytedance, Tts_Microsoft, Tts_Elevenlabs, Tts_Minimax, @@ -233,6 +315,9 @@ class Config: Tts_Amazon, Tts_Sarvam, Tts_Deepgram, + Tts_Cosyvoice, + Tts_BytedanceDuplex, + Tts_Stepfun, ], UnionMetadata(discriminant="vendor"), ] diff --git a/src/agora_agent/types/xfyun_asr.py b/src/agora_agent/types/xfyun_asr.py new file mode 100644 index 0000000..f97f312 --- /dev/null +++ b/src/agora_agent/types/xfyun_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_asr_params import XfyunAsrParams + + +class XfyunAsr(UncheckedBaseModel): + """ + iFlytek ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_asr_params.py b/src/agora_agent/types/xfyun_asr_params.py new file mode 100644 index 0000000..5bbc081 --- /dev/null +++ b/src/agora_agent/types/xfyun_asr_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunAsrParams(UncheckedBaseModel): + """ + iFlytek ASR configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + iFlytek API key. + """ + + app_id: str = pydantic.Field() + """ + iFlytek application ID. + """ + + api_secret: str = pydantic.Field() + """ + iFlytek API secret. + """ + + language: str = pydantic.Field() + """ + iFlytek language code. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_bigmodel_asr.py b/src/agora_agent/types/xfyun_bigmodel_asr.py new file mode 100644 index 0000000..bfd4b18 --- /dev/null +++ b/src/agora_agent/types/xfyun_bigmodel_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_bigmodel_asr_params import XfyunBigmodelAsrParams + + +class XfyunBigmodelAsr(UncheckedBaseModel): + """ + iFlytek large-model ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunBigmodelAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_bigmodel_asr_params.py b/src/agora_agent/types/xfyun_bigmodel_asr_params.py new file mode 100644 index 0000000..9beff97 --- /dev/null +++ b/src/agora_agent/types/xfyun_bigmodel_asr_params.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunBigmodelAsrParams(UncheckedBaseModel): + """ + iFlytek large-model ASR configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + iFlytek large-model API key. + """ + + app_id: str = pydantic.Field() + """ + iFlytek large-model application ID. + """ + + api_secret: str = pydantic.Field() + """ + iFlytek large-model API secret. + """ + + language_name: str = pydantic.Field() + """ + iFlytek language family name. + """ + + language: str = pydantic.Field() + """ + iFlytek language mode. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_dialect_asr.py b/src/agora_agent/types/xfyun_dialect_asr.py new file mode 100644 index 0000000..1b1f33c --- /dev/null +++ b/src/agora_agent/types/xfyun_dialect_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_dialect_asr_params import XfyunDialectAsrParams + + +class XfyunDialectAsr(UncheckedBaseModel): + """ + iFlytek dialect ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunDialectAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_dialect_asr_params.py b/src/agora_agent/types/xfyun_dialect_asr_params.py new file mode 100644 index 0000000..317a305 --- /dev/null +++ b/src/agora_agent/types/xfyun_dialect_asr_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunDialectAsrParams(UncheckedBaseModel): + """ + iFlytek dialect ASR configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + iFlytek dialect application ID. + """ + + access_key_id: str = pydantic.Field() + """ + iFlytek access key ID. + """ + + access_key_secret: str = pydantic.Field() + """ + iFlytek access key secret. + """ + + language: str = pydantic.Field() + """ + Dialect recognition language code. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow From 1449b7337c09bf06d301b2c67c6f984dfe3a4b05 Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 06:35:08 +0000 Subject: [PATCH 2/2] [fern-replay] Applied customizations Patches applied (5): - patch-7465fada: fix(agentkit): resolve Python session typing issues - patch-fae1249a: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. - patch-299e4bd9: fix(agentkit): resolve provider config type checks - patch-bed29b6b: chore: bump Python packages to 2.1.0 - patch-fecdc77c: Fix AgentKit request validation and provider wire-key coverage Patches with unresolved conflicts (12): - patch-6e30398b: chore(agentkit): bump to v1.5.0 and expose v2.7 type aliases - patch-9df782b4: feat(agentkit): update MLLM and LLM vendor wrappers for v2.7 - patch-26706d73: feat(agentkit): add GenericAvatar and session-aware avatar validation - patch-9f491c63: feat(agentkit): update Agent builder and session lifecycle for v2.7 - patch-eaec58eb: refactor(agentkit): align deprecated vendor aliases with canonical names - patch-20245632: feat(agentkit): export type aliases and avatar token helpers - patch-972dd5bd: updated docs - patch-d29165c4: make python compat package publishable - patch-44c21c14: Re-export AgentKit symbols from agora_agent package root Extend __getattr__ and __all__ so vendor classes, presets, and helpers are importable via `from agora_agent import ...`. Add tests and update class docstring examples to use the root import path. - patch-617ee134: feat(agentkit): support agent-level pipeline_id - patch-8e22e6d0: udpated agent docs - patch-c287be1c: Prepare Python SDK v2.2.0 release Run `fern-replay resolve` to apply these customizations. Patches absorbed by generator (3): - patch-fc9d93c3: Document agora-agents PyPI install name and migration notes - patch-d475306b: Move package rename guidance to installation docs and protect manual paths in Fern ignore. Consolidate migration notes into the installation guide with next-step links, add a brief README pointer, and exclude README, compat, and workflow files from Fern generation. - patch-c9355576: Streamline Python docs and README for app-credentials-first onboarding. Remove duplicated low-level client examples from the README, de-emphasize legacy auth modes, refocus the low-level API guide on AgentKit with telephony escape hatches, and update Agora-managed model terminology. The generator now produces these customizations natively. --- .fern/replay.lock | 1017 ++++---------------- PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md | 27 + src/agora_agent/agentkit/agent.py | 19 + src/agora_agent/agentkit/agent_session.py | 1 + src/agora_agent/agentkit/vendors/avatar.py | 43 + src/agora_agent/agentkit/vendors/llm.py | 3 + 6 files changed, 298 insertions(+), 812 deletions(-) create mode 100644 PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md diff --git a/.fern/replay.lock b/.fern/replay.lock index a435ef4..23fb587 100644 --- a/.fern/replay.lock +++ b/.fern/replay.lock @@ -12,7 +12,13 @@ generations: cli_version: unknown generator_versions: fernapi/fern-python-sdk: 4.37.0 -current_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + - commit_sha: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a + tree_hash: a2231bae160991dd969f615296d857f1ad7a6c14 + timestamp: 2026-06-17T06:34:56.828Z + cli_version: unknown + generator_versions: + fernapi/fern-python-sdk: 4.37.0 +current_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a patches: - id: patch-6e30398b content_hash: sha256:e99898e508e2d6cb9f134cc33e0b73c1c8acb845f5887924e0e38031a6e089c0 @@ -8237,26 +8243,26 @@ patches: | Think inject constant | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | status: unresolved - id: patch-7465fada - content_hash: sha256:9c6ed2e5f48702293eed8b213cc31cce63a7ed5a1ad16a0b23e791c13e77746f + content_hash: sha256:a2f90f66c927424018f2c3304742f097e8594dec9cb2f783264c7b11679a14ac original_commit: 7465fadafa0f1e62051d99b42d0eeda85f31eeee original_message: "fix(agentkit): resolve Python session typing issues" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a files: - src/agora_agent/agentkit/agent_session.py patch_content: | diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index dbff562..dca9ee8 100644 + index 2900c18..745c465 100644 --- a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py - @@ -24,6 +24,7 @@ from .avatar_types import ( - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - + is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, + @@ -15,6 +15,7 @@ from ..agent_management.types.agent_think_agent_management_response import ( + AgentThinkAgentManagementResponse as AgentThinkResponse, ) + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping + from .avatar_types import ( + is_akool_avatar, theirs_snapshot: src/agora_agent/agentkit/agent_session.py: | import typing @@ -8276,8 +8282,8 @@ patches: AgentThinkAgentManagementResponse as AgentThinkResponse, ) from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, is_anam_avatar, @@ -8595,15 +8601,15 @@ patches: properties["tts"] = self._dump_model(self._agent.tts) if self._agent.llm is not None: llm = dict(self._agent.llm) - if self._agent.instructions is not None: + if self._agent.instructions is not None and "system_messages" not in llm: llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: + if self._agent.greeting is not None and "greeting_message" not in llm: llm["greeting_message"] = self._agent.greeting - if self._agent.greeting_configs is not None: + if self._agent.greeting_configs is not None and "greeting_configs" not in llm: llm["greeting_configs"] = self._dump_model(self._agent.greeting_configs) - if self._agent.failure_message is not None: + if self._agent.failure_message is not None and "failure_message" not in llm: llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: + if self._agent.max_history is not None and "max_history" not in llm: llm["max_history"] = self._agent.max_history properties["llm"] = llm if self._agent.stt is not None: @@ -8611,6 +8617,47 @@ patches: return properties + @staticmethod + def _request_properties_for_start( + resolved_properties: typing.Dict[str, typing.Any], + *, + resolved_preset: typing.Optional[str], + pipeline_id: typing.Optional[str], + ) -> typing.Any: + try: + return _start_properties_from_mapping(resolved_properties) + except Exception as exc: + if pipeline_id: + return resolved_properties + if resolved_preset: + normalized_preset = normalize_preset_input(resolved_preset) + if not normalized_preset: + raise + preset_categories = { + category + for item in normalized_preset.split(",") + for category in [get_preset_category(item)] + if category is not None + } + error_categories = _AgentSessionBase._validation_error_categories(exc) + if error_categories and error_categories.issubset(preset_categories): + return resolved_properties + raise + + @staticmethod + def _validation_error_categories(exc: Exception) -> typing.Set[str]: + errors = getattr(exc, "errors", None) + if not callable(errors): + return set() + categories: typing.Set[str] = set() + for error in errors(): + loc = error.get("loc") if isinstance(error, dict) else None + if isinstance(loc, tuple) and loc: + field = loc[0] + if field in {"asr", "llm", "tts"}: + categories.add(typing.cast(str, field)) + return categories + def _vendor_validation_categories( self, pipeline_id: typing.Optional[str], @@ -8775,10 +8822,11 @@ patches: "properties": resolved_properties, }) - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) response = self._client.agents.start( self._app_id, @@ -9102,10 +9150,11 @@ patches: "properties": resolved_properties, }) - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) response = await self._client.agents.start( self._app_id, @@ -9478,7 +9527,7 @@ patches: original_commit: fae1249a20c53761a2eb5515a1bf92ca666760d1 original_message: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a files: - compat/agora-agent-server-sdk/README.md - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py @@ -9566,88 +9615,6 @@ patches: def __dir__(): return dir(_agora_agent) user_owned: true - - id: patch-fc9d93c3 - content_hash: sha256:93877741bdad745fda5dd549d7c3dd6bc315f4574aabd2defb52c0c795bff011 - original_commit: fc9d93c3026a6109d8a5e8b386418592f8d121c5 - original_message: Document agora-agents PyPI install name and migration notes - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/getting-started/installation.md - patch_content: | - diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md - index c14bdb2..f6f1750 100644 - --- a/docs/getting-started/installation.md - +++ b/docs/getting-started/installation.md - @@ -13,13 +13,13 @@ description: Install the Agora Conversational AI Python SDK. - ## Install with pip - - ```sh - -pip install agora-agent-sdk - +pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - -poetry add agora-agent-sdk - +poetry add agora-agents - ``` - - ## Dependencies - theirs_snapshot: - docs/getting-started/installation.md: | - --- - sidebar_position: 1 - title: Installation - description: Install the Agora Conversational AI Python SDK. - --- - - # Installation - - ## Prerequisites - - - Python >= 3.8 - - ## Install with pip - - ```sh - pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - poetry add agora-agents - ``` - - ## Dependencies - - The following packages are installed automatically: - - | Package | Purpose | - |---|---| - | `httpx` (>= 0.21.2) | HTTP client for sync and async requests | - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - ## Sync vs. Async - - The SDK supports both synchronous and asynchronous usage: - - - **Synchronous** — import `Agora` from `agora_agent` and use blocking method calls - - **Asynchronous** — import `AsyncAgora` and `AsyncAgentSession` from `agora_agent` and use `await` with all API calls - - ```python - # Sync - from agora_agent import Agora, Area - - # Async - from agora_agent import AsyncAgora, AsyncAgentSession, Area - ``` - - Both clients share the same constructor parameters and capabilities. See [Authentication](./authentication.md) for setup details. - status: unresolved - id: patch-44c21c14 content_hash: sha256:920a8a5905a3bbb134edb28b007c5c0b1b4b2c1f75753140fef305b14a64e3e0 original_commit: 44c21c14a14aa7ad469a18ce86024ff14ca2bf9b @@ -11681,535 +11648,20 @@ patches: assert "DeepgramSTT" in agora_agent.__all__ assert "OpenAI" in agora_agent.__all__ status: unresolved - - id: patch-d475306b - content_hash: sha256:407af5e7564d6e8d0b91f1e117cb433aec931f083225af53c6df2abfff281b22 - original_commit: d475306bd42279984bcf4934b900003e8e02c4eb - original_message: Move package rename guidance to installation docs and protect manual paths in Fern ignore. Consolidate migration notes into the installation guide with next-step links, add a brief README pointer, and exclude README, compat, and workflow files from Fern generation. - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - compat/agora-agent-server-sdk/README.md - - docs/getting-started/installation.md - patch_content: | - diff --git a/compat/agora-agent-server-sdk/README.md b/compat/agora-agent-server-sdk/README.md - index e43d1d8..1da36aa 100644 - --- a/compat/agora-agent-server-sdk/README.md - +++ b/compat/agora-agent-server-sdk/README.md - @@ -14,3 +14,5 @@ This compatibility package re-exports the public API from `agora-agents` to supp - from agora_agent import Agora, Area - from agora_agent_server_sdk_compat import Agora, Area - ``` - + - +Maintainers: dual-publish steps live in the repository release workflow, not in the root README. - diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md - index 04b48da..8fca9ab 100644 - --- a/docs/getting-started/installation.md - +++ b/docs/getting-started/installation.md - @@ -53,4 +53,15 @@ from agora_agent import AsyncAgora, AsyncAgentSession, Area - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - -See [Authentication](./authentication.md) for setup details. - +## Next steps - + - +- [Authentication](./authentication.md) — configure your credentials - +- [Quick Start](./quick-start.md) — build your first conversational agent - + - +## Migrating from a previous package name - + - +The PyPI distribution was renamed from `agora-agent-server-sdk` to `agora-agents` in v2.0.0. Install `agora-agents`; the import path remains `agora_agent`. - + - +The legacy PyPI name remains available as a compatibility shim that re-exports `agora-agents`. See [compat/agora-agent-server-sdk](../../compat/agora-agent-server-sdk/README.md). - + - +For release and version details, see [changelog — Migration notes](../../changelog.md#migration-notes). - theirs_snapshot: - compat/agora-agent-server-sdk/README.md: | - # agora-agent-server-sdk - - This package has been renamed to `agora-agents`. - - New projects should install: - - ```sh - pip install agora-agents - ``` - - This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: - - ```python - from agora_agent import Agora, Area - from agora_agent_server_sdk_compat import Agora, Area - ``` - - Maintainers: dual-publish steps live in the repository release workflow, not in the root README. - docs/getting-started/installation.md: | - --- - sidebar_position: 1 - title: Installation - description: Install the Agora Conversational AI Python SDK. - --- - - # Installation - - ## Prerequisites - - - Python >= 3.8 - - ## Install with pip - - ```sh - pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - poetry add agora-agents - ``` - - ## Imports - - ```python - from agora_agent import Agent, Agora, Area, DeepgramSTT, OpenAI - ``` - - The package installs as `agora-agents` and imports as `agora_agent`. - - ## Sync vs. Async - - The SDK supports both synchronous and asynchronous usage: - - - **Synchronous** — import `Agora` from `agora_agent` and use blocking method calls - - **Asynchronous** — import `AsyncAgora` and `AsyncAgentSession` from `agora_agent` and use `await` with all API calls - - ```python - # Sync - from agora_agent import Agora, Area - - # Async - from agora_agent import AsyncAgora, AsyncAgentSession, Area - ``` - - ## Dependencies - - | Package | Purpose | - | ------------------------------ | ------------------------------------------------------ | - | `httpx` (>= 0.21.2) | HTTP client for sync and async requests | - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - ## Next steps - - - [Authentication](./authentication.md) — configure your credentials - - [Quick Start](./quick-start.md) — build your first conversational agent - - ## Migrating from a previous package name - - The PyPI distribution was renamed from `agora-agent-server-sdk` to `agora-agents` in v2.0.0. Install `agora-agents`; the import path remains `agora_agent`. - - The legacy PyPI name remains available as a compatibility shim that re-exports `agora-agents`. See [compat/agora-agent-server-sdk](../../compat/agora-agent-server-sdk/README.md). - - For release and version details, see [changelog — Migration notes](../../changelog.md#migration-notes). - status: unresolved - - id: patch-c9355576 - content_hash: sha256:83b3b6148b21f2b4d53ee67321777522f5f4e871b61ea3b23f3a6b88ca052769 - original_commit: c93555763ffd63267a737b3e430217a890f203db - original_message: Streamline Python docs and README for app-credentials-first onboarding. Remove duplicated low-level client examples from the README, de-emphasize legacy auth modes, refocus the low-level API guide on AgentKit with telephony escape hatches, and update Agora-managed model terminology. - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/getting-started/authentication.md - - docs/guides/low-level-api.md - patch_content: | - diff --git a/docs/getting-started/authentication.md b/docs/getting-started/authentication.md - index 31dcc56..74c62cd 100644 - --- a/docs/getting-started/authentication.md - +++ b/docs/getting-started/authentication.md - @@ -46,41 +46,6 @@ session = agent.create_session( - print(client.auth_mode) # "app-credentials" - ``` - - -## Other auth modes - +## Legacy auth modes - - -The SDK also supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. These are not recommended for new applications. - - - -### Token auth (`auth_token`) - - - -Pass a pre-minted Agora REST token on the client. You must also supply the RTC join token on `create_session(..., token=...)`. - - - -```python - -client = Agora( - - area=Area.US, - - app_id="your-app-id", - - app_certificate="your-app-certificate", - - auth_token="your-rest-auth-token", - -) - - - -session = agent.create_session( - - client, - - channel="room-123", - - agent_uid="1", - - remote_uids=["100"], - - token="your-rtc-join-token", - -) - -``` - - - -### Basic Auth (`customer_id` + `customer_secret`) - - - -Uses HTTP Basic Auth with Customer ID and Secret from Agora Console. Avoid for new integrations — the same credentials are sent on every request instead of minting fresh tokens. - - - -```python - -client = Agora( - - area=Area.US, - - app_id="your-app-id", - - app_certificate="your-app-certificate", - - customer_id="your-customer-id", - - customer_secret="your-customer-secret", - -) - -``` - +The generated client still supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. Do not use those modes for new session integrations. Use app credentials so AgentKit can mint short-lived ConvoAI REST auth and RTC join tokens for each session. - diff --git a/docs/guides/low-level-api.md b/docs/guides/low-level-api.md - index 6677b45..47397b7 100644 - --- a/docs/guides/low-level-api.md - +++ b/docs/guides/low-level-api.md - @@ -1,187 +1,55 @@ - --- - sidebar_position: 10 - title: Low-Level API - -description: Direct client.agents.start() usage without the builder pattern. - +description: Use generated clients for escape-hatch APIs while keeping agent sessions on AgentKit. - --- - - # Low-Level API - - -For full control over request payloads you can call the generated clients directly and pass raw types such as `StartAgentsRequestProperties`, `Tts_Elevenlabs`, and `StartAgentsRequestPropertiesAsr`. Use this when you need vendor or options not exposed by the agentkit, or when integrating with generated types from the API spec. - +Use the `Agent` builder and `AgentSession` for conversational agent starts. That path generates ConvoAI REST auth and RTC join tokens from `app_id` and `app_certificate`, so application code does not need prebuilt REST tokens, RTC tokens, Customer ID, or Customer Secret. - - -## Raw telephony and phone-number APIs - - - -AgentKit focuses on realtime agent session helpers. Telephony call status, call hangup, and phone-number management are exposed through the generated low-level clients: - - - -- `client.telephony` for call status and hangup operations - -- `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - +Generated clients are still available for API surface that AgentKit does not wrap yet, such as telephony and phone-number management. - - -## Cascading flow (ASR → LLM → TTS) - +## Client setup - - ```python - from agora_agent import Agora, Area - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesAsr, - - StartAgentsRequestPropertiesLlm, - -) - -from agora_agent.types.eleven_labs_tts_params import ElevenLabsTtsParams - -from agora_agent.types.tts import Tts_Elevenlabs - - client = Agora( - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - -) - -client.agents.start( - - client.app_id, - - name="unique_name", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - asr=StartAgentsRequestPropertiesAsr( - - language="en-US", - - vendor="deepgram", - - params={"api_key": "YOUR_DEEPGRAM_API_KEY"}, - - ), - - tts=Tts_Elevenlabs( - - params=ElevenLabsTtsParams( - - key="YOUR_ELEVENLABS_API_KEY", - - model_id="eleven_flash_v2_5", - - voice_id="pNInz6obpgDQGcFmaJgB", - - sample_rate=24000, - - ), - - ), - - llm=StartAgentsRequestPropertiesLlm( - - url="https://api.openai.com/v1/chat/completions", - - api_key="", - - system_messages=[ - - {"role": "system", "content": "You are a helpful chatbot."} - - ], - - params={"model": "gpt-4o-mini"}, - - max_history=32, - - greeting_message="Hello, how can I assist you today?", - - failure_message="Please hold on a second.", - - ), - - ), - + app_id="your-app-id", - + app_certificate="your-app-certificate", - ) - ``` - - -## Async (low-level) - +## Raw telephony and phone-number APIs - - -```python - -import asyncio - -from agora_agent import Area, AsyncAgora - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesAsr, - - StartAgentsRequestPropertiesLlm, - -) - -from agora_agent.types.eleven_labs_tts_params import ElevenLabsTtsParams - -from agora_agent.types.tts import Tts_Elevenlabs - +AgentKit focuses on realtime agent session helpers. Use generated clients for operational APIs: - - -client = AsyncAgora( - - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - +- `client.telephony` for call status and hangup operations - +- `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - + - +```python - +calls = client.telephony.list( - + appid=client.app_id, - + type="sip", - ) - - -async def main() -> None: - - await client.agents.start( - - client.app_id, - - name="unique_name", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - asr=StartAgentsRequestPropertiesAsr( - - language="en-US", - - vendor="deepgram", - - params={"api_key": "YOUR_DEEPGRAM_API_KEY"}, - - ), - - tts=Tts_Elevenlabs( - - params=ElevenLabsTtsParams( - - key="YOUR_ELEVENLABS_API_KEY", - - model_id="eleven_flash_v2_5", - - voice_id="pNInz6obpgDQGcFmaJgB", - - sample_rate=24000, - - ), - - ), - - llm=StartAgentsRequestPropertiesLlm( - - url="https://api.openai.com/v1/chat/completions", - - api_key="", - - system_messages=[ - - {"role": "system", "content": "You are a helpful chatbot."} - - ], - - params={"model": "gpt-4o-mini"}, - - max_history=32, - - greeting_message="Hello, how can I assist you today?", - - failure_message="Please hold on a second.", - - ), - - ), - - ) - - - -asyncio.run(main()) - +for call in calls: - + print(call.id, call.state) - ``` - - -## MLLM flow (multimodal) - +## Direct agent APIs - - -For real-time audio with OpenAI Realtime or Google Gemini Live, use the MLLM flow instead of the cascading ASR → LLM → TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview). - +`client.agents` exposes the generated REST surface for advanced integrations. Prefer `agent.create_session(...).start()` for new session starts because it handles auth, token generation, vendor serialization, lifecycle state, and avatar enrichment. - - -```python - -from agora_agent import Agora, Area - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesMllm, - - StartAgentsRequestPropertiesMllmVendor, - - StartAgentsRequestPropertiesTts, - - StartAgentsRequestPropertiesTtsVendor, - - StartAgentsRequestPropertiesLlm, - -) - +If you need an endpoint that is not wrapped by `AgentSession`, use `session.raw` after creating the session: - - -client = Agora( - - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - -) - - - -client.agents.start( - - client.app_id, - - name="mllm_agent", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="your_token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - mllm=StartAgentsRequestPropertiesMllm( - - enable=True, - - url="wss://api.openai.com/v1/realtime", - - api_key="", - - vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI, - - params={ - - "model": "gpt-4o-realtime-preview", - - "voice": "alloy", - - }, - - input_modalities=["audio"], - - output_modalities=["text", "audio"], - - greeting_message="Hello! I'm ready to chat in real-time.", - - turn_detection={ - - "mode": "server_vad", - - "server_vad_config": { - - "idle_timeout_ms": 5000, - - }, - - }, - - ), - - ), - +```python - +info = session.raw.get( - + appid=session.app_id, - + agent_id=session.id, - ) - ``` - - -For more on the agentkit-based MLLM flow, see [MLLM Flow](./mllm-flow.md). - +You must pass `appid` and `agent_id` manually when using generated raw methods. - theirs_snapshot: - docs/getting-started/authentication.md: | - --- - sidebar_position: 2 - title: Authentication - description: Configure the Python SDK with app credentials and understand other supported auth modes. - --- - - # Authentication - - Create `Agora` or `AsyncAgora` with `app_id` and `app_certificate` only. The SDK mints a fresh ConvoAI REST token for each API call and generates the RTC join token when the session starts. - - ## App credentials - - ```python - from agora_agent import Agent, Agora, Area, DeepgramSTT, OpenAI, MiniMaxTTS - - client = Agora( - area=Area.US, - app_id="your-app-id", - app_certificate="your-app-certificate", - ) - - agent = ( - Agent(instructions="Be concise.") - .with_stt(DeepgramSTT(model="nova-3")) - .with_llm(OpenAI(model="gpt-4o-mini")) - .with_tts(MiniMaxTTS(model="speech_2_6_turbo", voice_id="English_captivating_female1")) - ) - - session = agent.create_session( - client, - channel="room-123", - agent_uid="1", - remote_uids=["100"], - ) - ``` - - ## Why app credentials - - - Fresh short-lived tokens per API call instead of reusing long-lived credentials - - No Customer ID / Customer Secret in request headers - - No manual REST or RTC token provisioning in application code - - ## Inspecting auth mode - - ```python - print(client.auth_mode) # "app-credentials" - ``` - - ## Legacy auth modes - - The generated client still supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. Do not use those modes for new session integrations. Use app credentials so AgentKit can mint short-lived ConvoAI REST auth and RTC join tokens for each session. - docs/guides/low-level-api.md: | - --- - sidebar_position: 10 - title: Low-Level API - description: Use generated clients for escape-hatch APIs while keeping agent sessions on AgentKit. - --- - - # Low-Level API - - Use the `Agent` builder and `AgentSession` for conversational agent starts. That path generates ConvoAI REST auth and RTC join tokens from `app_id` and `app_certificate`, so application code does not need prebuilt REST tokens, RTC tokens, Customer ID, or Customer Secret. - - Generated clients are still available for API surface that AgentKit does not wrap yet, such as telephony and phone-number management. - - ## Client setup - - ```python - from agora_agent import Agora, Area - - client = Agora( - area=Area.US, - app_id="your-app-id", - app_certificate="your-app-certificate", - ) - ``` - - ## Raw telephony and phone-number APIs - - AgentKit focuses on realtime agent session helpers. Use generated clients for operational APIs: - - - `client.telephony` for call status and hangup operations - - `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - - ```python - calls = client.telephony.list( - appid=client.app_id, - type="sip", - ) - - for call in calls: - print(call.id, call.state) - ``` - - ## Direct agent APIs - - `client.agents` exposes the generated REST surface for advanced integrations. Prefer `agent.create_session(...).start()` for new session starts because it handles auth, token generation, vendor serialization, lifecycle state, and avatar enrichment. - - If you need an endpoint that is not wrapped by `AgentSession`, use `session.raw` after creating the session: - - ```python - info = session.raw.get( - appid=session.app_id, - agent_id=session.id, - ) - ``` - - You must pass `appid` and `agent_id` manually when using generated raw methods. - status: unresolved - id: patch-299e4bd9 - content_hash: sha256:e1470176436d28416d0ff67d8acc614060fae7b312f86c09b899a92d1c4adfe4 + content_hash: sha256:ee71350debd51653f1cb1472477a577436d74cbb847b3536a9cdbff0211abf2d original_commit: 299e4bd9cb59bd6144084332a7c3fa7bf260769f original_message: "fix(agentkit): resolve provider config type checks" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a files: - src/agora_agent/agentkit/agent.py - src/agora_agent/agentkit/vendors/llm.py - src/agora_agent/agentkit/vendors/mllm.py - src/agora_agent/agentkit/vendors/stt.py - patch_content: |+ + patch_content: | diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index 6275f04..ecf01c6 100644 + index 1daba82..95cfe34 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py @@ -57,6 +57,8 @@ from ..agents.types.start_agents_request_properties_filler_words_content import @@ -12221,7 +11673,7 @@ patches: from ..types.asr import Asr from ..types.llm import Llm from ..types.llm_style import LlmStyle as GeneratedLlmStyle - @@ -536,6 +538,23 @@ class Agent: + @@ -544,6 +546,23 @@ class Agent: ) return new_agent @@ -12246,12 +11698,10 @@ patches: """Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py - index 9156a01..5dd822d 100644 + index 5a9f39e..1f1b354 100644 --- a/src/agora_agent/agentkit/vendors/llm.py +++ b/src/agora_agent/agentkit/vendors/llm.py - @@ -1,7 +1,10 @@ - -from typing import Any, Dict, List, Optional - +from typing import Any, Dict, List, Optional, Union + @@ -2,6 +2,9 @@ from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -12261,43 +11711,6 @@ patches: from .base import BaseLLM LlmGreetingConfigs = Dict[str, Any] - diff --git a/src/agora_agent/agentkit/vendors/mllm.py b/src/agora_agent/agentkit/vendors/mllm.py - index 236a494..6a260d8 100644 - --- a/src/agora_agent/agentkit/vendors/mllm.py - +++ b/src/agora_agent/agentkit/vendors/mllm.py - @@ -1,3 +1,4 @@ - +import warnings - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - diff --git a/src/agora_agent/agentkit/vendors/stt.py b/src/agora_agent/agentkit/vendors/stt.py - index e5117b0..bb222a9 100644 - --- a/src/agora_agent/agentkit/vendors/stt.py - +++ b/src/agora_agent/agentkit/vendors/stt.py - @@ -89,6 +89,7 @@ class SpeechmaticsSTTOptions(BaseModel): - - api_key: str = Field(..., description="Speechmatics API key") - language: str = Field(..., description="Language code (e.g., en, es, fr)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - model: Optional[str] = Field(default=None, description="Model name") - uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - @@ -124,6 +125,7 @@ class DeepgramSTTOptions(BaseModel): - api_key: Optional[str] = Field(default=None, description="Deepgram API key") - model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") - punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - @@ -353,6 +355,7 @@ class SarvamSTTOptions(BaseModel): - - api_key: str = Field(..., description="Sarvam API key") - language: str = Field(..., description="Language code (e.g., en, hi, ta)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - model: Optional[str] = Field(default=None, description="Model name") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - theirs_snapshot: src/agora_agent/agentkit/agent.py: | from __future__ import annotations @@ -12380,6 +11793,7 @@ patches: from ..agent_management.types.agent_think_agent_management_response import ( AgentThinkAgentManagementResponse, ) + from ..core.pydantic_utilities import parse_obj_as from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS # Top-level aliases @@ -12492,6 +11906,13 @@ patches: debug: bool warn: typing.Callable[[str], None] + + def _start_properties_from_mapping( + properties: typing.Mapping[str, typing.Any], + ) -> StartAgentsRequestProperties: + return parse_obj_as(StartAgentsRequestProperties, dict(properties)) + + # LLM sub-type aliases LlmGreetingConfigs = typing.Dict[str, typing.Any] LlmGreetingConfigsMode = typing.Any @@ -12602,7 +12023,7 @@ patches: def _validate_turn_detection_language(value: typing.Any) -> TurnDetectionLanguage: if not _is_turn_detection_language(value): - raise ValueError(f"Invalid interaction language: {value}") + raise ValueError(f"Invalid turn_detection.language: {value}") return value # type: ignore[return-value] @@ -13217,7 +12638,7 @@ patches: if self._failure_message is not None: mllm_config.setdefault("failure_message", self._failure_message) base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) if skip_vendor_validation: warnings.warn( @@ -13240,12 +12661,13 @@ patches: allow_missing_llm = "llm" in allow_missing_categories allow_missing_tts = "tts" in allow_missing_categories + turn_detection_config = self._resolve_turn_detection_config() if not skip_asr_validation and (self._stt is not None or not allow_missing_asr): - base_kwargs["asr"] = self._resolve_asr_config() - base_kwargs["turn_detection"] = self._resolve_turn_detection_config() + base_kwargs["asr"] = self._resolve_asr_config(turn_detection_config) + base_kwargs["turn_detection"] = turn_detection_config if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) if self._tts is None and not (skip_tts_validation or allow_missing_tts): raise ValueError("TTS configuration is required. Use with_tts() to set it.") @@ -13258,39 +12680,34 @@ patches: if self._tts is not None and not skip_tts_validation: base_kwargs["tts"] = self._tts - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) def _resolve_llm_config(self) -> typing.Dict[str, typing.Any]: llm_config = dict(self._llm or {}) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. - if self._instructions is not None: + if self._instructions is not None and "system_messages" not in llm_config: llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: + if self._greeting is not None and "greeting_message" not in llm_config: llm_config["greeting_message"] = self._greeting - if self._greeting_configs is not None: + if self._greeting_configs is not None and "greeting_configs" not in llm_config: llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) - if self._failure_message is not None: + if self._failure_message is not None and "failure_message" not in llm_config: llm_config["failure_message"] = self._failure_message - if self._max_history is not None: + if self._max_history is not None and "max_history" not in llm_config: llm_config["max_history"] = self._max_history return llm_config - def _resolve_asr_config(self) -> typing.Dict[str, typing.Any]: + def _resolve_asr_config(self, turn_detection_config: TurnDetectionConfig) -> typing.Dict[str, typing.Any]: asr_config = dict(self._stt or {}) - asr_config.pop("language", None) if not asr_config: asr_config["vendor"] = "ares" + asr_config["language"] = self._field_value(turn_detection_config, "language") return asr_config def _resolve_turn_detection_config(self) -> TurnDetectionConfig: - existing_stt_language = self._stt.get("language") if self._stt is not None else None existing_turn_detection_language = self._field_value(self._turn_detection, "language") language = ( existing_turn_detection_language if existing_turn_detection_language is not None - else existing_stt_language - if _is_turn_detection_language(existing_stt_language) else DEFAULT_TURN_DETECTION_LANGUAGE ) language = _validate_turn_detection_language(language) @@ -13708,12 +13125,13 @@ patches: options = _dump_optional_model(self.options) options.pop("project_id", None) options.pop("location", None) - config = Gemini(**options).to_config() - params = dict(config["params"]) - params["project_id"] = self.options.project_id - params["location"] = self.options.location - config["params"] = params - return config + if not options.get("url"): + options["url"] = ( + f"https://{self.options.location}-aiplatform.googleapis.com/v1/projects/" + f"{self.options.project_id}/locations/{self.options.location}/" + f"publishers/google/models/{self.options.model}:streamGenerateContent?alt=sse" + ) + return Gemini(**options).to_config() class AmazonBedrockOptions(BaseModel): @@ -14124,98 +13542,20 @@ patches: return config src/agora_agent/agentkit/vendors/stt.py: | - from typing import Any, Dict, Optional, Tuple + from typing import Any, Dict, Optional from pydantic import BaseModel, ConfigDict, Field, model_validator - from typing_extensions import Literal from .base import BaseSTT - TurnDetectionLanguage = Literal[ - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ] - - TURN_DETECTION_LANGUAGE_VALUES: Tuple[TurnDetectionLanguage, ...] = ( - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ) - _TURN_DETECTION_LANGUAGES = set(TURN_DETECTION_LANGUAGE_VALUES) _DEEPGRAM_MANAGED_MODELS = {"nova-2", "nova-3"} - def _turn_detection_language(language: Optional[str]) -> Optional[TurnDetectionLanguage]: - if language in _TURN_DETECTION_LANGUAGES: - return language # type: ignore[return-value] - return None - - class SpeechmaticsSTTOptions(BaseModel): model_config = ConfigDict(extra="forbid") api_key: str = Field(..., description="Speechmatics API key") language: str = Field(..., description="Language code (e.g., en, es, fr)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") model: Optional[str] = Field(default=None, description="Model name") uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14239,9 +13579,6 @@ patches: "vendor": "speechmatics", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14251,7 +13588,7 @@ patches: api_key: Optional[str] = Field(default=None, description="Deepgram API key") model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") + keyterm: Optional[str] = Field(default=None, description="Boost specialized terms and brands for Deepgram") smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14279,13 +13616,12 @@ patches: params["smart_format"] = self.options.smart_format if self.options.punctuation is not None: params["punctuation"] = self.options.punctuation + if self.options.keyterm is not None: + params["keyterm"] = self.options.keyterm config: Dict[str, Any] = { "vendor": "deepgram", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14314,9 +13650,6 @@ patches: "vendor": "microsoft", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14338,22 +13671,26 @@ patches: params: Dict[str, Any] = dict(self.options.additional_params or {}) params["api_key"] = self.options.api_key - transcription = {"model": "whisper-1", **(self.options.input_audio_transcription or {})} + transcription: Dict[str, Any] = {"model": "gpt-4o-mini-transcribe"} + transcription.update(self.options.input_audio_transcription or {}) if self.options.model is not None: transcription["model"] = self.options.model if self.options.prompt is not None: transcription["prompt"] = self.options.prompt if self.options.language is not None: transcription["language"] = self.options.language + if not transcription.get("model"): + raise ValueError("OpenAISTT: input_audio_transcription.model is required") + if not transcription.get("prompt"): + raise ValueError("OpenAISTT: input_audio_transcription.prompt is required") + if not transcription.get("language"): + raise ValueError("OpenAISTT: input_audio_transcription.language is required") params["input_audio_transcription"] = transcription config: Dict[str, Any] = { "vendor": "openai", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14388,9 +13725,6 @@ patches: "vendor": "google", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14421,9 +13755,6 @@ patches: "vendor": "amazon", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14451,16 +13782,12 @@ patches: "vendor": "assemblyai", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config class AresSTTOptions(BaseModel): model_config = ConfigDict(extra="forbid") - language: Optional[TurnDetectionLanguage] = Field(default=None, description="Language code") additional_params: Optional[Dict[str, Any]] = Field(default=None) class AresSTT(BaseSTT): @@ -14469,8 +13796,6 @@ patches: def to_config(self) -> Dict[str, Any]: config: Dict[str, Any] = {"vendor": "ares"} - if self.options.language is not None: - config["language"] = self.options.language if self.options.additional_params: config["params"] = self.options.additional_params return config @@ -14481,7 +13806,6 @@ patches: api_key: str = Field(..., description="Sarvam API key") language: str = Field(..., description="Language code (e.g., en, hi, ta)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") model: Optional[str] = Field(default=None, description="Model name") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14502,9 +13826,6 @@ patches: "vendor": "sarvam", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config - id: patch-617ee134 content_hash: sha256:ea2d27ba8019bf09ce5766d322eb7218fcee0a90124e823ba16c4e45dc1af5a9 @@ -17920,24 +17241,24 @@ patches: Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. status: unresolved - id: patch-bed29b6b - content_hash: sha256:8008d9c33a194a48ef317868953c26d5b03ede60c23743b4249260894c0f6417 + content_hash: sha256:35a32ee64c95efd478f684c167efc54c9d95344af837e99b31da4c36f66febce original_commit: bed29b6b7d4d08480a8510b26b5e21d1ef234cc9 original_message: "chore: bump Python packages to 2.1.0" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a files: - compat/agora-agent-server-sdk/pyproject.toml patch_content: | diff --git a/compat/agora-agent-server-sdk/pyproject.toml b/compat/agora-agent-server-sdk/pyproject.toml - index ac93128..468294b 100644 + index eea45d7..078ac75 100644 --- a/compat/agora-agent-server-sdk/pyproject.toml +++ b/compat/agora-agent-server-sdk/pyproject.toml @@ -3,7 +3,7 @@ name = "agora-agent-server-sdk" [tool.poetry] name = "agora-agent-server-sdk" - -version = "v2.0.0" - +version = "v2.1.0" + -version = "v2.1.1" + +version = "v2.2.0" description = "Compatibility shim for the renamed agora-agents package." readme = "README.md" authors = [] @@ -17945,8 +17266,8 @@ patches: [tool.poetry.dependencies] python = "^3.8" - -agora-agents = ">=2.0.0,<3.0.0" - +agora-agents = ">=2.1.0,<3.0.0" + -agora-agents = ">=2.1.1,<3.0.0" + +agora-agents = ">=2.2.0,<3.0.0" [build-system] requires = ["poetry-core"] @@ -17957,7 +17278,7 @@ patches: [tool.poetry] name = "agora-agent-server-sdk" - version = "v2.1.0" + version = "v2.2.0" description = "Compatibility shim for the renamed agora-agents package." readme = "README.md" authors = [] @@ -17989,9 +17310,81 @@ patches: [tool.poetry.dependencies] python = "^3.8" - agora-agents = ">=2.1.0,<3.0.0" + agora-agents = ">=2.2.0,<3.0.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" user_owned: true + - id: patch-fecdc77c + content_hash: sha256:4c3321ec0facd689cee56c0fc609559d1038380d04a4cd8478b7ad7bb4a85388 + original_commit: fecdc77c866f433d8287fcb8a55328612e016b21 + original_message: Fix AgentKit request validation and provider wire-key coverage + original_author: digitallysavvy + base_generation: 3d7d10edd130c182ad733db7d0fdb6af8fe2309a + files: + - PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + patch_content: | + diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + new file mode 100644 + index 0000000..f3cd64a + --- /dev/null + +++ b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + @@ -0,0 +1,27 @@ + +# Python AgentKit Snake Case API Audit + + + +Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + + + +Search terms: + + + +```bash + +rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python + +``` + + + +## Result + + + +No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. + + + +| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | + +|---|---|---|---|---|---|---|---|---|---| + +| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | + +| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | + +| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + + + +## Guardrail Added + + + +`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. + theirs_snapshot: + PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md: | + # Python AgentKit Snake Case API Audit + + Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + + Search terms: + + ```bash + rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python + ``` + + ## Result + + No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. + + | File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | + |---|---|---|---|---|---|---|---|---|---| + | `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | + | `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | + | `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + + ## Guardrail Added + + `tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. + user_owned: true diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md new file mode 100644 index 0000000..f3cd64a --- /dev/null +++ b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md @@ -0,0 +1,27 @@ +# Python AgentKit Snake Case API Audit + +Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + +Search terms: + +```bash +rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python +``` + +## Result + +No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. + +| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | +|---|---|---|---|---|---|---|---|---|---| +| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | +| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | +| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + +## Guardrail Added + +`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py index 1daba82..95cfe34 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py @@ -57,6 +57,8 @@ from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule from ..types.tts import Tts +from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule +from ..types.tts import Tts from ..types.asr import Asr from ..types.llm import Llm from ..types.llm_style import LlmStyle as GeneratedLlmStyle @@ -544,6 +546,23 @@ def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent ) return new_agent + def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": + """Returns a new Agent with the specified RTC audio scenario.""" + new_agent = self._clone() + if new_agent._parameters is None: + new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) + elif isinstance(new_agent._parameters, dict): + new_agent._parameters = typing.cast( + SessionParamsInput, + {**new_agent._parameters, "audio_scenario": audio_scenario}, + ) + else: + new_agent._parameters = self._copy_model_update( + new_agent._parameters, + {"audio_scenario": audio_scenario}, + ) + return new_agent + def with_failure_message(self, message: str) -> "Agent": """Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py index 2900c18..745c465 100644 --- a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py @@ -15,6 +15,7 @@ AgentThinkAgentManagementResponse as AgentThinkResponse, ) from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py index 1bd9633..e816367 100644 --- a/src/agora_agent/agentkit/vendors/avatar.py +++ b/src/agora_agent/agentkit/vendors/avatar.py @@ -177,6 +177,49 @@ def to_config(self) -> Dict[str, Any]: return {"enable": enable, "vendor": "generic", "params": params} +class GenericAvatarOptions(BaseModel): + model_config = ConfigDict(extra="forbid") + + api_key: str = Field(..., description="Generic avatar provider API key") + api_base_url: str = Field(..., description="Avatar provider API base URL") + avatar_id: str = Field(..., description="Avatar ID") + agora_uid: str = Field(..., description="Agora UID for the avatar video stream") + agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") + agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") + agora_channel: Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") + enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") + + +class GenericAvatar(BaseAvatar): + def __init__(self, **kwargs: Any): + self.options = GenericAvatarOptions(**kwargs) + + @property + def required_sample_rate(self) -> int: + return 0 + + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = { + "api_key": self.options.api_key, + "api_base_url": self.options.api_base_url, + "avatar_id": self.options.avatar_id, + "agora_uid": self.options.agora_uid, + } + + if self.options.agora_appid is not None: + params["agora_appid"] = self.options.agora_appid + if self.options.agora_token is not None: + params["agora_token"] = self.options.agora_token + if self.options.agora_channel is not None: + params["agora_channel"] = self.options.agora_channel + if self.options.additional_params is not None: + params = {**self.options.additional_params, **params} + + enable = self.options.enable if self.options.enable is not None else True + return {"enable": enable, "vendor": "generic", "params": params} + + class AnamAvatarOptions(BaseModel): model_config = ConfigDict(extra="forbid") diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py index 5a9f39e..1f1b354 100644 --- a/src/agora_agent/agentkit/vendors/llm.py +++ b/src/agora_agent/agentkit/vendors/llm.py @@ -2,6 +2,9 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator +from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( + StartAgentsRequestPropertiesLlmGreetingConfigs, +) from .base import BaseLLM LlmGreetingConfigs = Dict[str, Any]