From 1d61baad436285e3b6a37555edb5ca67c158681c Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 07:20:24 +0000 Subject: [PATCH 1/3] [fern-generated] Update SDK Generated by Fern CLI Version: unknown Generators: - fernapi/fern-python-sdk: 4.37.0 --- PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md | 27 ------ .../start_agents_request_properties_avatar.py | 1 + ...agents_request_properties_avatar_vendor.py | 2 +- ...gents_request_properties_turn_detection.py | 6 -- src/agora_agent/core/client_wrapper.py | 4 +- src/agora_agent/types/asr.py | 84 ++++++++++++++++++ src/agora_agent/types/bytedance_duplex_tts.py | 29 +++++++ .../types/bytedance_duplex_tts_params.py | 37 ++++++++ src/agora_agent/types/bytedance_tts.py | 29 +++++++ src/agora_agent/types/bytedance_tts_params.py | 62 ++++++++++++++ src/agora_agent/types/cosyvoice_tts.py | 29 +++++++ src/agora_agent/types/cosyvoice_tts_params.py | 42 +++++++++ src/agora_agent/types/fengming_asr.py | 29 +++++++ src/agora_agent/types/minimax_tts_params.py | 29 ++++++- .../types/minimax_tts_params_audio_setting.py | 27 ++++++ .../minimax_tts_params_pronunciation_dict.py | 27 ++++++ .../minimax_tts_params_timber_weights_item.py | 28 ++++++ .../types/minimax_tts_params_voice_setting.py | 30 +++++++ .../types/sensetime_avatar_params.py | 52 ++++++++++++ ...sensetime_avatar_params_scene_list_item.py | 21 +++++ ...tar_params_scene_list_item_digital_role.py | 32 +++++++ ...s_scene_list_item_digital_role_position.py | 28 ++++++ src/agora_agent/types/stepfun_tts.py | 29 +++++++ src/agora_agent/types/stepfun_tts_params.py | 37 ++++++++ src/agora_agent/types/tencent_asr.py | 27 ++++++ src/agora_agent/types/tencent_asr_params.py | 47 ++++++++++ src/agora_agent/types/tencent_tts.py | 29 +++++++ src/agora_agent/types/tencent_tts_params.py | 62 ++++++++++++++ src/agora_agent/types/tts.py | 85 +++++++++++++++++++ src/agora_agent/types/xfyun_asr.py | 27 ++++++ 
src/agora_agent/types/xfyun_asr_params.py | 42 +++++++++ src/agora_agent/types/xfyun_bigmodel_asr.py | 27 ++++++ .../types/xfyun_bigmodel_asr_params.py | 47 ++++++++++ src/agora_agent/types/xfyun_dialect_asr.py | 27 ++++++ .../types/xfyun_dialect_asr_params.py | 42 +++++++++ 35 files changed, 1144 insertions(+), 39 deletions(-) delete mode 100644 PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md create mode 100644 src/agora_agent/types/bytedance_duplex_tts.py create mode 100644 src/agora_agent/types/bytedance_duplex_tts_params.py create mode 100644 src/agora_agent/types/bytedance_tts.py create mode 100644 src/agora_agent/types/bytedance_tts_params.py create mode 100644 src/agora_agent/types/cosyvoice_tts.py create mode 100644 src/agora_agent/types/cosyvoice_tts_params.py create mode 100644 src/agora_agent/types/fengming_asr.py create mode 100644 src/agora_agent/types/minimax_tts_params_audio_setting.py create mode 100644 src/agora_agent/types/minimax_tts_params_pronunciation_dict.py create mode 100644 src/agora_agent/types/minimax_tts_params_timber_weights_item.py create mode 100644 src/agora_agent/types/sensetime_avatar_params.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py create mode 100644 src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py create mode 100644 src/agora_agent/types/stepfun_tts.py create mode 100644 src/agora_agent/types/stepfun_tts_params.py create mode 100644 src/agora_agent/types/tencent_asr.py create mode 100644 src/agora_agent/types/tencent_asr_params.py create mode 100644 src/agora_agent/types/tencent_tts.py create mode 100644 src/agora_agent/types/tencent_tts_params.py create mode 100644 src/agora_agent/types/xfyun_asr.py create mode 100644 src/agora_agent/types/xfyun_asr_params.py create mode 100644 src/agora_agent/types/xfyun_bigmodel_asr.py create mode 100644 
src/agora_agent/types/xfyun_bigmodel_asr_params.py create mode 100644 src/agora_agent/types/xfyun_dialect_asr.py create mode 100644 src/agora_agent/types/xfyun_dialect_asr_params.py diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md deleted file mode 100644 index f3cd64a..0000000 --- a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md +++ /dev/null @@ -1,27 +0,0 @@ -# Python AgentKit Snake Case API Audit - -Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. - -Search terms: - -```bash -rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python -``` - -## Result - -No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. - -| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | -|---|---|---|---|---|---|---|---|---|---| -| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | -| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | -| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | -| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | -| 
`src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | -| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | -| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | - -## Guardrail Added - -`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py index 8993b2c..b9b668e 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar.py @@ -25,6 +25,7 @@ class StartAgentsRequestPropertiesAvatar(UncheckedBaseModel): - `liveavatar`: LiveAvatar (Beta) - `anam`: Anam (Beta) - `generic`: Generic (Beta) + - `sensetime`: SenseTime Avatar """ params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) diff --git a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py index e5bcec5..9a2d0c7 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_avatar_vendor.py @@ -3,5 +3,5 @@ import typing StartAgentsRequestPropertiesAvatarVendor = typing.Union[ - typing.Literal["akool", 
"liveavatar", "anam", "generic", "heygen"], typing.Any + typing.Literal["akool", "liveavatar", "anam", "generic", "sensetime", "heygen"], typing.Any ] diff --git a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py index fb58a36..40dbb02 100644 --- a/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py +++ b/src/agora_agent/agents/types/start_agents_request_properties_turn_detection.py @@ -5,7 +5,6 @@ import pydantic from ...core.pydantic_utilities import IS_PYDANTIC_V2 from ...core.unchecked_base_model import UncheckedBaseModel -from ...types.asr_language import AsrLanguage from .start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig from .start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness from .start_agents_request_properties_turn_detection_interrupt_mode import ( @@ -19,11 +18,6 @@ class StartAgentsRequestPropertiesTurnDetection(UncheckedBaseModel): Conversation turn detection settings. Controls the logic for voice activity detection and conversation turn determination. This object has no effect when `mllm.enable` is true; use `mllm.turn_detection` instead. """ - language: typing.Optional[AsrLanguage] = pydantic.Field(default=None) - """ - BCP-47 language tag identifying the primary language used for agent interaction. 
- """ - mode: typing.Optional[typing.Literal["default"]] = pydantic.Field(default=None) """ Conversation turn detection mode: diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py index ba5e462..47029d6 100644 --- a/src/agora_agent/core/client_wrapper.py +++ b/src/agora_agent/core/client_wrapper.py @@ -26,10 +26,10 @@ def __init__( def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { - "User-Agent": "agora-agents/v2.2.0", + "User-Agent": "agora-agents/v2.2.1", "X-Fern-Language": "Python", "X-Fern-SDK-Name": "agora-agents", - "X-Fern-SDK-Version": "v2.2.0", + "X-Fern-SDK-Version": "v2.2.1", **(self.get_custom_headers() or {}), } headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header diff --git a/src/agora_agent/types/asr.py b/src/agora_agent/types/asr.py index 1f2225d..e334a8d 100644 --- a/src/agora_agent/types/asr.py +++ b/src/agora_agent/types/asr.py @@ -18,6 +18,10 @@ from .open_ai_asr_params import OpenAiAsrParams from .sarvam_asr_params import SarvamAsrParams from .speechmatics_asr_params import SpeechmaticsAsrParams +from .tencent_asr_params import TencentAsrParams +from .xfyun_asr_params import XfyunAsrParams +from .xfyun_bigmodel_asr_params import XfyunBigmodelAsrParams +from .xfyun_dialect_asr_params import XfyunDialectAsrParams class Asr_Ares(UncheckedBaseModel): @@ -35,6 +39,36 @@ class Config: extra = pydantic.Extra.allow +class Asr_Fengming(UncheckedBaseModel): + vendor: typing.Literal["fengming"] = "fengming" + language: typing.Optional[AsrLanguage] = None + params: typing.Optional[typing.Dict[str, typing.Any]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_Tencent(UncheckedBaseModel): + vendor: typing.Literal["tencent"] 
= "tencent" + language: typing.Optional[AsrLanguage] = None + params: TencentAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + class Asr_Microsoft(UncheckedBaseModel): vendor: typing.Literal["microsoft"] = "microsoft" language: typing.Optional[AsrLanguage] = None @@ -155,9 +189,56 @@ class Config: extra = pydantic.Extra.allow +class Asr_Xfyun(UncheckedBaseModel): + vendor: typing.Literal["xfyun"] = "xfyun" + language: typing.Optional[AsrLanguage] = None + params: XfyunAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_XfyunBigmodel(UncheckedBaseModel): + vendor: typing.Literal["xfyun_bigmodel"] = "xfyun_bigmodel" + language: typing.Optional[AsrLanguage] = None + params: XfyunBigmodelAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Asr_XfyunDialect(UncheckedBaseModel): + vendor: typing.Literal["xfyun_dialect"] = "xfyun_dialect" + language: typing.Optional[AsrLanguage] = None + params: XfyunDialectAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + Asr = typing_extensions.Annotated[ typing.Union[ Asr_Ares, + Asr_Fengming, + Asr_Tencent, Asr_Microsoft, Asr_Deepgram, Asr_Openai, @@ -166,6 +247,9 @@ class Config: 
Asr_Assemblyai, Asr_Speechmatics, Asr_Sarvam, + Asr_Xfyun, + Asr_XfyunBigmodel, + Asr_XfyunDialect, ], UnionMetadata(discriminant="vendor"), ] diff --git a/src/agora_agent/types/bytedance_duplex_tts.py b/src/agora_agent/types/bytedance_duplex_tts.py new file mode 100644 index 0000000..76c5313 --- /dev/null +++ b/src/agora_agent/types/bytedance_duplex_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .bytedance_duplex_tts_params import BytedanceDuplexTtsParams + + +class BytedanceDuplexTts(UncheckedBaseModel): + """ + Bytedance duplex streaming Text-to-Speech configuration. + """ + + params: BytedanceDuplexTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_duplex_tts_params.py b/src/agora_agent/types/bytedance_duplex_tts_params.py new file mode 100644 index 0000000..3aa48d4 --- /dev/null +++ b/src/agora_agent/types/bytedance_duplex_tts_params.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class BytedanceDuplexTtsParams(UncheckedBaseModel): + """ + Bytedance duplex streaming TTS configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + Bytedance application ID. + """ + + token: str = pydantic.Field() + """ + Bytedance API token. 
+ """ + + speaker: str = pydantic.Field() + """ + Duplex TTS speaker identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_tts.py b/src/agora_agent/types/bytedance_tts.py new file mode 100644 index 0000000..32342f7 --- /dev/null +++ b/src/agora_agent/types/bytedance_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .bytedance_tts_params import BytedanceTtsParams + + +class BytedanceTts(UncheckedBaseModel): + """ + Bytedance Volcano Engine Text-to-Speech configuration. + """ + + params: BytedanceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/bytedance_tts_params.py b/src/agora_agent/types/bytedance_tts_params.py new file mode 100644 index 0000000..f4d9c7a --- /dev/null +++ b/src/agora_agent/types/bytedance_tts_params.py @@ -0,0 +1,62 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class BytedanceTtsParams(UncheckedBaseModel): + """ + Bytedance Volcano Engine TTS configuration parameters. 
+ """ + + token: str = pydantic.Field() + """ + Bytedance API token. + """ + + app_id: str = pydantic.Field() + """ + Bytedance application ID. + """ + + cluster: str = pydantic.Field() + """ + Bytedance cluster name. + """ + + voice_type: str = pydantic.Field() + """ + Bytedance voice type. + """ + + speed_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed ratio. + """ + + volume_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Volume ratio. + """ + + pitch_ratio: typing.Optional[float] = pydantic.Field(default=None) + """ + Pitch ratio. + """ + + emotion: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion preset. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/cosyvoice_tts.py b/src/agora_agent/types/cosyvoice_tts.py new file mode 100644 index 0000000..2519163 --- /dev/null +++ b/src/agora_agent/types/cosyvoice_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .cosyvoice_tts_params import CosyvoiceTtsParams + + +class CosyvoiceTts(UncheckedBaseModel): + """ + Alibaba Cloud CosyVoice Text-to-Speech configuration. + """ + + params: CosyvoiceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/cosyvoice_tts_params.py b/src/agora_agent/types/cosyvoice_tts_params.py new file mode 100644 index 0000000..3c48bb2 --- /dev/null +++ b/src/agora_agent/types/cosyvoice_tts_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class CosyvoiceTtsParams(UncheckedBaseModel): + """ + CosyVoice TTS configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + CosyVoice API key. + """ + + model: str = pydantic.Field() + """ + CosyVoice model identifier. + """ + + sample_rate: int = pydantic.Field() + """ + Audio sample rate in Hz. + """ + + voice: str = pydantic.Field() + """ + CosyVoice speaker voice. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/fengming_asr.py b/src/agora_agent/types/fengming_asr.py new file mode 100644 index 0000000..84fe7e0 --- /dev/null +++ b/src/agora_agent/types/fengming_asr.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage + + +class FengmingAsr(UncheckedBaseModel): + """ + Agora Fengming ASR configuration. 
+ """ + + language: typing.Optional[AsrLanguage] = None + params: typing.Optional[typing.Dict[str, typing.Any]] = pydantic.Field(default=None) + """ + Agora Fengming ASR configuration parameters. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params.py b/src/agora_agent/types/minimax_tts_params.py index 6442a71..076167d 100644 --- a/src/agora_agent/types/minimax_tts_params.py +++ b/src/agora_agent/types/minimax_tts_params.py @@ -5,6 +5,9 @@ import pydantic from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.unchecked_base_model import UncheckedBaseModel +from .minimax_tts_params_audio_setting import MinimaxTtsParamsAudioSetting +from .minimax_tts_params_pronunciation_dict import MinimaxTtsParamsPronunciationDict +from .minimax_tts_params_timber_weights_item import MinimaxTtsParamsTimberWeightsItem from .minimax_tts_params_voice_setting import MinimaxTtsParamsVoiceSetting @@ -18,18 +21,38 @@ class MinimaxTtsParams(UncheckedBaseModel): MiniMax API key """ - group_id: str = pydantic.Field() + group_id: typing.Optional[str] = pydantic.Field(default=None) """ MiniMax group identifier """ model: str = pydantic.Field() """ - TTS model (e.g., speech-02-turbo) + BYOK TTS model. Managed MiniMax preset models are selected through the top-level preset field instead. """ voice_setting: MinimaxTtsParamsVoiceSetting - url: str = pydantic.Field() + audio_setting: typing.Optional[MinimaxTtsParamsAudioSetting] = pydantic.Field(default=None) + """ + Audio output settings. + """ + + pronunciation_dict: typing.Optional[MinimaxTtsParamsPronunciationDict] = pydantic.Field(default=None) + """ + Custom pronunciation dictionary settings. 
+ """ + + timber_weights: typing.Optional[typing.List[MinimaxTtsParamsTimberWeightsItem]] = pydantic.Field(default=None) + """ + Weighted voice blending configuration. + """ + + language_boost: typing.Optional[str] = pydantic.Field(default=None) + """ + Language boost mode. + """ + + url: typing.Optional[str] = pydantic.Field(default=None) """ WebSocket endpoint (e.g., wss://api-uw.minimax.io/ws/v1/t2a_v2) """ diff --git a/src/agora_agent/types/minimax_tts_params_audio_setting.py b/src/agora_agent/types/minimax_tts_params_audio_setting.py new file mode 100644 index 0000000..e9dac4b --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_audio_setting.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsAudioSetting(UncheckedBaseModel): + """ + Audio output settings. + """ + + sample_rate: typing.Optional[int] = pydantic.Field(default=None) + """ + Audio sample rate in Hz. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py b/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py new file mode 100644 index 0000000..e8321fb --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_pronunciation_dict.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsPronunciationDict(UncheckedBaseModel): + """ + Custom pronunciation dictionary settings. 
+ """ + + tone: typing.Optional[typing.List[str]] = pydantic.Field(default=None) + """ + Tone override list. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_timber_weights_item.py b/src/agora_agent/types/minimax_tts_params_timber_weights_item.py new file mode 100644 index 0000000..2384427 --- /dev/null +++ b/src/agora_agent/types/minimax_tts_params_timber_weights_item.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class MinimaxTtsParamsTimberWeightsItem(UncheckedBaseModel): + voice_id: typing.Optional[str] = pydantic.Field(default=None) + """ + Voice identifier for blending. + """ + + weight: typing.Optional[float] = pydantic.Field(default=None) + """ + Relative blend weight. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/minimax_tts_params_voice_setting.py b/src/agora_agent/types/minimax_tts_params_voice_setting.py index 95b48e7..f409814 100644 --- a/src/agora_agent/types/minimax_tts_params_voice_setting.py +++ b/src/agora_agent/types/minimax_tts_params_voice_setting.py @@ -13,6 +13,36 @@ class MinimaxTtsParamsVoiceSetting(UncheckedBaseModel): Voice style identifier (e.g., English_captivating_female1) """ + speed: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed multiplier. 
+ """ + + vol: typing.Optional[float] = pydantic.Field(default=None) + """ + Voice volume multiplier. + """ + + pitch: typing.Optional[float] = pydantic.Field(default=None) + """ + Voice pitch adjustment. + """ + + emotion: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion preset. + """ + + latex_read: typing.Optional[bool] = pydantic.Field(default=None) + """ + Whether to read LaTeX expressions. + """ + + english_normalization: typing.Optional[bool] = pydantic.Field(default=None) + """ + Whether to normalize English text before synthesis. + """ + if IS_PYDANTIC_V2: model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 else: diff --git a/src/agora_agent/types/sensetime_avatar_params.py b/src/agora_agent/types/sensetime_avatar_params.py new file mode 100644 index 0000000..d777132 --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params.py @@ -0,0 +1,52 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +import typing_extensions +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.serialization import FieldMetadata +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item import SensetimeAvatarParamsSceneListItem + + +class SensetimeAvatarParams(UncheckedBaseModel): + """ + SenseTime Avatar configuration parameters. + """ + + agora_token: str = pydantic.Field() + """ + Agora token used by the avatar service. + """ + + agora_uid: str = pydantic.Field() + """ + Numeric Agora UID string used by the avatar service. + """ + + app_id: typing_extensions.Annotated[str, FieldMetadata(alias="appId")] = pydantic.Field() + """ + SenseTime application ID. + """ + + app_key: str = pydantic.Field() + """ + SenseTime application key. 
+ """ + + scene_list: typing_extensions.Annotated[ + typing.List[SensetimeAvatarParamsSceneListItem], FieldMetadata(alias="sceneList") + ] = pydantic.Field() + """ + SenseTime scene configuration list. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py new file mode 100644 index 0000000..82edf8a --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item.py @@ -0,0 +1,21 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item_digital_role import SensetimeAvatarParamsSceneListItemDigitalRole + + +class SensetimeAvatarParamsSceneListItem(UncheckedBaseModel): + digital_role: typing.Optional[SensetimeAvatarParamsSceneListItemDigitalRole] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py new file mode 100644 index 0000000..798ed59 --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role.py @@ -0,0 +1,32 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .sensetime_avatar_params_scene_list_item_digital_role_position import ( + SensetimeAvatarParamsSceneListItemDigitalRolePosition, +) + + +class SensetimeAvatarParamsSceneListItemDigitalRole(UncheckedBaseModel): + face_feature_id: typing.Optional[str] = pydantic.Field(default=None) + """ + SenseTime face feature identifier. + """ + + position: typing.Optional[SensetimeAvatarParamsSceneListItemDigitalRolePosition] = None + url: typing.Optional[str] = pydantic.Field(default=None) + """ + Avatar model package URL. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py new file mode 100644 index 0000000..c2ac79d --- /dev/null +++ b/src/agora_agent/types/sensetime_avatar_params_scene_list_item_digital_role_position.py @@ -0,0 +1,28 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class SensetimeAvatarParamsSceneListItemDigitalRolePosition(UncheckedBaseModel): + x: typing.Optional[float] = pydantic.Field(default=None) + """ + Avatar x position. + """ + + y: typing.Optional[float] = pydantic.Field(default=None) + """ + Avatar y position. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/stepfun_tts.py b/src/agora_agent/types/stepfun_tts.py new file mode 100644 index 0000000..708a5c2 --- /dev/null +++ b/src/agora_agent/types/stepfun_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .stepfun_tts_params import StepfunTtsParams + + +class StepfunTts(UncheckedBaseModel): + """ + StepFun Text-to-Speech configuration. + """ + + params: StepfunTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/stepfun_tts_params.py b/src/agora_agent/types/stepfun_tts_params.py new file mode 100644 index 0000000..5636e2c --- /dev/null +++ b/src/agora_agent/types/stepfun_tts_params.py @@ -0,0 +1,37 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class StepfunTtsParams(UncheckedBaseModel): + """ + StepFun TTS configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + StepFun API key. + """ + + model: str = pydantic.Field() + """ + StepFun model identifier. 
+ """ + + voice_id: str = pydantic.Field() + """ + StepFun voice identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_asr.py b/src/agora_agent/types/tencent_asr.py new file mode 100644 index 0000000..75ff988 --- /dev/null +++ b/src/agora_agent/types/tencent_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .tencent_asr_params import TencentAsrParams + + +class TencentAsr(UncheckedBaseModel): + """ + Tencent ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: TencentAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_asr_params.py b/src/agora_agent/types/tencent_asr_params.py new file mode 100644 index 0000000..3fa75ef --- /dev/null +++ b/src/agora_agent/types/tencent_asr_params.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class TencentAsrParams(UncheckedBaseModel): + """ + Tencent ASR configuration parameters. + """ + + key: str = pydantic.Field() + """ + Tencent ASR secret key. + """ + + app_id: str = pydantic.Field() + """ + Tencent Cloud application ID. 
+ """ + + secret: str = pydantic.Field() + """ + Tencent ASR secret. + """ + + engine_model_type: str = pydantic.Field() + """ + Tencent ASR engine model type. + """ + + voice_id: str = pydantic.Field() + """ + Tencent ASR voice session identifier. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_tts.py b/src/agora_agent/types/tencent_tts.py new file mode 100644 index 0000000..ad8a937 --- /dev/null +++ b/src/agora_agent/types/tencent_tts.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .tencent_tts_params import TencentTtsParams + + +class TencentTts(UncheckedBaseModel): + """ + Tencent Text-to-Speech configuration. + """ + + params: TencentTtsParams + skip_patterns: typing.Optional[typing.List[int]] = pydantic.Field(default=None) + """ + Controls whether the TTS module skips bracketed content when reading LLM response text. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tencent_tts_params.py b/src/agora_agent/types/tencent_tts_params.py new file mode 100644 index 0000000..5fcf6f5 --- /dev/null +++ b/src/agora_agent/types/tencent_tts_params.py @@ -0,0 +1,62 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class TencentTtsParams(UncheckedBaseModel): + """ + Tencent TTS configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + Tencent Cloud application ID. + """ + + secret_id: str = pydantic.Field() + """ + Tencent Cloud secret ID. + """ + + secret_key: str = pydantic.Field() + """ + Tencent Cloud secret key. + """ + + voice_type: int = pydantic.Field() + """ + Tencent voice type identifier. + """ + + volume: typing.Optional[float] = pydantic.Field(default=None) + """ + Volume setting. + """ + + speed: typing.Optional[float] = pydantic.Field(default=None) + """ + Speech speed setting. + """ + + emotion_category: typing.Optional[str] = pydantic.Field(default=None) + """ + Emotion category. + """ + + emotion_intensity: typing.Optional[int] = pydantic.Field(default=None) + """ + Emotion intensity. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/tts.py b/src/agora_agent/types/tts.py index 85761fd..f41cce3 100644 --- a/src/agora_agent/types/tts.py +++ b/src/agora_agent/types/tts.py @@ -9,7 +9,10 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.unchecked_base_model import UncheckedBaseModel, UnionMetadata from .amazon_tts_params import AmazonTtsParams +from .bytedance_duplex_tts_params import BytedanceDuplexTtsParams +from .bytedance_tts_params import BytedanceTtsParams from .cartesia_tts_params import CartesiaTtsParams +from .cosyvoice_tts_params import CosyvoiceTtsParams from .deepgram_tts_params import DeepgramTtsParams from .eleven_labs_tts_params import ElevenLabsTtsParams from .fish_audio_tts_params import FishAudioTtsParams @@ -21,6 +24,38 @@ 
from .open_ai_tts_params import OpenAiTtsParams from .rime_tts_params import RimeTtsParams from .sarvam_tts_params import SarvamTtsParams +from .stepfun_tts_params import StepfunTtsParams +from .tencent_tts_params import TencentTtsParams + + +class Tts_Tencent(UncheckedBaseModel): + vendor: typing.Literal["tencent"] = "tencent" + params: TencentTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_Bytedance(UncheckedBaseModel): + vendor: typing.Literal["bytedance"] = "bytedance" + params: BytedanceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow class Tts_Microsoft(UncheckedBaseModel): @@ -218,8 +253,55 @@ class Config: extra = pydantic.Extra.allow +class Tts_Cosyvoice(UncheckedBaseModel): + vendor: typing.Literal["cosyvoice"] = "cosyvoice" + params: CosyvoiceTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_BytedanceDuplex(UncheckedBaseModel): + vendor: typing.Literal["bytedance_duplex"] = "bytedance_duplex" + params: BytedanceDuplexTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 
+ else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + +class Tts_Stepfun(UncheckedBaseModel): + vendor: typing.Literal["stepfun"] = "stepfun" + params: StepfunTtsParams + skip_patterns: typing.Optional[typing.List[int]] = None + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow + + Tts = typing_extensions.Annotated[ typing.Union[ + Tts_Tencent, + Tts_Bytedance, Tts_Microsoft, Tts_Elevenlabs, Tts_Minimax, @@ -233,6 +315,9 @@ class Config: Tts_Amazon, Tts_Sarvam, Tts_Deepgram, + Tts_Cosyvoice, + Tts_BytedanceDuplex, + Tts_Stepfun, ], UnionMetadata(discriminant="vendor"), ] diff --git a/src/agora_agent/types/xfyun_asr.py b/src/agora_agent/types/xfyun_asr.py new file mode 100644 index 0000000..f97f312 --- /dev/null +++ b/src/agora_agent/types/xfyun_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_asr_params import XfyunAsrParams + + +class XfyunAsr(UncheckedBaseModel): + """ + iFlytek ASR configuration. 
+ """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_asr_params.py b/src/agora_agent/types/xfyun_asr_params.py new file mode 100644 index 0000000..5bbc081 --- /dev/null +++ b/src/agora_agent/types/xfyun_asr_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunAsrParams(UncheckedBaseModel): + """ + iFlytek ASR configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + iFlytek API key. + """ + + app_id: str = pydantic.Field() + """ + iFlytek application ID. + """ + + api_secret: str = pydantic.Field() + """ + iFlytek API secret. + """ + + language: str = pydantic.Field() + """ + iFlytek language code. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_bigmodel_asr.py b/src/agora_agent/types/xfyun_bigmodel_asr.py new file mode 100644 index 0000000..bfd4b18 --- /dev/null +++ b/src/agora_agent/types/xfyun_bigmodel_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. 
+ +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_bigmodel_asr_params import XfyunBigmodelAsrParams + + +class XfyunBigmodelAsr(UncheckedBaseModel): + """ + iFlytek large-model ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunBigmodelAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_bigmodel_asr_params.py b/src/agora_agent/types/xfyun_bigmodel_asr_params.py new file mode 100644 index 0000000..9beff97 --- /dev/null +++ b/src/agora_agent/types/xfyun_bigmodel_asr_params.py @@ -0,0 +1,47 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunBigmodelAsrParams(UncheckedBaseModel): + """ + iFlytek large-model ASR configuration parameters. + """ + + api_key: str = pydantic.Field() + """ + iFlytek large-model API key. + """ + + app_id: str = pydantic.Field() + """ + iFlytek large-model application ID. + """ + + api_secret: str = pydantic.Field() + """ + iFlytek large-model API secret. + """ + + language_name: str = pydantic.Field() + """ + iFlytek language family name. + """ + + language: str = pydantic.Field() + """ + iFlytek language mode. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_dialect_asr.py b/src/agora_agent/types/xfyun_dialect_asr.py new file mode 100644 index 0000000..1b1f33c --- /dev/null +++ b/src/agora_agent/types/xfyun_dialect_asr.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel +from .asr_language import AsrLanguage +from .xfyun_dialect_asr_params import XfyunDialectAsrParams + + +class XfyunDialectAsr(UncheckedBaseModel): + """ + iFlytek dialect ASR configuration. + """ + + language: typing.Optional[AsrLanguage] = None + params: XfyunDialectAsrParams + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/agora_agent/types/xfyun_dialect_asr_params.py b/src/agora_agent/types/xfyun_dialect_asr_params.py new file mode 100644 index 0000000..317a305 --- /dev/null +++ b/src/agora_agent/types/xfyun_dialect_asr_params.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing + +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +from ..core.unchecked_base_model import UncheckedBaseModel + + +class XfyunDialectAsrParams(UncheckedBaseModel): + """ + iFlytek dialect ASR configuration parameters. + """ + + app_id: str = pydantic.Field() + """ + iFlytek dialect application ID. + """ + + access_key_id: str = pydantic.Field() + """ + iFlytek access key ID. 
+ """ + + access_key_secret: str = pydantic.Field() + """ + iFlytek access key secret. + """ + + language: str = pydantic.Field() + """ + Dialect recognition language code. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow From 93ffa437ff29c6695018603ba45967ab6a5eb86a Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 17 Jun 2026 07:20:32 +0000 Subject: [PATCH 2/3] [fern-replay] Applied customizations Patches applied (5): - patch-7465fada: fix(agentkit): resolve Python session typing issues - patch-fae1249a: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. - patch-299e4bd9: fix(agentkit): resolve provider config type checks - patch-bed29b6b: chore: bump Python packages to 2.1.0 - patch-fecdc77c: Fix AgentKit request validation and provider wire-key coverage Patches with unresolved conflicts (12): - patch-6e30398b: chore(agentkit): bump to v1.5.0 and expose v2.7 type aliases - patch-9df782b4: feat(agentkit): update MLLM and LLM vendor wrappers for v2.7 - patch-26706d73: feat(agentkit): add GenericAvatar and session-aware avatar validation - patch-9f491c63: feat(agentkit): update Agent builder and session lifecycle for v2.7 - patch-eaec58eb: refactor(agentkit): align deprecated vendor aliases with canonical names - patch-20245632: feat(agentkit): export type aliases and avatar token helpers - patch-972dd5bd: updated docs - patch-d29165c4: make python compat package publishable - patch-44c21c14: Re-export AgentKit symbols from agora_agent package root Extend __getattr__ and __all__ so vendor classes, presets, and helpers are importable via `from agora_agent import ...`. 
Add tests and update class docstring examples to use the root import path. - patch-617ee134: feat(agentkit): support agent-level pipeline_id - patch-8e22e6d0: udpated agent docs - patch-c287be1c: Prepare Python SDK v2.2.0 release Run `fern-replay resolve` to apply these customizations. Patches absorbed by generator (3): - patch-fc9d93c3: Document agora-agents PyPI install name and migration notes - patch-d475306b: Move package rename guidance to installation docs and protect manual paths in Fern ignore. Consolidate migration notes into the installation guide with next-step links, add a brief README pointer, and exclude README, compat, and workflow files from Fern generation. - patch-c9355576: Streamline Python docs and README for app-credentials-first onboarding. Remove duplicated low-level client examples from the README, de-emphasize legacy auth modes, refocus the low-level API guide on AgentKit with telephony escape hatches, and update Agora-managed model terminology. The generator now produces these customizations natively. 
--- .fern/replay.lock | 1017 ++++---------------- PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md | 27 + src/agora_agent/agentkit/agent.py | 19 + src/agora_agent/agentkit/agent_session.py | 1 + src/agora_agent/agentkit/vendors/avatar.py | 43 + src/agora_agent/agentkit/vendors/llm.py | 3 + 6 files changed, 298 insertions(+), 812 deletions(-) create mode 100644 PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md diff --git a/.fern/replay.lock b/.fern/replay.lock index a435ef4..b9f48c4 100644 --- a/.fern/replay.lock +++ b/.fern/replay.lock @@ -12,7 +12,13 @@ generations: cli_version: unknown generator_versions: fernapi/fern-python-sdk: 4.37.0 -current_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + - commit_sha: 1d61baad436285e3b6a37555edb5ca67c158681c + tree_hash: 277360a3264a8c6b4bef09971b179275aab994ec + timestamp: 2026-06-17T07:20:24.878Z + cli_version: unknown + generator_versions: + fernapi/fern-python-sdk: 4.37.0 +current_generation: 1d61baad436285e3b6a37555edb5ca67c158681c patches: - id: patch-6e30398b content_hash: sha256:e99898e508e2d6cb9f134cc33e0b73c1c8acb845f5887924e0e38031a6e089c0 @@ -8237,26 +8243,26 @@ patches: | Think inject constant | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | status: unresolved - id: patch-7465fada - content_hash: sha256:9c6ed2e5f48702293eed8b213cc31cce63a7ed5a1ad16a0b23e791c13e77746f + content_hash: sha256:a2f90f66c927424018f2c3304742f097e8594dec9cb2f783264c7b11679a14ac original_commit: 7465fadafa0f1e62051d99b42d0eeda85f31eeee original_message: "fix(agentkit): resolve Python session typing issues" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - src/agora_agent/agentkit/agent_session.py patch_content: | diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index dbff562..dca9ee8 100644 + index 2900c18..745c465 100644 --- 
a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py - @@ -24,6 +24,7 @@ from .avatar_types import ( - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - + is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, + @@ -15,6 +15,7 @@ from ..agent_management.types.agent_think_agent_management_response import ( + AgentThinkAgentManagementResponse as AgentThinkResponse, ) + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping + from .avatar_types import ( + is_akool_avatar, theirs_snapshot: src/agora_agent/agentkit/agent_session.py: | import typing @@ -8276,8 +8282,8 @@ patches: AgentThinkAgentManagementResponse as AgentThinkResponse, ) from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, is_anam_avatar, @@ -8595,15 +8601,15 @@ patches: properties["tts"] = self._dump_model(self._agent.tts) if self._agent.llm is not None: llm = dict(self._agent.llm) - if self._agent.instructions is not None: + if self._agent.instructions is not None and "system_messages" not in llm: llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: + if self._agent.greeting is not None and "greeting_message" not in llm: llm["greeting_message"] = self._agent.greeting - if self._agent.greeting_configs is not None: + if self._agent.greeting_configs is not None and 
"greeting_configs" not in llm: llm["greeting_configs"] = self._dump_model(self._agent.greeting_configs) - if self._agent.failure_message is not None: + if self._agent.failure_message is not None and "failure_message" not in llm: llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: + if self._agent.max_history is not None and "max_history" not in llm: llm["max_history"] = self._agent.max_history properties["llm"] = llm if self._agent.stt is not None: @@ -8611,6 +8617,47 @@ patches: return properties + @staticmethod + def _request_properties_for_start( + resolved_properties: typing.Dict[str, typing.Any], + *, + resolved_preset: typing.Optional[str], + pipeline_id: typing.Optional[str], + ) -> typing.Any: + try: + return _start_properties_from_mapping(resolved_properties) + except Exception as exc: + if pipeline_id: + return resolved_properties + if resolved_preset: + normalized_preset = normalize_preset_input(resolved_preset) + if not normalized_preset: + raise + preset_categories = { + category + for item in normalized_preset.split(",") + for category in [get_preset_category(item)] + if category is not None + } + error_categories = _AgentSessionBase._validation_error_categories(exc) + if error_categories and error_categories.issubset(preset_categories): + return resolved_properties + raise + + @staticmethod + def _validation_error_categories(exc: Exception) -> typing.Set[str]: + errors = getattr(exc, "errors", None) + if not callable(errors): + return set() + categories: typing.Set[str] = set() + for error in errors(): + loc = error.get("loc") if isinstance(error, dict) else None + if isinstance(loc, tuple) and loc: + field = loc[0] + if field in {"asr", "llm", "tts"}: + categories.add(typing.cast(str, field)) + return categories + def _vendor_validation_categories( self, pipeline_id: typing.Optional[str], @@ -8775,10 +8822,11 @@ patches: "properties": resolved_properties, }) - try: - request_properties: typing.Any = 
StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) response = self._client.agents.start( self._app_id, @@ -9102,10 +9150,11 @@ patches: "properties": resolved_properties, }) - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) response = await self._client.agents.start( self._app_id, @@ -9478,7 +9527,7 @@ patches: original_commit: fae1249a20c53761a2eb5515a1bf92ca666760d1 original_message: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. 
original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - compat/agora-agent-server-sdk/README.md - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py @@ -9566,88 +9615,6 @@ patches: def __dir__(): return dir(_agora_agent) user_owned: true - - id: patch-fc9d93c3 - content_hash: sha256:93877741bdad745fda5dd549d7c3dd6bc315f4574aabd2defb52c0c795bff011 - original_commit: fc9d93c3026a6109d8a5e8b386418592f8d121c5 - original_message: Document agora-agents PyPI install name and migration notes - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/getting-started/installation.md - patch_content: | - diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md - index c14bdb2..f6f1750 100644 - --- a/docs/getting-started/installation.md - +++ b/docs/getting-started/installation.md - @@ -13,13 +13,13 @@ description: Install the Agora Conversational AI Python SDK. - ## Install with pip - - ```sh - -pip install agora-agent-sdk - +pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - -poetry add agora-agent-sdk - +poetry add agora-agents - ``` - - ## Dependencies - theirs_snapshot: - docs/getting-started/installation.md: | - --- - sidebar_position: 1 - title: Installation - description: Install the Agora Conversational AI Python SDK. 
- --- - - # Installation - - ## Prerequisites - - - Python >= 3.8 - - ## Install with pip - - ```sh - pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - poetry add agora-agents - ``` - - ## Dependencies - - The following packages are installed automatically: - - | Package | Purpose | - |---|---| - | `httpx` (>= 0.21.2) | HTTP client for sync and async requests | - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - ## Sync vs. Async - - The SDK supports both synchronous and asynchronous usage: - - - **Synchronous** — import `Agora` from `agora_agent` and use blocking method calls - - **Asynchronous** — import `AsyncAgora` and `AsyncAgentSession` from `agora_agent` and use `await` with all API calls - - ```python - # Sync - from agora_agent import Agora, Area - - # Async - from agora_agent import AsyncAgora, AsyncAgentSession, Area - ``` - - Both clients share the same constructor parameters and capabilities. See [Authentication](./authentication.md) for setup details. - status: unresolved - id: patch-44c21c14 content_hash: sha256:920a8a5905a3bbb134edb28b007c5c0b1b4b2c1f75753140fef305b14a64e3e0 original_commit: 44c21c14a14aa7ad469a18ce86024ff14ca2bf9b @@ -11681,535 +11648,20 @@ patches: assert "DeepgramSTT" in agora_agent.__all__ assert "OpenAI" in agora_agent.__all__ status: unresolved - - id: patch-d475306b - content_hash: sha256:407af5e7564d6e8d0b91f1e117cb433aec931f083225af53c6df2abfff281b22 - original_commit: d475306bd42279984bcf4934b900003e8e02c4eb - original_message: Move package rename guidance to installation docs and protect manual paths in Fern ignore. Consolidate migration notes into the installation guide with next-step links, add a brief README pointer, and exclude README, compat, and workflow files from Fern generation. 
- original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - compat/agora-agent-server-sdk/README.md - - docs/getting-started/installation.md - patch_content: | - diff --git a/compat/agora-agent-server-sdk/README.md b/compat/agora-agent-server-sdk/README.md - index e43d1d8..1da36aa 100644 - --- a/compat/agora-agent-server-sdk/README.md - +++ b/compat/agora-agent-server-sdk/README.md - @@ -14,3 +14,5 @@ This compatibility package re-exports the public API from `agora-agents` to supp - from agora_agent import Agora, Area - from agora_agent_server_sdk_compat import Agora, Area - ``` - + - +Maintainers: dual-publish steps live in the repository release workflow, not in the root README. - diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md - index 04b48da..8fca9ab 100644 - --- a/docs/getting-started/installation.md - +++ b/docs/getting-started/installation.md - @@ -53,4 +53,15 @@ from agora_agent import AsyncAgora, AsyncAgentSession, Area - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - -See [Authentication](./authentication.md) for setup details. - +## Next steps - + - +- [Authentication](./authentication.md) — configure your credentials - +- [Quick Start](./quick-start.md) — build your first conversational agent - + - +## Migrating from a previous package name - + - +The PyPI distribution was renamed from `agora-agent-server-sdk` to `agora-agents` in v2.0.0. Install `agora-agents`; the import path remains `agora_agent`. - + - +The legacy PyPI name remains available as a compatibility shim that re-exports `agora-agents`. See [compat/agora-agent-server-sdk](../../compat/agora-agent-server-sdk/README.md). - + - +For release and version details, see [changelog — Migration notes](../../changelog.md#migration-notes). 
- theirs_snapshot: - compat/agora-agent-server-sdk/README.md: | - # agora-agent-server-sdk - - This package has been renamed to `agora-agents`. - - New projects should install: - - ```sh - pip install agora-agents - ``` - - This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: - - ```python - from agora_agent import Agora, Area - from agora_agent_server_sdk_compat import Agora, Area - ``` - - Maintainers: dual-publish steps live in the repository release workflow, not in the root README. - docs/getting-started/installation.md: | - --- - sidebar_position: 1 - title: Installation - description: Install the Agora Conversational AI Python SDK. - --- - - # Installation - - ## Prerequisites - - - Python >= 3.8 - - ## Install with pip - - ```sh - pip install agora-agents - ``` - - ## Install with Poetry - - ```sh - poetry add agora-agents - ``` - - ## Imports - - ```python - from agora_agent import Agent, Agora, Area, DeepgramSTT, OpenAI - ``` - - The package installs as `agora-agents` and imports as `agora_agent`. - - ## Sync vs. 
Async - - The SDK supports both synchronous and asynchronous usage: - - - **Synchronous** — import `Agora` from `agora_agent` and use blocking method calls - - **Asynchronous** — import `AsyncAgora` and `AsyncAgentSession` from `agora_agent` and use `await` with all API calls - - ```python - # Sync - from agora_agent import Agora, Area - - # Async - from agora_agent import AsyncAgora, AsyncAgentSession, Area - ``` - - ## Dependencies - - | Package | Purpose | - | ------------------------------ | ------------------------------------------------------ | - | `httpx` (>= 0.21.2) | HTTP client for sync and async requests | - | `pydantic` (>= 1.9.2) | Data validation for vendor configuration and API types | - | `typing_extensions` (>= 4.0.0) | Backported type hints for Python 3.8+ | - - ## Next steps - - - [Authentication](./authentication.md) — configure your credentials - - [Quick Start](./quick-start.md) — build your first conversational agent - - ## Migrating from a previous package name - - The PyPI distribution was renamed from `agora-agent-server-sdk` to `agora-agents` in v2.0.0. Install `agora-agents`; the import path remains `agora_agent`. - - The legacy PyPI name remains available as a compatibility shim that re-exports `agora-agents`. See [compat/agora-agent-server-sdk](../../compat/agora-agent-server-sdk/README.md). - - For release and version details, see [changelog — Migration notes](../../changelog.md#migration-notes). - status: unresolved - - id: patch-c9355576 - content_hash: sha256:83b3b6148b21f2b4d53ee67321777522f5f4e871b61ea3b23f3a6b88ca052769 - original_commit: c93555763ffd63267a737b3e430217a890f203db - original_message: Streamline Python docs and README for app-credentials-first onboarding. Remove duplicated low-level client examples from the README, de-emphasize legacy auth modes, refocus the low-level API guide on AgentKit with telephony escape hatches, and update Agora-managed model terminology. 
- original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/getting-started/authentication.md - - docs/guides/low-level-api.md - patch_content: | - diff --git a/docs/getting-started/authentication.md b/docs/getting-started/authentication.md - index 31dcc56..74c62cd 100644 - --- a/docs/getting-started/authentication.md - +++ b/docs/getting-started/authentication.md - @@ -46,41 +46,6 @@ session = agent.create_session( - print(client.auth_mode) # "app-credentials" - ``` - - -## Other auth modes - +## Legacy auth modes - - -The SDK also supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. These are not recommended for new applications. - - - -### Token auth (`auth_token`) - - - -Pass a pre-minted Agora REST token on the client. You must also supply the RTC join token on `create_session(..., token=...)`. - - - -```python - -client = Agora( - - area=Area.US, - - app_id="your-app-id", - - app_certificate="your-app-certificate", - - auth_token="your-rest-auth-token", - -) - - - -session = agent.create_session( - - client, - - channel="room-123", - - agent_uid="1", - - remote_uids=["100"], - - token="your-rtc-join-token", - -) - -``` - - - -### Basic Auth (`customer_id` + `customer_secret`) - - - -Uses HTTP Basic Auth with Customer ID and Secret from Agora Console. Avoid for new integrations — the same credentials are sent on every request instead of minting fresh tokens. - - - -```python - -client = Agora( - - area=Area.US, - - app_id="your-app-id", - - app_certificate="your-app-certificate", - - customer_id="your-customer-id", - - customer_secret="your-customer-secret", - -) - -``` - +The generated client still supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. Do not use those modes for new session integrations. Use app credentials so AgentKit can mint short-lived ConvoAI REST auth and RTC join tokens for each session. 
- diff --git a/docs/guides/low-level-api.md b/docs/guides/low-level-api.md - index 6677b45..47397b7 100644 - --- a/docs/guides/low-level-api.md - +++ b/docs/guides/low-level-api.md - @@ -1,187 +1,55 @@ - --- - sidebar_position: 10 - title: Low-Level API - -description: Direct client.agents.start() usage without the builder pattern. - +description: Use generated clients for escape-hatch APIs while keeping agent sessions on AgentKit. - --- - - # Low-Level API - - -For full control over request payloads you can call the generated clients directly and pass raw types such as `StartAgentsRequestProperties`, `Tts_Elevenlabs`, and `StartAgentsRequestPropertiesAsr`. Use this when you need vendor or options not exposed by the agentkit, or when integrating with generated types from the API spec. - +Use the `Agent` builder and `AgentSession` for conversational agent starts. That path generates ConvoAI REST auth and RTC join tokens from `app_id` and `app_certificate`, so application code does not need prebuilt REST tokens, RTC tokens, Customer ID, or Customer Secret. - - -## Raw telephony and phone-number APIs - - - -AgentKit focuses on realtime agent session helpers. Telephony call status, call hangup, and phone-number management are exposed through the generated low-level clients: - - - -- `client.telephony` for call status and hangup operations - -- `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - +Generated clients are still available for API surface that AgentKit does not wrap yet, such as telephony and phone-number management. 
- - -## Cascading flow (ASR → LLM → TTS) - +## Client setup - - ```python - from agora_agent import Agora, Area - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesAsr, - - StartAgentsRequestPropertiesLlm, - -) - -from agora_agent.types.eleven_labs_tts_params import ElevenLabsTtsParams - -from agora_agent.types.tts import Tts_Elevenlabs - - client = Agora( - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - -) - -client.agents.start( - - client.app_id, - - name="unique_name", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - asr=StartAgentsRequestPropertiesAsr( - - language="en-US", - - vendor="deepgram", - - params={"api_key": "YOUR_DEEPGRAM_API_KEY"}, - - ), - - tts=Tts_Elevenlabs( - - params=ElevenLabsTtsParams( - - key="YOUR_ELEVENLABS_API_KEY", - - model_id="eleven_flash_v2_5", - - voice_id="pNInz6obpgDQGcFmaJgB", - - sample_rate=24000, - - ), - - ), - - llm=StartAgentsRequestPropertiesLlm( - - url="https://api.openai.com/v1/chat/completions", - - api_key="", - - system_messages=[ - - {"role": "system", "content": "You are a helpful chatbot."} - - ], - - params={"model": "gpt-4o-mini"}, - - max_history=32, - - greeting_message="Hello, how can I assist you today?", - - failure_message="Please hold on a second.", - - ), - - ), - + app_id="your-app-id", - + app_certificate="your-app-certificate", - ) - ``` - - -## Async (low-level) - +## Raw telephony and phone-number APIs - - -```python - -import asyncio - -from agora_agent import Area, AsyncAgora - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesAsr, - - StartAgentsRequestPropertiesLlm, - -) - -from agora_agent.types.eleven_labs_tts_params import ElevenLabsTtsParams - -from agora_agent.types.tts import 
Tts_Elevenlabs - +AgentKit focuses on realtime agent session helpers. Use generated clients for operational APIs: - - -client = AsyncAgora( - - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - +- `client.telephony` for call status and hangup operations - +- `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - + - +```python - +calls = client.telephony.list( - + appid=client.app_id, - + type="sip", - ) - - -async def main() -> None: - - await client.agents.start( - - client.app_id, - - name="unique_name", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - asr=StartAgentsRequestPropertiesAsr( - - language="en-US", - - vendor="deepgram", - - params={"api_key": "YOUR_DEEPGRAM_API_KEY"}, - - ), - - tts=Tts_Elevenlabs( - - params=ElevenLabsTtsParams( - - key="YOUR_ELEVENLABS_API_KEY", - - model_id="eleven_flash_v2_5", - - voice_id="pNInz6obpgDQGcFmaJgB", - - sample_rate=24000, - - ), - - ), - - llm=StartAgentsRequestPropertiesLlm( - - url="https://api.openai.com/v1/chat/completions", - - api_key="", - - system_messages=[ - - {"role": "system", "content": "You are a helpful chatbot."} - - ], - - params={"model": "gpt-4o-mini"}, - - max_history=32, - - greeting_message="Hello, how can I assist you today?", - - failure_message="Please hold on a second.", - - ), - - ), - - ) - - - -asyncio.run(main()) - +for call in calls: - + print(call.id, call.state) - ``` - - -## MLLM flow (multimodal) - +## Direct agent APIs - - -For real-time audio with OpenAI Realtime or Google Gemini Live, use the MLLM flow instead of the cascading ASR → LLM → TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview). - +`client.agents` exposes the generated REST surface for advanced integrations. 
Prefer `agent.create_session(...).start()` for new session starts because it handles auth, token generation, vendor serialization, lifecycle state, and avatar enrichment. - - -```python - -from agora_agent import Agora, Area - -from agora_agent.agents import ( - - StartAgentsRequestProperties, - - StartAgentsRequestPropertiesMllm, - - StartAgentsRequestPropertiesMllmVendor, - - StartAgentsRequestPropertiesTts, - - StartAgentsRequestPropertiesTtsVendor, - - StartAgentsRequestPropertiesLlm, - -) - +If you need an endpoint that is not wrapped by `AgentSession`, use `session.raw` after creating the session: - - -client = Agora( - - area=Area.US, - - app_id="YOUR_APP_ID", - - app_certificate="YOUR_APP_CERTIFICATE", - - auth_token="your-rest-auth-token", - -) - - - -client.agents.start( - - client.app_id, - - name="mllm_agent", - - properties=StartAgentsRequestProperties( - - channel="channel_name", - - token="your_token", - - agent_rtc_uid="1001", - - remote_rtc_uids=["1002"], - - idle_timeout=120, - - mllm=StartAgentsRequestPropertiesMllm( - - enable=True, - - url="wss://api.openai.com/v1/realtime", - - api_key="", - - vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI, - - params={ - - "model": "gpt-4o-realtime-preview", - - "voice": "alloy", - - }, - - input_modalities=["audio"], - - output_modalities=["text", "audio"], - - greeting_message="Hello! I'm ready to chat in real-time.", - - turn_detection={ - - "mode": "server_vad", - - "server_vad_config": { - - "idle_timeout_ms": 5000, - - }, - - }, - - ), - - ), - +```python - +info = session.raw.get( - + appid=session.app_id, - + agent_id=session.id, - ) - ``` - - -For more on the agentkit-based MLLM flow, see [MLLM Flow](./mllm-flow.md). - +You must pass `appid` and `agent_id` manually when using generated raw methods. 
- theirs_snapshot: - docs/getting-started/authentication.md: | - --- - sidebar_position: 2 - title: Authentication - description: Configure the Python SDK with app credentials and understand other supported auth modes. - --- - - # Authentication - - Create `Agora` or `AsyncAgora` with `app_id` and `app_certificate` only. The SDK mints a fresh ConvoAI REST token for each API call and generates the RTC join token when the session starts. - - ## App credentials - - ```python - from agora_agent import Agent, Agora, Area, DeepgramSTT, OpenAI, MiniMaxTTS - - client = Agora( - area=Area.US, - app_id="your-app-id", - app_certificate="your-app-certificate", - ) - - agent = ( - Agent(instructions="Be concise.") - .with_stt(DeepgramSTT(model="nova-3")) - .with_llm(OpenAI(model="gpt-4o-mini")) - .with_tts(MiniMaxTTS(model="speech_2_6_turbo", voice_id="English_captivating_female1")) - ) - - session = agent.create_session( - client, - channel="room-123", - agent_uid="1", - remote_uids=["100"], - ) - ``` - - ## Why app credentials - - - Fresh short-lived tokens per API call instead of reusing long-lived credentials - - No Customer ID / Customer Secret in request headers - - No manual REST or RTC token provisioning in application code - - ## Inspecting auth mode - - ```python - print(client.auth_mode) # "app-credentials" - ``` - - ## Legacy auth modes - - The generated client still supports pre-minted REST tokens and HTTP Basic Auth for legacy integrations. Do not use those modes for new session integrations. Use app credentials so AgentKit can mint short-lived ConvoAI REST auth and RTC join tokens for each session. - docs/guides/low-level-api.md: | - --- - sidebar_position: 10 - title: Low-Level API - description: Use generated clients for escape-hatch APIs while keeping agent sessions on AgentKit. - --- - - # Low-Level API - - Use the `Agent` builder and `AgentSession` for conversational agent starts. 
That path generates ConvoAI REST auth and RTC join tokens from `app_id` and `app_certificate`, so application code does not need prebuilt REST tokens, RTC tokens, Customer ID, or Customer Secret. - - Generated clients are still available for API surface that AgentKit does not wrap yet, such as telephony and phone-number management. - - ## Client setup - - ```python - from agora_agent import Agora, Area - - client = Agora( - area=Area.US, - app_id="your-app-id", - app_certificate="your-app-certificate", - ) - ``` - - ## Raw telephony and phone-number APIs - - AgentKit focuses on realtime agent session helpers. Use generated clients for operational APIs: - - - `client.telephony` for call status and hangup operations - - `client.phone_numbers` for phone-number list, create, retrieve, update, and delete operations - - ```python - calls = client.telephony.list( - appid=client.app_id, - type="sip", - ) - - for call in calls: - print(call.id, call.state) - ``` - - ## Direct agent APIs - - `client.agents` exposes the generated REST surface for advanced integrations. Prefer `agent.create_session(...).start()` for new session starts because it handles auth, token generation, vendor serialization, lifecycle state, and avatar enrichment. - - If you need an endpoint that is not wrapped by `AgentSession`, use `session.raw` after creating the session: - - ```python - info = session.raw.get( - appid=session.app_id, - agent_id=session.id, - ) - ``` - - You must pass `appid` and `agent_id` manually when using generated raw methods. 
- status: unresolved - id: patch-299e4bd9 - content_hash: sha256:e1470176436d28416d0ff67d8acc614060fae7b312f86c09b899a92d1c4adfe4 + content_hash: sha256:ee71350debd51653f1cb1472477a577436d74cbb847b3536a9cdbff0211abf2d original_commit: 299e4bd9cb59bd6144084332a7c3fa7bf260769f original_message: "fix(agentkit): resolve provider config type checks" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - src/agora_agent/agentkit/agent.py - src/agora_agent/agentkit/vendors/llm.py - src/agora_agent/agentkit/vendors/mllm.py - src/agora_agent/agentkit/vendors/stt.py - patch_content: |+ + patch_content: | diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index 6275f04..ecf01c6 100644 + index 1daba82..95cfe34 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py @@ -57,6 +57,8 @@ from ..agents.types.start_agents_request_properties_filler_words_content import @@ -12221,7 +11673,7 @@ patches: from ..types.asr import Asr from ..types.llm import Llm from ..types.llm_style import LlmStyle as GeneratedLlmStyle - @@ -536,6 +538,23 @@ class Agent: + @@ -544,6 +546,23 @@ class Agent: ) return new_agent @@ -12246,12 +11698,10 @@ patches: """Deprecated. 
Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py - index 9156a01..5dd822d 100644 + index 5a9f39e..1f1b354 100644 --- a/src/agora_agent/agentkit/vendors/llm.py +++ b/src/agora_agent/agentkit/vendors/llm.py - @@ -1,7 +1,10 @@ - -from typing import Any, Dict, List, Optional - +from typing import Any, Dict, List, Optional, Union + @@ -2,6 +2,9 @@ from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, ConfigDict, Field, model_validator @@ -12261,43 +11711,6 @@ patches: from .base import BaseLLM LlmGreetingConfigs = Dict[str, Any] - diff --git a/src/agora_agent/agentkit/vendors/mllm.py b/src/agora_agent/agentkit/vendors/mllm.py - index 236a494..6a260d8 100644 - --- a/src/agora_agent/agentkit/vendors/mllm.py - +++ b/src/agora_agent/agentkit/vendors/mllm.py - @@ -1,3 +1,4 @@ - +import warnings - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - diff --git a/src/agora_agent/agentkit/vendors/stt.py b/src/agora_agent/agentkit/vendors/stt.py - index e5117b0..bb222a9 100644 - --- a/src/agora_agent/agentkit/vendors/stt.py - +++ b/src/agora_agent/agentkit/vendors/stt.py - @@ -89,6 +89,7 @@ class SpeechmaticsSTTOptions(BaseModel): - - api_key: str = Field(..., description="Speechmatics API key") - language: str = Field(..., description="Language code (e.g., en, es, fr)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - model: Optional[str] = Field(default=None, description="Model name") - uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - @@ -124,6 +125,7 @@ class DeepgramSTTOptions(BaseModel): - api_key: Optional[str] = Field(default=None, description="Deepgram API 
key") - model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") - punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - @@ -353,6 +355,7 @@ class SarvamSTTOptions(BaseModel): - - api_key: str = Field(..., description="Sarvam API key") - language: str = Field(..., description="Language code (e.g., en, hi, ta)") - + interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") - model: Optional[str] = Field(default=None, description="Model name") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - theirs_snapshot: src/agora_agent/agentkit/agent.py: | from __future__ import annotations @@ -12380,6 +11793,7 @@ patches: from ..agent_management.types.agent_think_agent_management_response import ( AgentThinkAgentManagementResponse, ) + from ..core.pydantic_utilities import parse_obj_as from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS # Top-level aliases @@ -12492,6 +11906,13 @@ patches: debug: bool warn: typing.Callable[[str], None] + + def _start_properties_from_mapping( + properties: typing.Mapping[str, typing.Any], + ) -> StartAgentsRequestProperties: + return parse_obj_as(StartAgentsRequestProperties, dict(properties)) + + # LLM sub-type aliases LlmGreetingConfigs = typing.Dict[str, typing.Any] LlmGreetingConfigsMode = typing.Any @@ -12602,7 +12023,7 @@ patches: def _validate_turn_detection_language(value: typing.Any) -> TurnDetectionLanguage: if not _is_turn_detection_language(value): - raise 
ValueError(f"Invalid interaction language: {value}") + raise ValueError(f"Invalid turn_detection.language: {value}") return value # type: ignore[return-value] @@ -13217,7 +12638,7 @@ patches: if self._failure_message is not None: mllm_config.setdefault("failure_message", self._failure_message) base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) if skip_vendor_validation: warnings.warn( @@ -13240,12 +12661,13 @@ patches: allow_missing_llm = "llm" in allow_missing_categories allow_missing_tts = "tts" in allow_missing_categories + turn_detection_config = self._resolve_turn_detection_config() if not skip_asr_validation and (self._stt is not None or not allow_missing_asr): - base_kwargs["asr"] = self._resolve_asr_config() - base_kwargs["turn_detection"] = self._resolve_turn_detection_config() + base_kwargs["asr"] = self._resolve_asr_config(turn_detection_config) + base_kwargs["turn_detection"] = turn_detection_config if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) if self._tts is None and not (skip_tts_validation or allow_missing_tts): raise ValueError("TTS configuration is required. Use with_tts() to set it.") @@ -13258,39 +12680,34 @@ patches: if self._tts is not None and not skip_tts_validation: base_kwargs["tts"] = self._tts - return StartAgentsRequestProperties(**base_kwargs) + return _start_properties_from_mapping(base_kwargs) def _resolve_llm_config(self) -> typing.Dict[str, typing.Any]: llm_config = dict(self._llm or {}) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. 
- if self._instructions is not None: + if self._instructions is not None and "system_messages" not in llm_config: llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: + if self._greeting is not None and "greeting_message" not in llm_config: llm_config["greeting_message"] = self._greeting - if self._greeting_configs is not None: + if self._greeting_configs is not None and "greeting_configs" not in llm_config: llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) - if self._failure_message is not None: + if self._failure_message is not None and "failure_message" not in llm_config: llm_config["failure_message"] = self._failure_message - if self._max_history is not None: + if self._max_history is not None and "max_history" not in llm_config: llm_config["max_history"] = self._max_history return llm_config - def _resolve_asr_config(self) -> typing.Dict[str, typing.Any]: + def _resolve_asr_config(self, turn_detection_config: TurnDetectionConfig) -> typing.Dict[str, typing.Any]: asr_config = dict(self._stt or {}) - asr_config.pop("language", None) if not asr_config: asr_config["vendor"] = "ares" + asr_config["language"] = self._field_value(turn_detection_config, "language") return asr_config def _resolve_turn_detection_config(self) -> TurnDetectionConfig: - existing_stt_language = self._stt.get("language") if self._stt is not None else None existing_turn_detection_language = self._field_value(self._turn_detection, "language") language = ( existing_turn_detection_language if existing_turn_detection_language is not None - else existing_stt_language - if _is_turn_detection_language(existing_stt_language) else DEFAULT_TURN_DETECTION_LANGUAGE ) language = _validate_turn_detection_language(language) @@ -13708,12 +13125,13 @@ patches: options = _dump_optional_model(self.options) options.pop("project_id", None) options.pop("location", None) - config = Gemini(**options).to_config() - params = 
dict(config["params"]) - params["project_id"] = self.options.project_id - params["location"] = self.options.location - config["params"] = params - return config + if not options.get("url"): + options["url"] = ( + f"https://{self.options.location}-aiplatform.googleapis.com/v1/projects/" + f"{self.options.project_id}/locations/{self.options.location}/" + f"publishers/google/models/{self.options.model}:streamGenerateContent?alt=sse" + ) + return Gemini(**options).to_config() class AmazonBedrockOptions(BaseModel): @@ -14124,98 +13542,20 @@ patches: return config src/agora_agent/agentkit/vendors/stt.py: | - from typing import Any, Dict, Optional, Tuple + from typing import Any, Dict, Optional from pydantic import BaseModel, ConfigDict, Field, model_validator - from typing_extensions import Literal from .base import BaseSTT - TurnDetectionLanguage = Literal[ - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ] - - TURN_DETECTION_LANGUAGE_VALUES: Tuple[TurnDetectionLanguage, ...] 
= ( - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ) - _TURN_DETECTION_LANGUAGES = set(TURN_DETECTION_LANGUAGE_VALUES) _DEEPGRAM_MANAGED_MODELS = {"nova-2", "nova-3"} - def _turn_detection_language(language: Optional[str]) -> Optional[TurnDetectionLanguage]: - if language in _TURN_DETECTION_LANGUAGES: - return language # type: ignore[return-value] - return None - - class SpeechmaticsSTTOptions(BaseModel): model_config = ConfigDict(extra="forbid") api_key: str = Field(..., description="Speechmatics API key") language: str = Field(..., description="Language code (e.g., en, es, fr)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") model: Optional[str] = Field(default=None, description="Model name") uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14239,9 +13579,6 @@ patches: "vendor": "speechmatics", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14251,7 +13588,7 @@ patches: api_key: Optional[str] = Field(default=None, description="Deepgram API key") model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") + keyterm: Optional[str] = Field(default=None, 
description="Boost specialized terms and brands for Deepgram") smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14279,13 +13616,12 @@ patches: params["smart_format"] = self.options.smart_format if self.options.punctuation is not None: params["punctuation"] = self.options.punctuation + if self.options.keyterm is not None: + params["keyterm"] = self.options.keyterm config: Dict[str, Any] = { "vendor": "deepgram", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14314,9 +13650,6 @@ patches: "vendor": "microsoft", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14338,22 +13671,26 @@ patches: params: Dict[str, Any] = dict(self.options.additional_params or {}) params["api_key"] = self.options.api_key - transcription = {"model": "whisper-1", **(self.options.input_audio_transcription or {})} + transcription: Dict[str, Any] = {"model": "gpt-4o-mini-transcribe"} + transcription.update(self.options.input_audio_transcription or {}) if self.options.model is not None: transcription["model"] = self.options.model if self.options.prompt is not None: transcription["prompt"] = self.options.prompt if self.options.language is not None: transcription["language"] = self.options.language + if not transcription.get("model"): + raise ValueError("OpenAISTT: input_audio_transcription.model is required") + if not transcription.get("prompt"): + raise ValueError("OpenAISTT: input_audio_transcription.prompt is required") + if not transcription.get("language"): + raise ValueError("OpenAISTT: 
input_audio_transcription.language is required") params["input_audio_transcription"] = transcription config: Dict[str, Any] = { "vendor": "openai", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14388,9 +13725,6 @@ patches: "vendor": "google", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14421,9 +13755,6 @@ patches: "vendor": "amazon", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config @@ -14451,16 +13782,12 @@ patches: "vendor": "assemblyai", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config class AresSTTOptions(BaseModel): model_config = ConfigDict(extra="forbid") - language: Optional[TurnDetectionLanguage] = Field(default=None, description="Language code") additional_params: Optional[Dict[str, Any]] = Field(default=None) class AresSTT(BaseSTT): @@ -14469,8 +13796,6 @@ patches: def to_config(self) -> Dict[str, Any]: config: Dict[str, Any] = {"vendor": "ares"} - if self.options.language is not None: - config["language"] = self.options.language if self.options.additional_params: config["params"] = self.options.additional_params return config @@ -14481,7 +13806,6 @@ patches: api_key: str = Field(..., description="Sarvam API key") language: str = Field(..., description="Language code (e.g., en, hi, ta)") - interaction_language: Optional[InteractionLanguage] = Field(default=None, description="Agora interaction language for asr.language") model: Optional[str] = 
Field(default=None, description="Model name") additional_params: Optional[Dict[str, Any]] = Field(default=None) @@ -14502,9 +13826,6 @@ patches: "vendor": "sarvam", "params": params, } - turn_detection_language = _turn_detection_language(self.options.language) - if turn_detection_language is not None: - config["language"] = turn_detection_language return config - id: patch-617ee134 content_hash: sha256:ea2d27ba8019bf09ce5766d322eb7218fcee0a90124e823ba16c4e45dc1af5a9 @@ -17920,24 +17241,24 @@ patches: Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. status: unresolved - id: patch-bed29b6b - content_hash: sha256:8008d9c33a194a48ef317868953c26d5b03ede60c23743b4249260894c0f6417 + content_hash: sha256:35a32ee64c95efd478f684c167efc54c9d95344af837e99b31da4c36f66febce original_commit: bed29b6b7d4d08480a8510b26b5e21d1ef234cc9 original_message: "chore: bump Python packages to 2.1.0" original_author: digitallysavvy - base_generation: b66d871314ca0e5929cb9c9095949a7fd5e856a7 + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - compat/agora-agent-server-sdk/pyproject.toml patch_content: | diff --git a/compat/agora-agent-server-sdk/pyproject.toml b/compat/agora-agent-server-sdk/pyproject.toml - index ac93128..468294b 100644 + index eea45d7..078ac75 100644 --- a/compat/agora-agent-server-sdk/pyproject.toml +++ b/compat/agora-agent-server-sdk/pyproject.toml @@ -3,7 +3,7 @@ name = "agora-agent-server-sdk" [tool.poetry] name = "agora-agent-server-sdk" - -version = "v2.0.0" - +version = "v2.1.0" + -version = "v2.1.1" + +version = "v2.2.0" description = "Compatibility shim for the renamed agora-agents package." 
readme = "README.md" authors = [] @@ -17945,8 +17266,8 @@ patches: [tool.poetry.dependencies] python = "^3.8" - -agora-agents = ">=2.0.0,<3.0.0" - +agora-agents = ">=2.1.0,<3.0.0" + -agora-agents = ">=2.1.1,<3.0.0" + +agora-agents = ">=2.2.0,<3.0.0" [build-system] requires = ["poetry-core"] @@ -17957,7 +17278,7 @@ patches: [tool.poetry] name = "agora-agent-server-sdk" - version = "v2.1.0" + version = "v2.2.0" description = "Compatibility shim for the renamed agora-agents package." readme = "README.md" authors = [] @@ -17989,9 +17310,81 @@ patches: [tool.poetry.dependencies] python = "^3.8" - agora-agents = ">=2.1.0,<3.0.0" + agora-agents = ">=2.2.0,<3.0.0" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" user_owned: true + - id: patch-fecdc77c + content_hash: sha256:4c3321ec0facd689cee56c0fc609559d1038380d04a4cd8478b7ad7bb4a85388 + original_commit: fecdc77c866f433d8287fcb8a55328612e016b21 + original_message: Fix AgentKit request validation and provider wire-key coverage + original_author: digitallysavvy + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c + files: + - PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + patch_content: | + diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + new file mode 100644 + index 0000000..f3cd64a + --- /dev/null + +++ b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md + @@ -0,0 +1,27 @@ + +# Python AgentKit Snake Case API Audit + + + +Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + + + +Search terms: + + + +```bash + +rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python + +``` + + + +## Result + + + +No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. 
+ + + +| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | + +|---|---|---|---|---|---|---|---|---|---| + +| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | + +| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | + +| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | + +| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + + + +## Guardrail Added + + + +`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. 
+ theirs_snapshot: + PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md: | + # Python AgentKit Snake Case API Audit + + Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + + Search terms: + + ```bash + rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python + ``` + + ## Result + + No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. + + | File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | + |---|---|---|---|---|---|---|---|---|---| + | `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | + | `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | + | `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | + | 
`tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + + ## Guardrail Added + + `tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. + user_owned: true diff --git a/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md new file mode 100644 index 0000000..f3cd64a --- /dev/null +++ b/PYTHON-AGENTKIT-SNAKE-CASE-AUDIT.md @@ -0,0 +1,27 @@ +# Python AgentKit Snake Case API Audit + +Scope: `agora-agents-python` public AgentKit wrappers, docs, and tests. + +Search terms: + +```bash +rg -n "apiKey|baseUrl|modelId|voiceId|groupId|keyTerm|turnDetection|inputAudioTranscription|greetingMessage|failureMessage|projectId|adcCredentialsString|sampleRate|targetLanguageCode|resourceName|deploymentName" agora-agents-python +``` + +## Result + +No shipped camelCase public Python constructor kwargs were found in source or docs examples. No deprecated alias helper is required for this pass. 
+ +| File | Class / symbol | Public arg or example | Current spelling | Desired Python spelling | `to_config()` key | Wire key | Action | Compatibility needed | Test coverage | +|---|---|---|---|---|---|---|---|---|---| +| `src/agora_agent/agentkit/vendors/tts.py` | `GoogleTTS` | constructor arg | `voice_name` | `voice_name` | `params.VoiceSelectionParams` | `params.VoiceSelectionParams` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/agentkit/vendors/tts.py` | `RimeTTS` | constructor arg | `model_id` | `model_id` | `params.modelId` | `params.modelId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/agentkit/vendors/tts.py` | `MurfTTS` | constructor arg | `voice_id` | `voice_id` | `params.voiceId` | `params.voiceId` | keep | no | `tests/custom/test_tts_vendors.py`, `tests/custom/test_request_body.py` | +| `src/agora_agent/types/rime_tts_params.py` | generated model | generated alias | `modelId` | n/a | `model_id` | `modelId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `src/agora_agent/types/murf_tts_params.py` | generated model | generated alias | `voiceId` | n/a | `voice_id` | `voiceId` | keep | no | `tests/custom/test_tts_vendors.py` | +| `tests/custom/test_request_body.py` | wire assertion | payload key | `voiceId` | n/a | `params.voiceId` | `params.voiceId` | keep | no | request-body test | +| `tests/custom/test_tts_vendors.py` | wire assertion | payload key | `modelId`, `voiceId`, `VoiceSelectionParams` | n/a | generated model fields | wire aliases | keep | no | wire serialization test | + +## Guardrail Added + +`tests/custom/test_docs_snake_case.py` scans Python markdown code fences and fails on common camelCase kwargs such as `apiKey`, `baseUrl`, `modelId`, `voiceId`, `projectId`, and `greetingMessage`. JSON, TypeScript, Go, shell, and YAML examples are skipped so wire payload examples can retain required non-Python keys. 
diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py index 1daba82..95cfe34 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py @@ -57,6 +57,8 @@ from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule from ..types.tts import Tts +from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule +from ..types.tts import Tts from ..types.asr import Asr from ..types.llm import Llm from ..types.llm_style import LlmStyle as GeneratedLlmStyle @@ -544,6 +546,23 @@ def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent ) return new_agent + def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": + """Returns a new Agent with the specified RTC audio scenario.""" + new_agent = self._clone() + if new_agent._parameters is None: + new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) + elif isinstance(new_agent._parameters, dict): + new_agent._parameters = typing.cast( + SessionParamsInput, + {**new_agent._parameters, "audio_scenario": audio_scenario}, + ) + else: + new_agent._parameters = self._copy_model_update( + new_agent._parameters, + {"audio_scenario": audio_scenario}, + ) + return new_agent + def with_failure_message(self, message: str) -> "Agent": """Deprecated. 
Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py index 2900c18..745c465 100644 --- a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py @@ -15,6 +15,7 @@ AgentThinkAgentManagementResponse as AgentThinkResponse, ) from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py index 1bd9633..e816367 100644 --- a/src/agora_agent/agentkit/vendors/avatar.py +++ b/src/agora_agent/agentkit/vendors/avatar.py @@ -177,6 +177,49 @@ def to_config(self) -> Dict[str, Any]: return {"enable": enable, "vendor": "generic", "params": params} +class GenericAvatarOptions(BaseModel): + model_config = ConfigDict(extra="forbid") + + api_key: str = Field(..., description="Generic avatar provider API key") + api_base_url: str = Field(..., description="Avatar provider API base URL") + avatar_id: str = Field(..., description="Avatar ID") + agora_uid: str = Field(..., description="Agora UID for the avatar video stream") + agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") + agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") + agora_channel: Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") + enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") + + +class 
GenericAvatar(BaseAvatar): + def __init__(self, **kwargs: Any): + self.options = GenericAvatarOptions(**kwargs) + + @property + def required_sample_rate(self) -> int: + return 0 + + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = { + "api_key": self.options.api_key, + "api_base_url": self.options.api_base_url, + "avatar_id": self.options.avatar_id, + "agora_uid": self.options.agora_uid, + } + + if self.options.agora_appid is not None: + params["agora_appid"] = self.options.agora_appid + if self.options.agora_token is not None: + params["agora_token"] = self.options.agora_token + if self.options.agora_channel is not None: + params["agora_channel"] = self.options.agora_channel + if self.options.additional_params is not None: + params = {**self.options.additional_params, **params} + + enable = self.options.enable if self.options.enable is not None else True + return {"enable": enable, "vendor": "generic", "params": params} + + class AnamAvatarOptions(BaseModel): model_config = ConfigDict(extra="forbid") diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py index 5a9f39e..1f1b354 100644 --- a/src/agora_agent/agentkit/vendors/llm.py +++ b/src/agora_agent/agentkit/vendors/llm.py @@ -2,6 +2,9 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator +from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( + StartAgentsRequestPropertiesLlmGreetingConfigs, +) from .base import BaseLLM LlmGreetingConfigs = Dict[str, Any] From b83aa5a8d6922c2b4cbc653b0329d704c1a6cf8c Mon Sep 17 00:00:00 2001 From: plutoless Date: Wed, 17 Jun 2026 00:47:15 -0700 Subject: [PATCH 3/3] [fern-replay] Resolved conflicts Patches replayed: - patch-7465fada: fix(agentkit): resolve Python session typing issues - patch-fae1249a: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. 
- patch-299e4bd9: fix(agentkit): resolve provider config type checks - patch-bed29b6b: chore: bump Python packages to 2.1.0 - patch-fecdc77c: Fix AgentKit request validation and provider wire-key coverage --- .fern/replay.lock | 18492 +++---------------- src/agora_agent/agentkit/agent.py | 19 - src/agora_agent/agentkit/agent_session.py | 1 - src/agora_agent/agentkit/vendors/avatar.py | 43 - src/agora_agent/agentkit/vendors/llm.py | 3 - 5 files changed, 2353 insertions(+), 16205 deletions(-) diff --git a/.fern/replay.lock b/.fern/replay.lock index b9f48c4..fc1da4b 100644 --- a/.fern/replay.lock +++ b/.fern/replay.lock @@ -20,276 +20,52 @@ generations: fernapi/fern-python-sdk: 4.37.0 current_generation: 1d61baad436285e3b6a37555edb5ca67c158681c patches: - - id: patch-6e30398b - content_hash: sha256:e99898e508e2d6cb9f134cc33e0b73c1c8acb845f5887924e0e38031a6e089c0 - original_commit: 6e30398b5dc6e8ff2681a442a4d6a49c7d866032 - original_message: "chore(agentkit): bump to v1.5.0 and expose v2.7 type aliases" + - id: patch-7465fada + content_hash: sha256:a2f90f66c927424018f2c3304742f097e8594dec9cb2f783264c7b11679a14ac + original_commit: 7465fadafa0f1e62051d99b42d0eeda85f31eeee + original_message: "fix(agentkit): resolve Python session typing issues" original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - - src/agora_agent/agentkit/__init__.py - - src/agora_agent/agentkit/vendors/__init__.py + - src/agora_agent/agentkit/agent_session.py patch_content: | - diff --git a/src/agora_agent/agentkit/__init__.py b/src/agora_agent/agentkit/__init__.py - index 1942bce..5ceda66 100644 - --- a/src/agora_agent/agentkit/__init__.py - +++ b/src/agora_agent/agentkit/__init__.py - @@ -1,13 +1,30 @@ - from .agent import ( - Agent, - + AgentConfig, - + AgentConfigUpdate, - + ConversationHistory, - + ConversationRole, - + ConversationSessionTurn, - + ConversationTurn, - + ConversationTurns, - 
StartAgentsRequestProperties, - + AvatarConfig, - + AvatarVendor, - GeofenceConfig, - + LlmConfig, - + LlmStyle, - + MllmConfig, - + MllmVendor, - RtcConfig, - + SttConfig, - + SttVendor, - + TtsConfig, - FillerWordsConfig, - FillerWordsTrigger, - FillerWordsTriggerFixedTimeConfig, - FillerWordsContent, - FillerWordsContentStaticConfig, - + FillerWordsContentSelectionRule, - TurnDetectionConfig, - TurnDetectionNestedConfig, - StartOfSpeechConfig, - @@ -37,9 +54,14 @@ from .agent import ( - InterruptionMode, - MllmTurnDetectionConfig, - MllmTurnDetectionMode, - + Labels, - LlmGreetingConfigs, - LlmGreetingConfigsMode, - McpServersItem, - + SessionInfo, - + SessionListResponse, - + SessionSummary, - + SpeakPriority, + diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py + index 2900c18..745c465 100644 + --- a/src/agora_agent/agentkit/agent_session.py + +++ b/src/agora_agent/agentkit/agent_session.py + @@ -15,6 +15,7 @@ from ..agent_management.types.agent_think_agent_management_response import ( + AgentThinkAgentManagementResponse as AgentThinkResponse, ) - from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession - from ..agent_management.types.agent_think_agent_management_response import ( - @@ -57,8 +79,10 @@ from ..agent_management.types.agent_think_agent_management_request_on_speaking_a + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, - is_anam_avatar, - + is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - + is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - @@ -112,6 +136,7 @@ from .vendors import ( - FishAudioTTS, - Gemini, - GeminiLive, - + GenericAvatar, - GoogleSTT, - GoogleTTS, - HeyGenAvatar, - @@ 
-132,14 +157,27 @@ from .vendors import ( - SarvamTTS, - SpeechmaticsSTT, - VertexAI, - + XaiGrok, - + XaiRealtime, - LiveAvatarAvatar, - ) - - __all__ = [ - "Agent", - + "AgentConfig", - + "AgentConfigUpdate", - # Return type of Agent.to_properties() - "StartAgentsRequestProperties", - # Top-level config types - + "LlmConfig", - + "LlmStyle", - + "SttConfig", - + "SttVendor", - + "TtsConfig", - + "MllmConfig", - + "MllmVendor", - + "AvatarConfig", - + "AvatarVendor", - "GeofenceConfig", - "RtcConfig", - "FillerWordsConfig", - @@ -147,6 +185,7 @@ __all__ = [ - "FillerWordsTriggerFixedTimeConfig", - "FillerWordsContent", - "FillerWordsContentStaticConfig", - + "FillerWordsContentSelectionRule", - # Turn detection types - "TurnDetectionConfig", - "TurnDetectionNestedConfig", - @@ -181,6 +220,7 @@ __all__ = [ - "InterruptionMode", - "MllmTurnDetectionConfig", - "MllmTurnDetectionMode", - + "Labels", - # Type-safe constants - "DataChannel", - "AudioScenario", - @@ -197,6 +237,15 @@ __all__ = [ - "AgentSession", - "AsyncAgentSession", - "AgentSessionOptions", - + "SessionInfo", - + "SessionListResponse", - + "SessionSummary", - + "ConversationHistory", - + "ConversationTurn", - + "ConversationRole", - + "ConversationTurns", - + "ConversationSessionTurn", - + "SpeakPriority", - "AgentThinkResponse", - "AgentThinkRequestOnListeningAction", - "AgentThinkRequestOnThinkingAction", - @@ -253,14 +302,19 @@ __all__ = [ - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - + "XaiGrok", - + "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - + "GenericAvatar", - "is_heygen_avatar", - "is_live_avatar_avatar", - "is_akool_avatar", - "is_anam_avatar", - + "is_generic_avatar", - + "is_rtc_avatar", - "validate_avatar_config", - "validate_tts_sample_rate", - ] - diff --git a/src/agora_agent/agentkit/vendors/__init__.py b/src/agora_agent/agentkit/vendors/__init__.py - index 0320843..689eab1 100644 - --- a/src/agora_agent/agentkit/vendors/__init__.py - 
+++ b/src/agora_agent/agentkit/vendors/__init__.py - @@ -11,9 +11,9 @@ from .base import ( - OpenAISampleRate, - SampleRate, - ) - -from .avatar import AkoolAvatar, AnamAvatar, HeyGenAvatar, LiveAvatarAvatar - +from .avatar import AkoolAvatar, AnamAvatar, GenericAvatar, HeyGenAvatar, LiveAvatarAvatar - from .llm import Anthropic, AzureOpenAI, Gemini, OpenAI - -from .mllm import GeminiLive, OpenAIRealtime, VertexAI - +from .mllm import GeminiLive, OpenAIRealtime, VertexAI, XaiGrok, XaiRealtime - from .stt import ( - AmazonSTT, - AresSTT, - @@ -82,8 +82,11 @@ __all__ = [ - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - + "XaiGrok", - + "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - + "GenericAvatar", - ] theirs_snapshot: - src/agora_agent/agentkit/__init__.py: | - from .agent import ( - Agent, - AgentConfig, - AgentConfigUpdate, - ConversationHistory, - ConversationRole, - ConversationSessionTurn, - ConversationTurn, - ConversationTurns, - StartAgentsRequestProperties, - AvatarConfig, - AvatarVendor, - GeofenceConfig, - LlmConfig, - LlmStyle, - MllmConfig, - MllmVendor, - RtcConfig, - SttConfig, - SttVendor, - TtsConfig, - FillerWordsConfig, - FillerWordsTrigger, - FillerWordsTriggerFixedTimeConfig, - FillerWordsContent, - FillerWordsContentStaticConfig, - FillerWordsContentSelectionRule, - TurnDetectionConfig, - TurnDetectionNestedConfig, - StartOfSpeechConfig, - StartOfSpeechMode, - StartOfSpeechVadConfig, - StartOfSpeechKeywordsConfig, - StartOfSpeechDisabledConfig, - StartOfSpeechDisabledConfigStrategy, - EndOfSpeechConfig, - EndOfSpeechMode, - EndOfSpeechVadConfig, - EndOfSpeechSemanticConfig, - TurnDetectionType, - InterruptMode, - Eagerness, - SalConfig, - SalMode, - AdvancedFeatures, - SessionParams, - SessionParamsInput, - SilenceConfig, - SilenceAction, - FarewellConfig, - ParametersDataChannel, - ParametersAudioScenario, - InterruptionConfig, - InterruptionMode, - MllmTurnDetectionConfig, - 
MllmTurnDetectionMode, - Labels, - LlmGreetingConfigs, - LlmGreetingConfigsMode, - McpServersItem, - SessionInfo, - SessionListResponse, - SessionSummary, - SpeakPriority, - ) - from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) + src/agora_agent/agentkit/agent_session.py: | + import typing + import warnings + + from ..core.api_error import ApiError from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, ) + from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( + AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, + ) from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, + from ..agent_management.types.agent_think_agent_management_response import ( + AgentThinkAgentManagementResponse as AgentThinkResponse, ) + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse + from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, is_anam_avatar, + is_avatar_token_managed, is_generic_avatar, is_heygen_avatar, is_live_avatar_avatar, @@ -297,1884 +73,1235 @@ patches: validate_avatar_config, validate_tts_sample_rate, ) - from .constants import ( - DataChannel, - AudioScenario, - SilenceActionValues, - 
SalModeValues, - GeofenceArea, - GeofenceExcludeArea, - FillerWordsSelectionRule, - TurnDetectionTypeValues, - ) - from .token import ( - GenerateConvoAITokenOptions, - GenerateTokenOptions, - MAX_EXPIRY_SECONDS, - generate_convo_ai_token, - generate_rtc_token, - expires_in_hours, - expires_in_minutes, - ) from .presets import ( - AgentPresets, - DeepgramPresetModels, - MiniMaxPresetModels, - OpenAIPresetModels, - OpenAITtsPresetModels, + get_preset_category, + infer_asr_preset, + infer_llm_preset, + infer_tts_preset, normalize_preset_input, + resolve_session_presets, ) - from .vendors import ( - AkoolAvatar, - AmazonSTT, - AmazonTTS, - AnamAvatar, - Anthropic, - AresSTT, - AssemblyAISTT, - AzureOpenAI, - BaseAvatar, - BaseLLM, - BaseMLLM, - BaseSTT, - BaseTTS, - CartesiaSampleRate, - CartesiaTTS, - DeepgramSTT, - DeepgramTTS, - ElevenLabsSampleRate, - ElevenLabsTTS, - FishAudioTTS, - Gemini, - GeminiLive, - GenericAvatar, - GoogleSTT, - GoogleTTS, - HeyGenAvatar, - HumeAITTS, - MicrosoftSampleRate, - MicrosoftSTT, - MicrosoftTTS, - MiniMaxTTS, - MurfTTS, - OpenAI, - OpenAIRealtime, - OpenAISampleRate, - OpenAISTT, - OpenAITTS, - RimeTTS, - SampleRate, - SarvamSTT, - SarvamTTS, - SpeechmaticsSTT, - VertexAI, - XaiGrok, - XaiRealtime, - LiveAvatarAvatar, - ) + from .token import generate_convo_ai_token, _parse_numeric_uid - __all__ = [ - "Agent", - "AgentConfig", - "AgentConfigUpdate", - # Return type of Agent.to_properties() - "StartAgentsRequestProperties", - # Top-level config types - "LlmConfig", - "LlmStyle", - "SttConfig", - "SttVendor", - "TtsConfig", - "MllmConfig", - "MllmVendor", - "AvatarConfig", - "AvatarVendor", - "GeofenceConfig", - "RtcConfig", - "FillerWordsConfig", - "FillerWordsTrigger", - "FillerWordsTriggerFixedTimeConfig", - "FillerWordsContent", - "FillerWordsContentStaticConfig", - "FillerWordsContentSelectionRule", - # Turn detection types - "TurnDetectionConfig", - "TurnDetectionNestedConfig", - "StartOfSpeechConfig", - "StartOfSpeechMode", 
- "StartOfSpeechVadConfig", - "StartOfSpeechKeywordsConfig", - "StartOfSpeechDisabledConfig", - "StartOfSpeechDisabledConfigStrategy", - "EndOfSpeechConfig", - "EndOfSpeechMode", - "EndOfSpeechVadConfig", - "EndOfSpeechSemanticConfig", - # Deprecated turn detection types - "TurnDetectionType", - "InterruptMode", - "Eagerness", - # SAL types - "SalConfig", - "SalMode", - # Advanced features - "AdvancedFeatures", - # Session parameters types - "SessionParams", - "SessionParamsInput", - "SilenceConfig", - "SilenceAction", - "FarewellConfig", - "ParametersDataChannel", - "ParametersAudioScenario", - "InterruptionConfig", - "InterruptionMode", - "MllmTurnDetectionConfig", - "MllmTurnDetectionMode", - "Labels", - # Type-safe constants - "DataChannel", - "AudioScenario", - "SilenceActionValues", - "SalModeValues", - "GeofenceArea", - "GeofenceExcludeArea", - "FillerWordsSelectionRule", - "TurnDetectionTypeValues", - # LLM sub-types - "LlmGreetingConfigs", - "LlmGreetingConfigsMode", - "McpServersItem", - "AgentSession", - "AsyncAgentSession", - "AgentSessionOptions", - "SessionInfo", - "SessionListResponse", - "SessionSummary", - "ConversationHistory", - "ConversationTurn", - "ConversationRole", - "ConversationTurns", - "ConversationSessionTurn", - "SpeakPriority", - "AgentThinkResponse", - "AgentThinkRequestOnListeningAction", - "AgentThinkRequestOnThinkingAction", - "AgentThinkRequestOnSpeakingAction", - "AgentPresets", - "DeepgramPresetModels", - "OpenAIPresetModels", - "OpenAITtsPresetModels", - "MiniMaxPresetModels", - "normalize_preset_input", - "generate_rtc_token", - "GenerateTokenOptions", - "generate_convo_ai_token", - "GenerateConvoAITokenOptions", - "MAX_EXPIRY_SECONDS", - "expires_in_hours", - "expires_in_minutes", - "BaseLLM", - "BaseTTS", - "BaseSTT", - "BaseMLLM", - "BaseAvatar", - "SampleRate", - "ElevenLabsSampleRate", - "MicrosoftSampleRate", - "OpenAISampleRate", - "CartesiaSampleRate", - "OpenAI", - "AzureOpenAI", - "Anthropic", - "Gemini", - 
"ElevenLabsTTS", - "MicrosoftTTS", - "OpenAITTS", - "CartesiaTTS", - "DeepgramTTS", - "GoogleTTS", - "AmazonTTS", - "HumeAITTS", - "RimeTTS", - "FishAudioTTS", - "MiniMaxTTS", - "MurfTTS", - "SarvamTTS", - "SpeechmaticsSTT", - "DeepgramSTT", - "MicrosoftSTT", - "OpenAISTT", - "GoogleSTT", - "AmazonSTT", - "AssemblyAISTT", - "AresSTT", - "SarvamSTT", - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - "XaiGrok", - "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - "GenericAvatar", - "is_heygen_avatar", - "is_live_avatar_avatar", - "is_akool_avatar", - "is_anam_avatar", - "is_generic_avatar", - "is_rtc_avatar", - "validate_avatar_config", - "validate_tts_sample_rate", - ] - src/agora_agent/agentkit/vendors/__init__.py: | - from .base import ( - BaseAvatar, - BaseLLM, - BaseMLLM, - BaseSTT, - BaseTTS, - CartesiaSampleRate, - ElevenLabsSampleRate, - GoogleTTSSampleRate, - MicrosoftSampleRate, - OpenAISampleRate, - SampleRate, - ) - from .avatar import AkoolAvatar, AnamAvatar, GenericAvatar, HeyGenAvatar, LiveAvatarAvatar - from .llm import Anthropic, AzureOpenAI, Gemini, OpenAI - from .mllm import GeminiLive, OpenAIRealtime, VertexAI, XaiGrok, XaiRealtime - from .stt import ( - AmazonSTT, - AresSTT, - AssemblyAISTT, - DeepgramSTT, - GoogleSTT, - MicrosoftSTT, - OpenAISTT, - SarvamSTT, - SpeechmaticsSTT, - ) - from .tts import ( - AmazonTTS, - CartesiaTTS, - DeepgramTTS, - ElevenLabsTTS, - FishAudioTTS, - GoogleTTS, - HumeAITTS, - MicrosoftTTS, - MiniMaxTTS, - MurfTTS, - OpenAITTS, - RimeTTS, - SarvamTTS, - ) - __all__ = [ - "BaseLLM", - "BaseTTS", - "BaseSTT", - "BaseMLLM", - "BaseAvatar", - "SampleRate", - "ElevenLabsSampleRate", - "MicrosoftSampleRate", - "OpenAISampleRate", - "CartesiaSampleRate", - "GoogleTTSSampleRate", - "OpenAI", - "AzureOpenAI", - "Anthropic", - "Gemini", - "ElevenLabsTTS", - "MicrosoftTTS", - "OpenAITTS", - "CartesiaTTS", - "DeepgramTTS", - "GoogleTTS", - "AmazonTTS", - "HumeAITTS", - "RimeTTS", - 
"FishAudioTTS", - "MiniMaxTTS", - "MurfTTS", - "SarvamTTS", - "SpeechmaticsSTT", - "DeepgramSTT", - "MicrosoftSTT", - "OpenAISTT", - "GoogleSTT", - "AmazonSTT", - "AssemblyAISTT", - "AresSTT", - "SarvamSTT", - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - "XaiGrok", - "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - "GenericAvatar", - ] - status: unresolved - - id: patch-9df782b4 - content_hash: sha256:84c08fe3239d2ecb0b0a3ddd33b0dce4e7b012125be797aa83ca12893363b565 - original_commit: 9df782b46d872599f103078e30c5ded2053f2517 - original_message: "feat(agentkit): update MLLM and LLM vendor wrappers for v2.7" - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - src/agora_agent/agentkit/vendors/llm.py - - src/agora_agent/agentkit/vendors/mllm.py - patch_content: |+ - From 9df782b46d872599f103078e30c5ded2053f2517 Mon Sep 17 00:00:00 2001 - From: digitallysavvy - Date: Wed, 20 May 2026 20:57:54 -0400 - Subject: [PATCH] feat(agentkit): update MLLM and LLM vendor wrappers for v2.7 + class _AgentSessionRequiredOptions(typing.TypedDict, total=True): + """Required fields shared by both sync and async session constructors.""" - Adds xAI Grok Realtime and Vertex AI MLLM wrappers, and aligns MLLM - config serialization with the generated core types. LLM vendors now - accept typed greeting_configs and serialize them through the generated - model shape, including interruptable. 
- --- - src/agora_agent/agentkit/vendors/llm.py | 31 ++++-- - src/agora_agent/agentkit/vendors/mllm.py | 118 +++++++++++++++++------ - 2 files changed, 113 insertions(+), 36 deletions(-) + client: typing.Any + agent: Agent + app_id: str + name: str + channel: str + agent_uid: str + remote_uids: typing.List[str] - diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py - index 7465c9f..6f74b43 100644 - --- a/src/agora_agent/agentkit/vendors/llm.py - +++ b/src/agora_agent/agentkit/vendors/llm.py - @@ -1,9 +1,14 @@ - -from typing import Any, Dict, List, Optional - +from typing import Any, Dict, List, Optional, Union - - from pydantic import BaseModel, ConfigDict, Field - - +from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( - + StartAgentsRequestPropertiesLlmGreetingConfigs, - +) - from .base import BaseLLM - - +LlmGreetingConfigs = Union[StartAgentsRequestPropertiesLlmGreetingConfigs, Dict[str, Any]] - + - - def _ensure_mcp_transport(servers: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Ensure each MCP server has transport set (API requires it). 
Default to streamable_http.""" - @@ -15,6 +20,14 @@ def _ensure_mcp_transport(servers: List[Dict[str, Any]]) -> List[Dict[str, Any]] - result.append(item) - return result - - + - +def _dump_optional_model(value: Any) -> Any: - + if hasattr(value, "model_dump"): - + return value.model_dump(exclude_none=True) - + if hasattr(value, "dict"): - + return value.dict(exclude_none=True) - + return value - + - class OpenAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - @@ -31,7 +44,7 @@ class OpenAIOptions(BaseModel): - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - - greeting_configs: Optional[Dict[str, Any]] = Field(default=None) - + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - @@ -74,7 +87,7 @@ class OpenAI(BaseLLM): - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - - config["greeting_configs"] = self.options.greeting_configs - + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - @@ -104,7 +117,7 @@ class AzureOpenAIOptions(BaseModel): - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - - greeting_configs: Optional[Dict[str, Any]] = Field(default=None) - + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: 
Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - @@ -150,7 +163,7 @@ class AzureOpenAI(BaseLLM): - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - - config["greeting_configs"] = self.options.greeting_configs - + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.mcp_servers is not None: - @@ -177,7 +190,7 @@ class AnthropicOptions(BaseModel): - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - - greeting_configs: Optional[Dict[str, Any]] = Field(default=None) - + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - @@ -216,7 +229,7 @@ class Anthropic(BaseLLM): - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - - config["greeting_configs"] = self.options.greeting_configs - + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - @@ -246,7 +259,7 @@ class GeminiOptions(BaseModel): - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - - greeting_configs: Optional[Dict[str, Any]] = Field(default=None) - + 
greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - @@ -287,7 +300,7 @@ class Gemini(BaseLLM): - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - - config["greeting_configs"] = self.options.greeting_configs - + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - diff --git a/src/agora_agent/agentkit/vendors/mllm.py b/src/agora_agent/agentkit/vendors/mllm.py - index 5f6f940..cd6cd07 100644 - --- a/src/agora_agent/agentkit/vendors/mllm.py - +++ b/src/agora_agent/agentkit/vendors/mllm.py - @@ -1,3 +1,4 @@ - +import warnings - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - @@ -22,9 +23,7 @@ class OpenAIRealtimeOptions(BaseModel): - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - - predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - max_history: Optional[int] = Field(default=None, description="Maximum conversation history length") - - class OpenAIRealtime(BaseMLLM): - def __init__(self, **kwargs: Any): - @@ -53,18 +52,97 @@ class OpenAIRealtime(BaseMLLM): - config["output_modalities"] = self.options.output_modalities - if 
self.options.messages is not None: - config["messages"] = self.options.messages - - if self.options.predefined_tools is not None: - - config["predefined_tools"] = self.options.predefined_tools - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - - if self.options.max_history is not None: - - config["max_history"] = self.options.max_history - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - +# xAI MLLM: use XaiGrok (product name, mllm.vendor "xai"). Do not use XaiRealtime—that name - +# is deprecated and reserved naming for future XaiSTT / XaiTTS cascading vendors. - + - + - +class XaiGrokOptions(BaseModel): - + model_config = ConfigDict(extra="forbid") - + - + api_key: str = Field(..., description="xAI API key") - + url: str = Field(default="wss://api.x.ai/v1/realtime", description="xAI Realtime WebSocket URL") - + voice: Optional[str] = Field(default=None, description="Voice identifier (e.g., eve or rex)") - + language: Optional[str] = Field(default=None, description="Language code (e.g., en)") - + sample_rate: Optional[int] = Field(default=None, description="Audio sample rate in Hz") - + greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - + input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - + output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - + messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - + params: Optional[Dict[str, Any]] = Field(default=None, description="Additional xAI parameters") - + turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - + failure_message: Optional[str] = Field(default=None, description="Message played on failure") - + - + - +class XaiGrok(BaseMLLM): - + """xAI 
Grok MLLM vendor (`mllm.vendor`: ``xai``).""" - + - + def __init__(self, **kwargs: Any): - + self.options = XaiGrokOptions(**kwargs) - + - + def to_config(self) -> Dict[str, Any]: - + params: Dict[str, Any] = dict(self.options.params or {}) - + if self.options.voice is not None: - + params["voice"] = self.options.voice - + if self.options.language is not None: - + params["language"] = self.options.language - + if self.options.sample_rate is not None: - + params["sample_rate"] = self.options.sample_rate - + - + config: Dict[str, Any] = { - + "vendor": "xai", - + "api_key": self.options.api_key, - + "url": self.options.url, - + "params": params, - + } - + - + if self.options.greeting_message is not None: - + config["greeting_message"] = self.options.greeting_message - + if self.options.input_modalities is not None: - + config["input_modalities"] = self.options.input_modalities - + if self.options.output_modalities is not None: - + config["output_modalities"] = self.options.output_modalities - + if self.options.messages is not None: - + config["messages"] = self.options.messages - + if self.options.failure_message is not None: - + config["failure_message"] = self.options.failure_message - + if self.options.turn_detection is not None: - + config["turn_detection"] = self.options.turn_detection - + - + return config - + - + - +class XaiRealtimeOptions(XaiGrokOptions): - + """Deprecated: use :class:`XaiGrokOptions` instead.""" - + - + def __init__(self, **data: Any): - + warnings.warn( - + "XaiRealtimeOptions is deprecated; use XaiGrokOptions instead.", - + DeprecationWarning, - + stacklevel=2, - + ) - + super().__init__(**data) - + - + - +class XaiRealtime(XaiGrok): - + """Deprecated: use :class:`XaiGrok` instead.""" - + - + def __init__(self, **kwargs: Any): - + warnings.warn( - + "XaiRealtime is deprecated; use XaiGrok instead.", - + DeprecationWarning, - + stacklevel=2, - + ) - + super().__init__(**kwargs) - + - + - class VertexAIOptions(BaseModel): - model_config = 
ConfigDict(extra="forbid") - - @@ -81,28 +159,24 @@ class VertexAIOptions(BaseModel): - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - - predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - max_history: Optional[int] = Field(default=None, description="Maximum conversation history length") - - class VertexAI(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = VertexAIOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - - params: Dict[str, Any] = { - - "model": self.options.model, - - "project_id": self.options.project_id, - - "location": self.options.location, - - "adc_credentials_string": self.options.adc_credentials_string, - - } - - - + # additional_params spread first so that explicit fields always win, - + # matching the TypeScript SDK. 
- + params: Dict[str, Any] = dict(self.options.additional_params or {}) - + params["model"] = self.options.model - + params["project_id"] = self.options.project_id - + params["location"] = self.options.location - + params["adc_credentials_string"] = self.options.adc_credentials_string - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - - if self.options.additional_params is not None: - - params.update(self.options.additional_params) - - config: Dict[str, Any] = { - "vendor": "vertexai", - @@ -119,12 +193,8 @@ class VertexAI(BaseMLLM): - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - - if self.options.predefined_tools is not None: - - config["predefined_tools"] = self.options.predefined_tools - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - - if self.options.max_history is not None: - - config["max_history"] = self.options.max_history - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - @@ -145,9 +215,7 @@ class GeminiLiveOptions(BaseModel): - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - - predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - max_history: Optional[int] = Field(default=None, description="Maximum conversation history length") - - class GeminiLive(BaseMLLM): - def __init__(self, **kwargs: Any): - @@ -179,12 
+247,8 @@ class GeminiLive(BaseMLLM): - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - - if self.options.predefined_tools is not None: - - config["predefined_tools"] = self.options.predefined_tools - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - - if self.options.max_history is not None: - - config["max_history"] = self.options.max_history - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - -- - 2.52.0 - theirs_snapshot: - src/agora_agent/agentkit/vendors/llm.py: | - from typing import Any, Dict, List, Optional, Union + class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): + """Configuration options for creating an agent session. - from pydantic import BaseModel, ConfigDict, Field + Required fields + --------------- + client, agent, app_id, name, channel, agent_uid, remote_uids - from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( - StartAgentsRequestPropertiesLlmGreetingConfigs, - ) - from .base import BaseLLM + Optional fields + --------------- + app_certificate, token, idle_timeout, enable_string_uid, preset, + pipeline_id, expires_in, debug, warn + """ - LlmGreetingConfigs = Union[StartAgentsRequestPropertiesLlmGreetingConfigs, Dict[str, Any]] + app_certificate: str + token: str + idle_timeout: int + enable_string_uid: bool + preset: typing.Union[str, typing.Sequence[str]] + pipeline_id: str + expires_in: int + debug: bool + warn: typing.Callable[[str], None] - def _ensure_mcp_transport(servers: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Ensure each MCP server has transport set (API requires it). 
Default to streamable_http.""" - result = [] - for s in servers: - item = dict(s) - if item.get("transport") is None: - item["transport"] = "streamable_http" - result.append(item) - return result + class _AgentSessionBase: + """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. + Not intended for direct use — instantiate one of the concrete subclasses or + call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. + """ - def _dump_optional_model(value: Any) -> Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value + def __init__( + self, + client: typing.Any, + agent: Agent, + app_id: str, + name: str, + channel: str, + agent_uid: str, + remote_uids: typing.List[str], + app_certificate: typing.Optional[str] = None, + token: typing.Optional[str] = None, + idle_timeout: typing.Optional[int] = None, + enable_string_uid: typing.Optional[bool] = None, + preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, + pipeline_id: typing.Optional[str] = None, + expires_in: typing.Optional[int] = None, + debug: typing.Optional[bool] = None, + warn: typing.Optional[typing.Callable[[str], None]] = None, + ): + self._client = client + self._agent = agent + self._app_id = app_id + self._app_certificate = app_certificate + self._name = name + self._channel = channel + self._token = token + self._agent_uid = agent_uid + self._remote_uids = remote_uids + self._idle_timeout = idle_timeout + self._enable_string_uid = enable_string_uid + self._preset = preset + self._pipeline_id = pipeline_id + self._expires_in = expires_in + self._debug = debug + self._warn = warn or warnings.warn + self._agent_id: typing.Optional[str] = None + self._status: str = "idle" + self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - class OpenAIOptions(BaseModel): - model_config = 
ConfigDict(extra="forbid") + # ------------------------------------------------------------------ + # Public read-only properties + # ------------------------------------------------------------------ - api_key: Optional[str] = Field(default=None, description="OpenAI API key") - model: str = Field(default="gpt-4o-mini", description="Model name") - base_url: Optional[str] = Field(default=None, description="Custom base URL") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - max_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") + @property + def id(self) -> typing.Optional[str]: + return self._agent_id - class OpenAI(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = OpenAIOptions(**kwargs) + @property + def status(self) -> str: + return self._status - def to_config(self) -> Dict[str, Any]: - # model is the default; explicit params entries extend/override it. - # This matches the TS SDK behaviour: { model, ...params }. 
- params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} + @property + def agent(self) -> Agent: + return self._agent - # Named fields take precedence over anything in the generic params dict. - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p + @property + def app_id(self) -> str: + return self._app_id - config: Dict[str, Any] = { - "url": self.options.base_url or "https://api.openai.com/v1/chat/completions", - "params": params, - "style": "openai", - "input_modalities": self.options.input_modalities or ["text"], - } - if self.options.api_key is not None: - config["api_key"] = self.options.api_key - if self.options.headers is not None: - config["headers"] = self.options.headers + @property + def raw(self) -> typing.Any: + """Direct access to the underlying Fern-generated AgentsClient. 
- if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history + Use this to access any new endpoints that Fern generates without + waiting for agentkit method updates. + """ + return self._client.agents - return config + @property + def raw_agent_management(self) -> typing.Any: + """Direct access to the underlying Fern-generated AgentManagement client.""" + return self._client.agent_management + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ - class AzureOpenAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: + """Return per-request auth headers when client is in app-credentials mode. 
- api_key: str = Field(..., description="Azure OpenAI API key") - endpoint: str = Field(..., description="Azure endpoint URL") - deployment_name: str = Field(..., description="Azure deployment name") - api_version: str = Field(default="2024-08-01-preview", description="Azure API version") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - max_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") + In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated + for every request and returned as ``Authorization: agora token=``. + In basic-auth mode this returns ``None`` (the client-level header is used). 
+ """ + if getattr(self._client, "auth_mode", None) != "app-credentials": + return None + app_id: str = getattr(self._client, "app_id", self._app_id) + app_certificate: typing.Optional[str] = getattr( + self._client, "app_certificate", self._app_certificate + ) + if not app_certificate: + raise RuntimeError("app_certificate is required for app-credentials auth mode") + token = generate_convo_ai_token( + app_id=app_id, + app_certificate=app_certificate, + channel_name=self._channel, + uid=_parse_numeric_uid(self._agent_uid, "agent_uid"), + ) + return {"Authorization": f"agora token={token}"} - class AzureOpenAI(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = AzureOpenAIOptions(**kwargs) + def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: + """Build request_options dict with per-request auth headers if needed.""" + headers = self._convo_ai_headers() + if headers is None: + return None + return {"additional_headers": headers} - def to_config(self) -> Dict[str, Any]: - url = ( - f"{self.options.endpoint}/openai/deployments/" - f"{self.options.deployment_name}/chat/completions" - f"?api-version={self.options.api_version}" - ) - config: Dict[str, Any] = { - "url": url, - "api_key": self.options.api_key, - "vendor": self.options.vendor or "azure", - "style": "openai", - "input_modalities": self.options.input_modalities or ["text"], - } + def _validate_avatar_config(self) -> None: + avatar = self._agent.avatar + tts = self._agent.tts + if not avatar or avatar.get("enable", True) is False: + return + if self._is_mllm_mode(): + raise ValueError( + "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " + "Remove the avatar configuration when using MLLM, or switch to a cascading session." + ) - # Named fields take precedence over anything in the generic params dict. 
- params: Dict[str, Any] = dict(self.options.params or {}) - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if params: - config["params"] = params - if self.options.headers is not None: - config["headers"] = self.options.headers + if ( + is_heygen_avatar(avatar) + or is_live_avatar_avatar(avatar) + or is_akool_avatar(avatar) + or is_anam_avatar(avatar) + or is_generic_avatar(avatar) + ): + validate_avatar_config(avatar) - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history + tts_params = tts.get("params") if isinstance(tts, dict) else None + sample_rate = self._agent.tts_sample_rate + if sample_rate is None and isinstance(tts_params, dict): + sample_rate = ( + tts_params.get("sample_rate") + or tts_params.get("sample_rate_hertz") + or tts_params.get("samplingRate") + ) + if isinstance(sample_rate, int): + validate_tts_sample_rate(avatar, sample_rate) + elif is_heygen_avatar(avatar): + self._warn( + "Warning: HeyGen avatar detected but 
TTS sample_rate is not explicitly set. " + "HeyGen requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." + ) + elif is_live_avatar_avatar(avatar): + self._warn( + "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " + "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." + ) + elif is_akool_avatar(avatar): + self._warn( + "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " + "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." + ) - return config + def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: + avatar = properties.get("avatar") + if not isinstance(avatar, dict) or avatar.get("enable", True) is False: + return + params = avatar.get("params") + if not isinstance(params, dict): + params = {} + avatar["params"] = params - class AnthropicOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + if is_generic_avatar(avatar): + if not params.get("agora_appid"): + params["agora_appid"] = self._app_id + if not params.get("agora_channel"): + params["agora_channel"] = self._channel - api_key: str = Field(..., description="Anthropic API key") - model: str = Field(default="claude-3-5-sonnet-20241022", description="Model name") - url: Optional[str] = Field(default=None, description="Custom API endpoint URL") - max_tokens: Optional[int] = Field(default=None, gt=0) - temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] 
= Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") + if not is_avatar_token_managed(avatar): + validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) + return - class Anthropic(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = AnthropicOptions(**kwargs) + if not params.get("agora_uid"): + validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) + return - def to_config(self) -> Dict[str, Any]: - # Named fields take precedence over anything in the generic params dict. - params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p + if not params.get("agora_token"): + if not self._app_certificate: + raise ValueError( + "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " + "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." 
+ ) + token_kwargs: typing.Dict[str, typing.Any] = {} + if self._expires_in is not None: + token_kwargs["token_expire"] = self._expires_in + params["agora_token"] = generate_convo_ai_token( + app_id=self._app_id, + app_certificate=self._app_certificate, + channel_name=self._channel, + uid=_parse_numeric_uid(str(params["agora_uid"]), "avatar agora_uid"), + **token_kwargs, + ) - config: Dict[str, Any] = { - "url": self.options.url or "https://api.anthropic.com/v1/messages", - "api_key": self.options.api_key, - "params": params, - "style": "anthropic", - "input_modalities": self.options.input_modalities or ["text"], - } + if str(params.get("agora_uid")) == self._agent_uid: + self._warn( + "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." + ) - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.headers is not None: - config["headers"] = self.options.headers - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history + validate_avatar_config(avatar, require_session_fields=True) - return config + @staticmethod + def _dump_model(value: typing.Any) -> typing.Any: + 
if hasattr(value, "model_dump"): + return value.model_dump(exclude_none=True) + if isinstance(value, dict): + return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} + if isinstance(value, list): + return [_AgentSessionBase._dump_model(item) for item in value] + return value + def _is_mllm_mode(self) -> bool: + mllm = self._agent.mllm + if isinstance(mllm, dict) and mllm.get("enable") is True: + return True + return mllm is not None - class GeminiOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + def _build_start_properties( + self, + token_opts: typing.Dict[str, typing.Any], + skip_vendor_validation_categories: typing.AbstractSet[str], + allow_missing_vendor_categories: typing.AbstractSet[str], + ) -> typing.Dict[str, typing.Any]: + base_properties = self._agent.to_properties( + channel=self._channel, + agent_uid=self._agent_uid, + remote_uids=self._remote_uids, + idle_timeout=self._idle_timeout, + enable_string_uid=self._enable_string_uid, + skip_vendor_validation_categories=skip_vendor_validation_categories, + allow_missing_vendor_categories=allow_missing_vendor_categories, + **token_opts, + ) + properties = self._dump_model(base_properties) + self._enrich_avatar_for_session(properties) - api_key: str = Field(..., description="Google AI API key") - model: str = Field(default="gemini-2.0-flash-exp", description="Model name") - url: Optional[str] = Field(default=None, description="Custom API endpoint URL") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_k: Optional[int] = Field(default=None, gt=0) - max_output_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, 
Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") + if self._is_mllm_mode(): + if self._agent.mllm is not None: + mllm = self._dump_model(self._agent.mllm) + if not isinstance(mllm, dict): + mllm = {} + if self._agent.greeting is not None: + mllm.setdefault("greeting_message", self._agent.greeting) + if self._agent.failure_message is not None: + mllm.setdefault("failure_message", self._agent.failure_message) + properties["mllm"] = mllm + return properties - class Gemini(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = GeminiOptions(**kwargs) + if self._agent.tts is not None: + properties["tts"] = self._dump_model(self._agent.tts) + if self._agent.llm is not None: + llm = dict(self._agent.llm) + if self._agent.instructions is not None and "system_messages" not in llm: + llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] + if self._agent.greeting is not None and "greeting_message" not in llm: + llm["greeting_message"] = self._agent.greeting + if self._agent.greeting_configs is not None and "greeting_configs" not in llm: + llm["greeting_configs"] = self._dump_model(self._agent.greeting_configs) + if self._agent.failure_message is not None and "failure_message" not in llm: + llm["failure_message"] = self._agent.failure_message + if self._agent.max_history is not None and "max_history" not in llm: + llm["max_history"] = self._agent.max_history + properties["llm"] = llm + if self._agent.stt is not None: + properties["asr"] = 
self._dump_model(self._agent.stt) - def to_config(self) -> Dict[str, Any]: - # Named fields take precedence over anything in the generic params dict. - params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - if self.options.top_k is not None: - params["top_k"] = self.options.top_k - if self.options.max_output_tokens is not None: - params["max_output_tokens"] = self.options.max_output_tokens + return properties - config: Dict[str, Any] = { - "url": self.options.url or "https://generativelanguage.googleapis.com/v1beta/models", - "api_key": self.options.api_key, - "params": params, - "style": "gemini", - "input_modalities": self.options.input_modalities or ["text"], - } + @staticmethod + def _request_properties_for_start( + resolved_properties: typing.Dict[str, typing.Any], + *, + resolved_preset: typing.Optional[str], + pipeline_id: typing.Optional[str], + ) -> typing.Any: + try: + return _start_properties_from_mapping(resolved_properties) + except Exception as exc: + if pipeline_id: + return resolved_properties + if resolved_preset: + normalized_preset = normalize_preset_input(resolved_preset) + if not normalized_preset: + raise + preset_categories = { + category + for item in normalized_preset.split(",") + for category in [get_preset_category(item)] + if category is not None + } + error_categories = _AgentSessionBase._validation_error_categories(exc) + if error_categories and error_categories.issubset(preset_categories): + return resolved_properties + raise - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.headers is not None: - config["headers"] = self.options.headers - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if 
self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history + @staticmethod + def _validation_error_categories(exc: Exception) -> typing.Set[str]: + errors = getattr(exc, "errors", None) + if not callable(errors): + return set() + categories: typing.Set[str] = set() + for error in errors(): + loc = error.get("loc") if isinstance(error, dict) else None + if isinstance(loc, tuple) and loc: + field = loc[0] + if field in {"asr", "llm", "tts"}: + categories.add(typing.cast(str, field)) + return categories - return config - src/agora_agent/agentkit/vendors/mllm.py: | - import warnings - from typing import Any, Dict, List, Optional + def _vendor_validation_categories( + self, + pipeline_id: typing.Optional[str], + ) -> typing.Tuple[typing.Set[str], typing.Set[str]]: + skip_categories: typing.Set[str] = set() + allow_missing_categories: typing.Set[str] = {"asr", "llm", "tts"} if pipeline_id else set() - from pydantic import BaseModel, ConfigDict, Field + preset = normalize_preset_input(self._preset) + if preset: + for item in preset.split(","): + category = get_preset_category(item) + if category is not None: + skip_categories.add(category) + allow_missing_categories.add(category) - from ...agents.types.start_agents_request_properties_mllm_turn_detection import ( - 
StartAgentsRequestPropertiesMllmTurnDetection, - ) - from .base import BaseMLLM + if infer_asr_preset(self._agent.stt): + skip_categories.add("asr") + if infer_llm_preset(self._agent.llm): + skip_categories.add("llm") + if infer_tts_preset(self._agent.tts): + skip_categories.add("tts") + return skip_categories, allow_missing_categories - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection + @staticmethod + def _page_value(pagination: typing.Any, field: str) -> typing.Any: + if pagination is None: + return None + if isinstance(pagination, dict): + return pagination.get(field) + return getattr(pagination, field, None) + @staticmethod + def _response_turns(response: typing.Any) -> typing.List[typing.Any]: + turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) + return list(turns or []) - class OpenAIRealtimeOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + @staticmethod + def _response_pagination(response: typing.Any) -> typing.Any: + if isinstance(response, dict): + return response.get("pagination") + return getattr(response, "pagination", None) - api_key: str = Field(..., description="OpenAI API key") - model: Optional[str] = Field(default=None, description="Model name (e.g., gpt-4o-realtime-preview)") - url: Optional[str] = Field(default=None, description="WebSocket URL") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: 
Optional[str] = Field(default=None, description="Message played on failure") + @classmethod + def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: + data = cls._dump_model(first_response) + if not isinstance(data, dict): + data = {} + data["turns"] = turns + return GetTurnsAgentsResponse(**data) - class OpenAIRealtime(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = OpenAIRealtimeOptions(**kwargs) + # ------------------------------------------------------------------ + # Event handling + # ------------------------------------------------------------------ - def to_config(self) -> Dict[str, Any]: - config: Dict[str, Any] = { - "vendor": "openai", - "api_key": self.options.api_key, - } + def on(self, event: str, handler: typing.Callable[..., None]) -> None: + """Register an event handler. - if self.options.url is not None: - config["url"] = self.options.url - if self.options.model is not None: - params = {"model": self.options.model} - if self.options.params is not None: - params.update(self.options.params) - config["params"] = params - elif self.options.params is not None: - config["params"] = self.options.params - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection + Parameters + ---------- + event : str + The event type (``started``, ``stopped``, ``error``). + handler : callable + The event handler to invoke when the event fires. 
+ """ + if event not in self._event_handlers: + self._event_handlers[event] = [] + self._event_handlers[event].append(handler) - return config + def off(self, event: str, handler: typing.Callable[..., None]) -> None: + """Unregister a previously registered event handler.""" + handlers = self._event_handlers.get(event) + if handlers and handler in handlers: + handlers.remove(handler) + def _emit(self, event: str, data: typing.Any) -> None: + handlers = self._event_handlers.get(event) + if handlers: + for handler in handlers: + try: + handler(data) + except Exception as exc: + # Prevent a misbehaving handler from blocking other handlers or + # the session lifecycle. Warn so the error is not silently lost. + warnings.warn( + f"Event handler for '{event}' raised an exception: {exc}", + stacklevel=2, + ) - # xAI MLLM: use XaiGrok (product name, mllm.vendor "xai"). Do not use XaiRealtime—that name - # is deprecated and reserved naming for future XaiSTT / XaiTTS cascading vendors. + class AgentSession(_AgentSessionBase): + """Manages the lifecycle of an agent session (synchronous). - class XaiGrokOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + This class provides a high-level interface for managing agent sessions, + including starting, stopping, and interacting with the agent. 
- api_key: str = Field(..., description="xAI API key") - url: str = Field(default="wss://api.x.ai/v1/realtime", description="xAI Realtime WebSocket URL") - voice: Optional[str] = Field(default=None, description="Voice identifier (e.g., eve or rex)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en)") - sample_rate: Optional[int] = Field(default=None, description="Audio sample rate in Hz") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional xAI parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") + Use :meth:`Agent.create_session` to create a session — this is the + recommended entry point. 
+ Examples + -------- + >>> from agora_agent import Agora, Area, Agent, OpenAI, ElevenLabsTTS + >>> + >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") + >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") + >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) + >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) + >>> agent_id = session.start() + >>> session.say("Hello!") + >>> session.stop() + """ - class XaiGrok(BaseMLLM): - """xAI Grok MLLM vendor (`mllm.vendor`: ``xai``).""" + def start(self) -> str: + """Start the agent session. - def __init__(self, **kwargs: Any): - self.options = XaiGrokOptions(**kwargs) + Returns + ------- + str + The agent ID. - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.params or {}) - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.language is not None: - params["language"] = self.options.language - if self.options.sample_rate is not None: - params["sample_rate"] = self.options.sample_rate + Raises + ------ + RuntimeError + If the session is not in a startable state. + ValueError + If avatar/TTS configuration is invalid. 
+ """ + if self._status not in ("idle", "stopped", "error"): + raise RuntimeError(f"Cannot start session in {self._status} state") - config: Dict[str, Any] = { - "vendor": "xai", - "api_key": self.options.api_key, - "url": self.options.url, - "params": params, - } + self._validate_avatar_config() + self._status = "starting" - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection + try: + pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id + if self._token: + token_opts: typing.Dict[str, typing.Any] = {"token": self._token} + else: + token_opts = { + "app_id": self._app_id, + "app_certificate": self._app_certificate, + "expires_in": self._expires_in, + } - return config + skip_categories, allow_missing_categories = self._vendor_validation_categories(pipeline_id) + properties = self._build_start_properties( + token_opts, + skip_vendor_validation_categories=skip_categories, + allow_missing_vendor_categories=allow_missing_categories, + ) + resolved_preset, resolved_properties = resolve_session_presets( + self._preset, + properties, + ) + if self._debug: + print("[Agora Debug] Starting agent session...") + print("[Agora Debug] Request:", { + "appid": self._app_id, + "name": self._name, + "preset": resolved_preset, + "pipeline_id": pipeline_id, + "properties": resolved_properties, + }) - class XaiRealtimeOptions(XaiGrokOptions): - """Deprecated: use :class:`XaiGrokOptions` 
instead.""" + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) - def __init__(self, **data: Any): - warnings.warn( - "XaiRealtimeOptions is deprecated; use XaiGrokOptions instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(**data) + response = self._client.agents.start( + self._app_id, + name=self._name, + properties=request_properties, + preset=resolved_preset, + pipeline_id=pipeline_id, + request_options=self._request_options(), + ) + self._agent_id = response.agent_id if hasattr(response, "agent_id") else None + self._status = "running" + self._emit("started", {"agent_id": self._agent_id}) + return self._agent_id or "" + except Exception as e: + self._status = "error" + self._emit("error", e) + raise - class XaiRealtime(XaiGrok): - """Deprecated: use :class:`XaiGrok` instead.""" + def stop(self) -> None: + """Stop the agent session. - def __init__(self, **kwargs: Any): - warnings.warn( - "XaiRealtime is deprecated; use XaiGrok instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(**kwargs) + If the agent has already stopped (e.g., crashed or timed out), the + server returns 404, which this method treats as a successful stop + rather than raising an error. 
+ """ + if self._status != "running": + raise RuntimeError(f"Cannot stop session in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") + self._status = "stopping" - class VertexAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + try: + self._client.agents.stop( + self._app_id, self._agent_id, request_options=self._request_options() + ) + self._status = "stopped" + self._emit("stopped", {"agent_id": self._agent_id}) + except ApiError as e: + if e.status_code == 404: + self._status = "stopped" + self._emit("stopped", {"agent_id": self._agent_id}) + return + self._status = "error" + self._emit("error", e) + raise + except Exception as e: + self._status = "error" + self._emit("error", e) + raise - model: str = Field(..., description="Model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - project_id: str = Field(..., description="Google Cloud project ID") - location: str = Field(..., description="Google Cloud location/region") - adc_credentials_string: str = Field(..., description="Application Default Credentials JSON string") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name (e.g., Aoede, Charon)") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message 
played on failure") + def say( + self, + text: str, + priority: typing.Optional[str] = None, + interruptable: typing.Optional[bool] = None, + *, + options: typing.Optional["SayOptions"] = None, + ) -> None: + """Send a message to be spoken by the agent. - class VertexAI(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = VertexAIOptions(**kwargs) + Parameters + ---------- + text : str + The text to speak. + priority : str, optional + Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). + interruptable : bool, optional + Whether the message can be interrupted by the user. + """ + if self._status != "running": + raise RuntimeError(f"Cannot say in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - def to_config(self) -> Dict[str, Any]: - # additional_params spread first so that explicit fields always win, - # matching the TypeScript SDK. - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params["model"] = self.options.model - params["project_id"] = self.options.project_id - params["location"] = self.options.location - params["adc_credentials_string"] = self.options.adc_credentials_string - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - - config: Dict[str, Any] = { - "vendor": "vertexai", - "params": params, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - 
config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config + kwargs: typing.Dict[str, typing.Any] = {"text": text} + if options is not None: + kwargs.update(options) + if priority is not None: + kwargs["priority"] = priority + if interruptable is not None: + kwargs["interruptable"] = interruptable + self._client.agents.speak( + self._app_id, self._agent_id, request_options=self._request_options(), **kwargs + ) - class GeminiLiveOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + def interrupt(self) -> None: + """Interrupt the agent while it is speaking or thinking.""" + if self._status != "running": + raise RuntimeError(f"Cannot interrupt in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - api_key: str = Field(..., description="Google API key") - model: str = Field(..., description="Gemini Live model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") + self._client.agents.interrupt( + self._app_id, 
self._agent_id, request_options=self._request_options() + ) - class GeminiLive(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = GeminiLiveOptions(**kwargs) + def think( + self, + text: str, + *, + on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, + on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, + on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, + interruptable: typing.Optional[bool] = None, + metadata: typing.Optional[typing.Dict[str, str]] = None, + options: typing.Optional["ThinkOptions"] = None, + ) -> AgentThinkResponse: + """Inject a custom text instruction into the current session pipeline. - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = {} - if self.options.additional_params is not None: - params.update(self.options.additional_params) - params["model"] = self.options.model - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice + In API v2.7, omitting ``on_listening_action`` uses the server default + ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to + preserve the pre-v2.7 behavior. 
+ """ + if self._status != "running": + raise RuntimeError(f"Cannot think in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - config: Dict[str, Any] = { - "vendor": "gemini", - "api_key": self.options.api_key, - "params": params, - } + kwargs: typing.Dict[str, typing.Any] = {"text": text} + if options is not None: + kwargs.update(options) + if on_listening_action is not None: + kwargs["on_listening_action"] = on_listening_action + if on_thinking_action is not None: + kwargs["on_thinking_action"] = on_thinking_action + if on_speaking_action is not None: + kwargs["on_speaking_action"] = on_speaking_action + if interruptable is not None: + kwargs["interruptable"] = interruptable + if metadata is not None: + kwargs["metadata"] = metadata - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection + return self._client.agent_management.agent_think( + self._app_id, + self._agent_id, + request_options=self._request_options(), + **kwargs, + ) - return config - status: unresolved - - id: patch-26706d73 - content_hash: sha256:a9551e0b774b96e7734e9faa7d770611861cf443837428272ef75710447238da - original_commit: 26706d73ae15d860d57daf926837632c01be7f10 - original_message: "feat(agentkit): add GenericAvatar and session-aware avatar validation" - original_author: digitallysavvy - base_generation: 
a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - src/agora_agent/agentkit/avatar_types.py - - src/agora_agent/agentkit/vendors/avatar.py - patch_content: |+ - From 26706d73ae15d860d57daf926837632c01be7f10 Mon Sep 17 00:00:00 2001 - From: digitallysavvy - Date: Wed, 20 May 2026 20:59:22 -0400 - Subject: [PATCH] feat(agentkit): add GenericAvatar and session-aware avatar - validation - - Adds the GenericAvatar vendor wrapper and extends avatar validation - helpers for generic and RTC-backed avatars. Session-derived fields such - as agora_appid, agora_channel, and agora_token can now be validated - after AgentSession enrichment. - --- - src/agora_agent/agentkit/avatar_types.py | 35 +++++++++++++++++- - src/agora_agent/agentkit/vendors/avatar.py | 42 ++++++++++++++++++++++ - 2 files changed, 76 insertions(+), 1 deletion(-) - - diff --git a/src/agora_agent/agentkit/avatar_types.py b/src/agora_agent/agentkit/avatar_types.py - index 9e132a9..a04809c 100644 - --- a/src/agora_agent/agentkit/avatar_types.py - +++ b/src/agora_agent/agentkit/avatar_types.py - @@ -17,7 +17,21 @@ def is_anam_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "anam" - - - -def validate_avatar_config(config: typing.Dict[str, typing.Any]) -> None: - +def is_generic_avatar(config: typing.Dict[str, typing.Any]) -> bool: - + return config.get("vendor") == "generic" - + - + - +def is_rtc_avatar(config: typing.Dict[str, typing.Any]) -> bool: - + params = config.get("params", {}) - + return isinstance(params, dict) and bool(params.get("agora_uid")) and ( - + is_heygen_avatar(config) or is_live_avatar_avatar(config) or is_generic_avatar(config) - + ) - + - + - +def validate_avatar_config( - + config: typing.Dict[str, typing.Any], - + require_session_fields: bool = False, - +) -> None: - """Validates avatar configuration at runtime. 
- - Parameters - @@ -45,6 +59,8 @@ def validate_avatar_config(config: typing.Dict[str, typing.Any]) -> None: - f"Invalid quality for {label}: {params.get('quality')}. " - f"Must be one of: {', '.join(valid_qualities)}" - ) - + if require_session_fields and not params.get("agora_token"): - + raise ValueError(f"{label} avatar requires agora_token after session enrichment") - elif is_akool_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - @@ -53,6 +69,23 @@ def validate_avatar_config(config: typing.Dict[str, typing.Any]) -> None: - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Anam avatar requires api_key") - + elif is_generic_avatar(config): - + params = config.get("params", {}) - + if not params.get("api_key"): - + raise ValueError("Generic avatar requires api_key") - + if not params.get("api_base_url"): - + raise ValueError("Generic avatar requires api_base_url") - + if not params.get("avatar_id"): - + raise ValueError("Generic avatar requires avatar_id") - + if not params.get("agora_uid"): - + raise ValueError("Generic avatar requires agora_uid") - + if require_session_fields: - + if not params.get("agora_token"): - + raise ValueError("Generic avatar requires agora_token after session enrichment") - + if not params.get("agora_appid"): - + raise ValueError("Generic avatar requires agora_appid after session enrichment") - + if not params.get("agora_channel"): - + raise ValueError("Generic avatar requires agora_channel after session enrichment") - - - def validate_tts_sample_rate( - diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py - index b83a356..00cad8f 100644 - --- a/src/agora_agent/agentkit/vendors/avatar.py - +++ b/src/agora_agent/agentkit/vendors/avatar.py - @@ -132,6 +132,48 @@ class LiveAvatarAvatar(BaseAvatar): - return {"enable": enable, "vendor": "liveavatar", "params": params} - - - +class GenericAvatarOptions(BaseModel): - + 
model_config = ConfigDict(extra="forbid") - + - + api_key: str = Field(..., description="Generic avatar provider API key") - + api_base_url: str = Field(..., description="Avatar provider API base URL") - + avatar_id: str = Field(..., description="Avatar ID") - + agora_uid: str = Field(..., description="Agora UID for the avatar video stream") - + agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") - + agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") - + agora_channel: Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") - + enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - + - +class GenericAvatar(BaseAvatar): - + def __init__(self, **kwargs: Any): - + self.options = GenericAvatarOptions(**kwargs) - + - + @property - + def required_sample_rate(self) -> int: - + return 0 - + - + def to_config(self) -> Dict[str, Any]: - + params: Dict[str, Any] = { - + "api_key": self.options.api_key, - + "api_base_url": self.options.api_base_url, - + "avatar_id": self.options.avatar_id, - + "agora_uid": self.options.agora_uid, - + } - + - + if self.options.agora_appid is not None: - + params["agora_appid"] = self.options.agora_appid - + if self.options.agora_token is not None: - + params["agora_token"] = self.options.agora_token - + if self.options.agora_channel is not None: - + params["agora_channel"] = self.options.agora_channel - + if self.options.additional_params is not None: - + params = {**self.options.additional_params, **params} - + - + enable = self.options.enable if self.options.enable is not None else True - + return {"enable": enable, "vendor": "generic", "params": params} - + - + - class AnamAvatarOptions(BaseModel): - model_config = 
ConfigDict(extra="forbid") - - -- - 2.52.0 + def update(self, properties: typing.Any) -> None: + """Update the agent configuration at runtime. - theirs_snapshot: - src/agora_agent/agentkit/avatar_types.py: | - import typing + Parameters + ---------- + properties : UpdateAgentsRequestProperties + Partial configuration to update. + """ + if self._status != "running": + raise RuntimeError(f"Cannot update in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") + self._client.agents.update( + self._app_id, + self._agent_id, + properties=properties, + request_options=self._request_options(), + ) - def is_heygen_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "heygen" + def get_history(self) -> typing.Any: + """Get the conversation history.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") + return self._client.agents.get_history( + self._app_id, self._agent_id, request_options=self._request_options() + ) - def is_live_avatar_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "liveavatar" + def get_info(self) -> typing.Any: + """Get the current session info.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") + return self._client.agents.get( + self._app_id, self._agent_id, request_options=self._request_options() + ) - def is_akool_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "akool" + def get_turns( + self, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + options: typing.Optional["GetTurnsOptions"] = None, + ) -> GetTurnsAgentsResponse: + """Get turn-by-turn analytics and timing details for this session.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") + kwargs: typing.Dict[str, typing.Any] = {} + if options is not None: + kwargs.update(options) + if page_index is not None: + kwargs["page_index"] = 
page_index + if page_size is not None: + kwargs["page_size"] = page_size - def is_anam_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "anam" + return self._client.agents.get_turns( + self._app_id, + self._agent_id, + request_options=self._request_options(), + **kwargs, + ) + def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: + """Get all turn analytics pages for this session. - def is_generic_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "generic" + Raises ``RuntimeError`` if the server's pagination metadata is missing + the fields required to advance, or if requesting the next page returns + a page index that did not advance. + """ + response = self.get_turns(page_index=1, page_size=page_size) + all_turns = self._response_turns(response) + pagination = self._response_pagination(response) + current_page = self._page_value(pagination, "page_index") or 1 + while pagination is not None and self._page_value(pagination, "is_last_page") is False: + total_pages = self._page_value(pagination, "total_pages") + returned_index = self._page_value(pagination, "page_index") + if returned_index is None and total_pages is None: + raise RuntimeError( + "get_all_turns pagination cannot continue: response must include " + "page_index, total_pages, or is_last_page=true." 
+ ) + if total_pages is not None and current_page >= total_pages: + break + next_page = current_page + 1 + response = self.get_turns(page_index=next_page, page_size=page_size) + all_turns.extend(self._response_turns(response)) + pagination = self._response_pagination(response) + returned_index = self._page_value(pagination, "page_index") if pagination else None + if returned_index is not None: + if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: + raise RuntimeError( + f"get_all_turns pagination did not advance: requested page {next_page}, " + f"received page {returned_index}." + ) + current_page = returned_index + else: + total_pages = self._page_value(pagination, "total_pages") if pagination else None + is_last_page = self._page_value(pagination, "is_last_page") if pagination else None + if total_pages is None and is_last_page is not True: + raise RuntimeError( + "get_all_turns pagination cannot continue: response must include " + "page_index, total_pages, or is_last_page=true." + ) + current_page = next_page + return self._with_all_turns(response, all_turns) - def is_rtc_avatar(config: typing.Dict[str, typing.Any]) -> bool: - params = config.get("params", {}) - return isinstance(params, dict) and bool(params.get("agora_uid")) and ( - is_heygen_avatar(config) or is_live_avatar_avatar(config) or is_generic_avatar(config) - ) + class AsyncAgentSession(_AgentSessionBase): + """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. + Use :meth:`Agent.create_async_session` to create a session — this is the + recommended entry point. - def validate_avatar_config( - config: typing.Dict[str, typing.Any], - require_session_fields: bool = False, - ) -> None: - """Validates avatar configuration at runtime. - - Parameters - ---------- - config : dict - The avatar configuration dictionary. - - Raises - ------ - ValueError - If the configuration is invalid. 
- """ - if is_heygen_avatar(config) or is_live_avatar_avatar(config): - label = "HeyGen" if is_heygen_avatar(config) else "LiveAvatar" - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError(f"{label} avatar requires api_key") - if not params.get("quality"): - raise ValueError(f"{label} avatar requires quality (low, medium, or high)") - if not params.get("agora_uid"): - raise ValueError(f"{label} avatar requires agora_uid") - valid_qualities = ("low", "medium", "high") - if params.get("quality") not in valid_qualities: - raise ValueError( - f"Invalid quality for {label}: {params.get('quality')}. " - f"Must be one of: {', '.join(valid_qualities)}" - ) - if require_session_fields and not params.get("agora_token"): - raise ValueError(f"{label} avatar requires agora_token after session enrichment") - elif is_akool_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Akool avatar requires api_key") - elif is_anam_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Anam avatar requires api_key") - elif is_generic_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Generic avatar requires api_key") - if not params.get("api_base_url"): - raise ValueError("Generic avatar requires api_base_url") - if not params.get("avatar_id"): - raise ValueError("Generic avatar requires avatar_id") - if not params.get("agora_uid"): - raise ValueError("Generic avatar requires agora_uid") - if require_session_fields: - if not params.get("agora_token"): - raise ValueError("Generic avatar requires agora_token after session enrichment") - if not params.get("agora_appid"): - raise ValueError("Generic avatar requires agora_appid after session enrichment") - if not params.get("agora_channel"): - raise ValueError("Generic avatar requires agora_channel after session enrichment") - - - def validate_tts_sample_rate( - 
avatar_config: typing.Dict[str, typing.Any], - tts_sample_rate: int, - ) -> None: - """Validates that TTS sample rate is compatible with the avatar vendor. - - Different avatar vendors have specific sample rate requirements: - - HeyGen: ONLY supports 24,000 Hz - - Akool: ONLY supports 16,000 Hz - - Parameters - ---------- - avatar_config : dict - The avatar configuration dictionary. - tts_sample_rate : int - The sample rate from your TTS configuration (in Hz). - - Raises - ------ - ValueError - If TTS sample rate is incompatible with the avatar vendor. + Examples + -------- + >>> from agora_agent import AsyncAgora, Area, Agent, OpenAI, ElevenLabsTTS + >>> + >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") + >>> agent = Agent(name="assistant", instructions="You are helpful.") + >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) + >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) + >>> agent_id = await session.start() + >>> await session.say("Hello!") + >>> await session.stop() """ - if is_heygen_avatar(avatar_config) or is_live_avatar_avatar(avatar_config): - if tts_sample_rate != 24000: - label = "HeyGen" if is_heygen_avatar(avatar_config) else "LiveAvatar" - raise ValueError( - f"{label} avatars ONLY support 24,000 Hz sample rate. " - f"Your TTS is configured with {tts_sample_rate} Hz. " - f"Please update your TTS configuration to use 24kHz sample rate. " - f"See: https://docs.agora.io/en/conversational-ai/models/avatar/overview" - ) - elif is_akool_avatar(avatar_config): - if tts_sample_rate != 16000: - raise ValueError( - f"Akool avatars ONLY support 16,000 Hz sample rate. " - f"Your TTS is configured with {tts_sample_rate} Hz. " - f"Please update your TTS configuration to use 16kHz sample rate. 
" - f"See: https://docs.agora.io/en/conversational-ai/models/avatar/akool" - ) - src/agora_agent/agentkit/vendors/avatar.py: | - import warnings - from typing import Any, Dict, Optional - from pydantic import BaseModel, ConfigDict, Field, field_validator - - from .base import BaseAvatar + async def start(self) -> str: + """Start the agent session. - HEYGEN_SAMPLE_RATE = 24000 - LIVEAVATAR_SAMPLE_RATE = 24000 - AKOOL_SAMPLE_RATE = 16000 + Returns + ------- + str + The agent ID. + Raises + ------ + RuntimeError + If the session is not in a startable state. + ValueError + If avatar/TTS configuration is invalid. + """ + if self._status not in ("idle", "stopped", "error"): + raise RuntimeError(f"Cannot start session in {self._status} state") - class HeyGenAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + self._validate_avatar_config() + self._status = "starting" - api_key: str = Field(..., description="HeyGen API key") - quality: str = Field(..., description="Avatar quality: low, medium, or high") - agora_uid: str = Field(..., description="Agora UID for the avatar stream") - agora_token: Optional[str] = Field(default=None, description="RTC token for avatar authentication") - avatar_id: Optional[str] = Field(default=None, description="HeyGen avatar ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - disable_idle_timeout: Optional[bool] = Field(default=None, description="Whether to disable idle timeout") - activity_idle_timeout: Optional[int] = Field(default=None, description="Idle timeout in seconds") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - @field_validator("quality") - @classmethod - def validate_quality(cls, v: str) -> str: - valid = ("low", "medium", "high") - if v not in valid: - raise ValueError(f"Invalid quality '{v}'. 
Must be one of: {', '.join(valid)}") - return v + try: + pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id + if self._token: + token_opts: typing.Dict[str, typing.Any] = {"token": self._token} + else: + token_opts = { + "app_id": self._app_id, + "app_certificate": self._app_certificate, + "expires_in": self._expires_in, + } - class HeyGenAvatar(BaseAvatar): - """Deprecated: HeyGen has been renamed to LiveAvatar. Use LiveAvatarAvatar instead.""" + skip_categories, allow_missing_categories = self._vendor_validation_categories(pipeline_id) + properties = self._build_start_properties( + token_opts, + skip_vendor_validation_categories=skip_categories, + allow_missing_vendor_categories=allow_missing_categories, + ) + resolved_preset, resolved_properties = resolve_session_presets( + self._preset, + properties, + ) - def __init__(self, **kwargs: Any): - warnings.warn( - "HeyGenAvatar is deprecated; use LiveAvatarAvatar instead.", - DeprecationWarning, - stacklevel=2, - ) - self.options = HeyGenAvatarOptions(**kwargs) + if self._debug: + print("[Agora Debug] Starting agent session...") + print("[Agora Debug] Request:", { + "appid": self._app_id, + "name": self._name, + "preset": resolved_preset, + "pipeline_id": pipeline_id, + "properties": resolved_properties, + }) - @property - def required_sample_rate(self) -> int: - return HEYGEN_SAMPLE_RATE + request_properties = self._request_properties_for_start( + resolved_properties, + resolved_preset=resolved_preset, + pipeline_id=pipeline_id, + ) - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "quality": self.options.quality, - "agora_uid": self.options.agora_uid, - } + response = await self._client.agents.start( + self._app_id, + name=self._name, + properties=request_properties, + preset=resolved_preset, + pipeline_id=pipeline_id, + request_options=self._request_options(), + ) - if self.options.agora_token is not None: - 
params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - if self.options.disable_idle_timeout is not None: - params["disable_idle_timeout"] = self.options.disable_idle_timeout - if self.options.activity_idle_timeout is not None: - params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} + self._agent_id = response.agent_id if hasattr(response, "agent_id") else None + self._status = "running" + self._emit("started", {"agent_id": self._agent_id}) + return self._agent_id or "" + except Exception as e: + self._status = "error" + self._emit("error", e) + raise - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "heygen", "params": params} + async def stop(self) -> None: + """Stop the agent session. + If the agent has already stopped (e.g., crashed or timed out), the + server returns 404, which this method treats as a successful stop + rather than raising an error. 
+ """ + if self._status != "running": + raise RuntimeError(f"Cannot stop session in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - class AkoolAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + self._status = "stopping" - api_key: str = Field(..., description="Akool API key") - avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") + try: + await self._client.agents.stop( + self._app_id, self._agent_id, request_options=self._request_options() + ) + self._status = "stopped" + self._emit("stopped", {"agent_id": self._agent_id}) + except ApiError as e: + if e.status_code == 404: + self._status = "stopped" + self._emit("stopped", {"agent_id": self._agent_id}) + return + self._status = "error" + self._emit("error", e) + raise + except Exception as e: + self._status = "error" + self._emit("error", e) + raise - class AkoolAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = AkoolAvatarOptions(**kwargs) + async def say( + self, + text: str, + priority: typing.Optional[str] = None, + interruptable: typing.Optional[bool] = None, + *, + options: typing.Optional["SayOptions"] = None, + ) -> None: + """Send a message to be spoken by the agent. - @property - def required_sample_rate(self) -> int: - return AKOOL_SAMPLE_RATE + Parameters + ---------- + text : str + The text to speak. + priority : str, optional + Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). + interruptable : bool, optional + Whether the message can be interrupted by the user. 
+ """ + if self._status != "running": + raise RuntimeError(f"Cannot say in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - } + kwargs: typing.Dict[str, typing.Any] = {"text": text} + if options is not None: + kwargs.update(options) + if priority is not None: + kwargs["priority"] = priority + if interruptable is not None: + kwargs["interruptable"] = interruptable - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} + await self._client.agents.speak( + self._app_id, self._agent_id, request_options=self._request_options(), **kwargs + ) - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "akool", "params": params} + async def interrupt(self) -> None: + """Interrupt the agent while it is speaking or thinking.""" + if self._status != "running": + raise RuntimeError(f"Cannot interrupt in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") + await self._client.agents.interrupt( + self._app_id, self._agent_id, request_options=self._request_options() + ) - class LiveAvatarAvatarOptions(HeyGenAvatarOptions): - pass + async def think( + self, + text: str, + *, + on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, + on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, + on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, + interruptable: typing.Optional[bool] = None, + metadata: typing.Optional[typing.Dict[str, str]] = None, + options: typing.Optional["ThinkOptions"] = None, + ) -> AgentThinkResponse: + """Inject a custom text instruction into the current session pipeline. 
+ In API v2.7, omitting ``on_listening_action`` uses the server default + ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to + preserve the pre-v2.7 behavior. + """ + if self._status != "running": + raise RuntimeError(f"Cannot think in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - class LiveAvatarAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = LiveAvatarAvatarOptions(**kwargs) + kwargs: typing.Dict[str, typing.Any] = {"text": text} + if options is not None: + kwargs.update(options) + if on_listening_action is not None: + kwargs["on_listening_action"] = on_listening_action + if on_thinking_action is not None: + kwargs["on_thinking_action"] = on_thinking_action + if on_speaking_action is not None: + kwargs["on_speaking_action"] = on_speaking_action + if interruptable is not None: + kwargs["interruptable"] = interruptable + if metadata is not None: + kwargs["metadata"] = metadata - @property - def required_sample_rate(self) -> int: - return LIVEAVATAR_SAMPLE_RATE + return await self._client.agent_management.agent_think( + self._app_id, + self._agent_id, + request_options=self._request_options(), + **kwargs, + ) - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "quality": self.options.quality, - "agora_uid": self.options.agora_uid, - } + async def update(self, properties: typing.Any) -> None: + """Update the agent configuration at runtime. 
- if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - if self.options.disable_idle_timeout is not None: - params["disable_idle_timeout"] = self.options.disable_idle_timeout - if self.options.activity_idle_timeout is not None: - params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} + Parameters + ---------- + properties : UpdateAgentsRequestProperties + Partial configuration to update. + """ + if self._status != "running": + raise RuntimeError(f"Cannot update in {self._status} state") + if not self._agent_id: + raise RuntimeError("No agent ID available") - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "liveavatar", "params": params} + await self._client.agents.update( + self._app_id, + self._agent_id, + properties=properties, + request_options=self._request_options(), + ) + async def get_history(self) -> typing.Any: + """Get the conversation history.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") - class GenericAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + return await self._client.agents.get_history( + self._app_id, self._agent_id, request_options=self._request_options() + ) - api_key: str = Field(..., description="Generic avatar provider API key") - api_base_url: str = Field(..., description="Avatar provider API base URL") - avatar_id: str = Field(..., description="Avatar ID") - agora_uid: str = Field(..., description="Agora UID for the avatar video stream") - agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") - agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") - agora_channel: 
Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - class GenericAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = GenericAvatarOptions(**kwargs) + async def get_info(self) -> typing.Any: + """Get the current session info.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") - @property - def required_sample_rate(self) -> int: - return 0 + return await self._client.agents.get( + self._app_id, self._agent_id, request_options=self._request_options() + ) - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "api_base_url": self.options.api_base_url, - "avatar_id": self.options.avatar_id, - "agora_uid": self.options.agora_uid, - } + async def get_turns( + self, + *, + page_index: typing.Optional[int] = None, + page_size: typing.Optional[int] = None, + options: typing.Optional["GetTurnsOptions"] = None, + ) -> GetTurnsAgentsResponse: + """Get turn-by-turn analytics and timing details for this session.""" + if not self._agent_id: + raise RuntimeError("No agent ID available") - if self.options.agora_appid is not None: - params["agora_appid"] = self.options.agora_appid - if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.agora_channel is not None: - params["agora_channel"] = self.options.agora_channel - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} + kwargs: typing.Dict[str, typing.Any] = {} + if options is not None: + kwargs.update(options) + if page_index is not None: + kwargs["page_index"] = page_index + if page_size is not None: + kwargs["page_size"] = page_size - enable = self.options.enable if 
self.options.enable is not None else True - return {"enable": enable, "vendor": "generic", "params": params} + return await self._client.agents.get_turns( + self._app_id, + self._agent_id, + request_options=self._request_options(), + **kwargs, + ) + async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: + """Get all turn analytics pages for this session. - class AnamAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") + Raises ``RuntimeError`` if the server's pagination metadata is missing + the fields required to advance, or if requesting the next page returns + a page index that did not advance. + """ + response = await self.get_turns(page_index=1, page_size=page_size) + all_turns = self._response_turns(response) + pagination = self._response_pagination(response) + current_page = self._page_value(pagination, "page_index") or 1 + while pagination is not None and self._page_value(pagination, "is_last_page") is False: + total_pages = self._page_value(pagination, "total_pages") + returned_index = self._page_value(pagination, "page_index") + if returned_index is None and total_pages is None: + raise RuntimeError( + "get_all_turns pagination cannot continue: response must include " + "page_index, total_pages, or is_last_page=true." + ) + if total_pages is not None and current_page >= total_pages: + break + next_page = current_page + 1 + response = await self.get_turns(page_index=next_page, page_size=page_size) + all_turns.extend(self._response_turns(response)) + pagination = self._response_pagination(response) + returned_index = self._page_value(pagination, "page_index") if pagination else None + if returned_index is not None: + if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: + raise RuntimeError( + f"get_all_turns pagination did not advance: requested page {next_page}, " + f"received page {returned_index}." 
+ ) + current_page = returned_index + else: + total_pages = self._page_value(pagination, "total_pages") if pagination else None + is_last_page = self._page_value(pagination, "is_last_page") if pagination else None + if total_pages is None and is_last_page is not True: + raise RuntimeError( + "get_all_turns pagination cannot continue: response must include " + "page_index, total_pages, or is_last_page=true." + ) + current_page = next_page + return self._with_all_turns(response, all_turns) + - id: patch-fae1249a + content_hash: sha256:01bf21f3cc4c784dfcff80a48c9c7bb3123af4327a567b7c990b528e9780e9a2 + original_commit: fae1249a20c53761a2eb5515a1bf92ca666760d1 + original_message: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. + original_author: digitallysavvy + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c + files: + - compat/agora-agent-server-sdk/README.md + - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py + patch_content: |+ + From fae1249a20c53761a2eb5515a1bf92ca666760d1 Mon Sep 17 00:00:00 2001 + From: digitallysavvy + Date: Wed, 27 May 2026 16:58:18 -0400 + Subject: [PATCH] Re-export agora-agents API from legacy PyPI compatibility + package The compat distribution delegates to agora_agent via __getattr__ and + documents both import paths in its README. 
- api_key: str = Field(..., description="Anam API key") - persona_id: Optional[str] = Field(default=None, description="Persona ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") + --- + compat/agora-agent-server-sdk/README.md | 7 +++++-- + .../src/agora_agent_server_sdk_compat/__init__.py | 14 +++++++++++++- + 2 files changed, 18 insertions(+), 3 deletions(-) - class AnamAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = AnamAvatarOptions(**kwargs) + diff --git a/compat/agora-agent-server-sdk/README.md b/compat/agora-agent-server-sdk/README.md + index cff3cfe..e43d1d8 100644 + --- a/compat/agora-agent-server-sdk/README.md + +++ b/compat/agora-agent-server-sdk/README.md + @@ -8,6 +8,9 @@ New projects should install: + pip install agora-agents + ``` + + -This compatibility package is kept only to preserve the legacy distribution name during the migration window. It depends on `agora-agents`, which continues to provide the `agora_agent` Python import path. + +This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: + + -It intentionally contains only a minimal compatibility module so the distribution can be built and published cleanly with Poetry. 
+ +```python + +from agora_agent import Agora, Area + +from agora_agent_server_sdk_compat import Agora, Area + +``` + diff --git a/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py + index 55522c6..6283244 100644 + --- a/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py + +++ b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py + @@ -1 +1,13 @@ + -"""Compatibility package for the renamed agora-agents distribution.""" + +"""Compatibility re-exports for the renamed agora-agents package.""" + + + +import agora_agent as _agora_agent + + + +__all__ = getattr(_agora_agent, "__all__", []) + + + + + +def __getattr__(name: str): + + return getattr(_agora_agent, name) + + + + + +def __dir__(): + + return dir(_agora_agent) + -- + 2.52.0 - @property - def required_sample_rate(self) -> int: - return 0 + theirs_snapshot: + compat/agora-agent-server-sdk/README.md: | + # agora-agent-server-sdk - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - } + This package has been renamed to `agora-agents`. 
- if self.options.persona_id is not None: - params["persona_id"] = self.options.persona_id - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "anam", "params": params} - status: unresolved - - id: patch-9f491c63 - content_hash: sha256:d9811b2c5927be74f2125444dcf36642b88ad7be422019688cb0228093dce1d0 - original_commit: 9f491c63a964c13c67ba4af3708379e1b75a92d8 - original_message: "feat(agentkit): update Agent builder and session lifecycle for v2.7" + New projects should install: + + ```sh + pip install agora-agents + ``` + + This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: + + ```python + from agora_agent import Agora, Area + from agora_agent_server_sdk_compat import Agora, Area + ``` + compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py: | + """Compatibility re-exports for the renamed agora-agents package.""" + + import agora_agent as _agora_agent + + __all__ = getattr(_agora_agent, "__all__", []) + + + def __getattr__(name: str): + return getattr(_agora_agent, name) + + + def __dir__(): + return dir(_agora_agent) + user_owned: true + - id: patch-299e4bd9 + content_hash: sha256:ee71350debd51653f1cb1472477a577436d74cbb847b3536a9cdbff0211abf2d + original_commit: 299e4bd9cb59bd6144084332a7c3fa7bf260769f + original_message: "fix(agentkit): resolve provider config type checks" original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf + base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c files: - src/agora_agent/agentkit/agent.py - - src/agora_agent/agentkit/agent_session.py - patch_content: |+ - From 9f491c63a964c13c67ba4af3708379e1b75a92d8 Mon Sep 17 
00:00:00 2001 - From: digitallysavvy - Date: Wed, 20 May 2026 21:00:58 -0400 - Subject: [PATCH] feat(agentkit): update Agent builder and session lifecycle - for v2.7 - - Aligns Agent and AgentSession with the generated v2.7 request shape. - MLLM sessions no longer require TTS, LLM, or STT, and enabled avatars - are rejected when MLLM is configured. AgentSession now enriches generic - and RTC avatars with session context, auto-generates avatar tokens, - validates TTS sample rates from vendor-specific fields, and adds - paginated get_turns/get_all_turns helpers with fail-fast pagination - guards. - --- - src/agora_agent/agentkit/agent.py | 164 +++++++++++++-- - src/agora_agent/agentkit/agent_session.py | 231 ++++++++++++++++++++-- - 2 files changed, 360 insertions(+), 35 deletions(-) - + - src/agora_agent/agentkit/vendors/llm.py + - src/agora_agent/agentkit/vendors/mllm.py + - src/agora_agent/agentkit/vendors/stt.py + patch_content: | diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index 70a1bdd..86a958e 100644 + index 1daba82..95cfe34 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py - @@ -8,6 +8,24 @@ if typing.TYPE_CHECKING: - from .agent_session import AgentSession, AsyncAgentSession - - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - +from ..agents.types.start_agents_request_properties_asr import StartAgentsRequestPropertiesAsr - +from ..agents.types.start_agents_request_properties_asr_vendor import StartAgentsRequestPropertiesAsrVendor - +from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar - +from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - +from ..agents.types.start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm - +from ..agents.types.start_agents_request_properties_llm_style import 
StartAgentsRequestPropertiesLlmStyle - +from ..agents.types.start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm - +from ..agents.types.start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor - +from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties - +from ..agents.types.get_agents_response import GetAgentsResponse - +from ..agents.types.list_agents_response import ListAgentsResponse - +from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem - +from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse - +from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem - +from ..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole - +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - +from ..agents.types.get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem - +from ..agents.types.speak_agents_request_priority import SpeakAgentsRequestPriority - from ..agents.types.start_agents_request_properties_turn_detection import StartAgentsRequestPropertiesTurnDetection - from ..agents.types.start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - @@ -46,10 +64,21 @@ from ..agents.types.start_agents_request_properties_filler_words_trigger import - from ..agents.types.start_agents_request_properties_filler_words_trigger_fixed_time_config import StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - from ..agents.types.start_agents_request_properties_filler_words_content import StartAgentsRequestPropertiesFillerWordsContent + @@ -57,6 +57,8 @@ from 
..agents.types.start_agents_request_properties_filler_words_content import from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig + from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule + from ..types.tts import Tts +from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule +from ..types.tts import Tts - from .token import generate_convo_ai_token, _validate_expires_in - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - +LlmConfig = StartAgentsRequestPropertiesLlm - +LlmStyle = StartAgentsRequestPropertiesLlmStyle - +SttConfig = StartAgentsRequestPropertiesAsr - +SttVendor = StartAgentsRequestPropertiesAsrVendor - +TtsConfig = Tts - +MllmConfig = StartAgentsRequestPropertiesMllm - +MllmVendor = StartAgentsRequestPropertiesMllmVendor - +AvatarConfig = StartAgentsRequestPropertiesAvatar - +AvatarVendor = StartAgentsRequestPropertiesAvatarVendor - TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection - SalConfig = StartAgentsRequestPropertiesSal - SalMode = StartAgentsRequestPropertiesSalSalMode - @@ -93,6 +122,18 @@ InterruptionConfig = StartAgentsRequestPropertiesInterruption - InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection - MllmTurnDetectionMode = StartAgentsRequestPropertiesMllmTurnDetectionMode - +AgentConfig = StartAgentsRequestProperties - +AgentConfigUpdate = UpdateAgentsRequestProperties - +SessionInfo = GetAgentsResponse - +SessionListResponse = ListAgentsResponse - +SessionSummary = ListAgentsResponseDataListItem - +ConversationHistory = GetHistoryAgentsResponse - 
+ConversationTurn = GetHistoryAgentsResponseContentsItem - +ConversationRole = GetHistoryAgentsResponseContentsItemRole - +ConversationTurns = GetTurnsAgentsResponse - +ConversationSessionTurn = GetTurnsAgentsResponseTurnsItem - +SpeakPriority = SpeakAgentsRequestPriority - +Labels = typing.Dict[str, str] - - - class SessionParamsInput(typing_extensions.TypedDict, total=False): - @@ -116,6 +157,7 @@ FillerWordsTrigger = StartAgentsRequestPropertiesFillerWordsTrigger - FillerWordsTriggerFixedTimeConfig = StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - +FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - - class Agent: - @@ -183,9 +225,20 @@ class Agent: - return new_agent - - def with_tts(self, vendor: BaseTTS) -> "Agent": - + sample_rate = vendor.sample_rate - + if ( - + self._avatar_required_sample_rate not in (None, 0) - + and sample_rate is not None - + and sample_rate != self._avatar_required_sample_rate - + ): - + raise ValueError( - + f"Avatar requires TTS sample rate of {self._avatar_required_sample_rate} Hz, " - + f"but TTS is configured with {sample_rate} Hz. " - + f"Please update your TTS sample_rate to {self._avatar_required_sample_rate}." - + ) - new_agent = self._clone() - new_agent._tts = vendor.to_config() - - new_agent._tts_sample_rate = vendor.sample_rate - + new_agent._tts_sample_rate = sample_rate - return new_agent - - def with_stt(self, vendor: BaseSTT) -> "Agent": - @@ -194,6 +247,9 @@ class Agent: - return new_agent - - def with_mllm(self, vendor: BaseMLLM) -> "Agent": - + # Note: avatars are not supported with MLLM. The combination is rejected - + # at ``to_properties`` / ``AgentSession.start`` so callers can still - + # configure both for tests, debugging, or disabled-avatar use cases. 
- new_agent = self._clone() - new_agent._mllm = vendor.to_config() - if isinstance(new_agent._mllm, dict): - @@ -202,7 +258,10 @@ class Agent: - advanced_features = {key: value for key, value in new_agent._advanced_features.items() if key != "enable_mllm"} - new_agent._advanced_features = typing.cast(AdvancedFeatures, advanced_features) if advanced_features else None - elif isinstance(new_agent._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures): - - advanced_features_model = new_agent._advanced_features.model_copy(update={"enable_mllm": None}) - + advanced_features_model = self._copy_model_update( - + new_agent._advanced_features, - + {"enable_mllm": None}, - + ) - if ( - advanced_features_model.enable_rtm is None - and advanced_features_model.enable_sal is None - @@ -214,6 +273,10 @@ class Agent: - return new_agent - - def with_avatar(self, vendor: BaseAvatar) -> "Agent": - + # Note: avatars are not supported with MLLM. The combination is rejected - + # at ``to_properties`` / ``AgentSession.start`` (only when the avatar is - + # enabled) so callers may still combine the two for testing or for the - + # disabled-avatar pattern. 
- required_sample_rate = vendor.required_sample_rate - if ( - required_sample_rate not in (None, 0) - @@ -282,7 +345,10 @@ class Agent: - {**new_agent._advanced_features, "enable_tools": enabled}, + from ..types.asr import Asr + from ..types.llm import Llm + from ..types.llm_style import LlmStyle as GeneratedLlmStyle + @@ -544,6 +546,23 @@ class Agent: ) - else: - - new_agent._advanced_features = new_agent._advanced_features.model_copy(update={"enable_tools": enabled}) - + new_agent._advanced_features = self._copy_model_update( - + new_agent._advanced_features, - + {"enable_tools": enabled}, - + ) - return new_agent - - def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent": - @@ -294,6 +360,23 @@ class Agent: - new_agent._parameters = parameters return new_agent + def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": @@ -2195,477 +1322,22 @@ patches: + return new_agent + def with_failure_message(self, message: str) -> "Agent": - """Returns a new Agent with the specified failure message. 
- - @@ -342,6 +425,33 @@ class Agent: - new_agent._filler_words = filler_words - return new_agent - - + @staticmethod - + def _field_value(value: typing.Any, field: str) -> typing.Any: - + if value is None: - + return None - + if isinstance(value, dict): - + return value.get(field) - + return getattr(value, field, None) - + - + @staticmethod - + def _copy_model_update(value: typing.Any, update: typing.Dict[str, typing.Any]) -> typing.Any: - + if hasattr(value, "model_copy"): - + return value.model_copy(update=update) - + if hasattr(value, "copy"): - + return value.copy(update=update) - + raise TypeError(f"Object of type {type(value).__name__} does not support model copying") - + - + def _resolved_parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - + enable_rtm = self._field_value(self._advanced_features, "enable_rtm") is True - + data_channel = self._field_value(self._parameters, "data_channel") - + if not enable_rtm or data_channel is not None: - + return self._parameters - + if self._parameters is None: - + return StartAgentsRequestPropertiesParameters(data_channel="rtm") - + if isinstance(self._parameters, dict): - + return typing.cast(SessionParamsInput, {**self._parameters, "data_channel": "rtm"}) - + return self._copy_model_update(self._parameters, {"data_channel": "rtm"}) - + - @property - def name(self) -> typing.Optional[str]: - return self._name - @@ -354,6 +464,10 @@ class Agent: - def tts(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._tts - - + @property - + def tts_sample_rate(self) -> typing.Optional[int]: - + return self._tts_sample_rate - + - @property - def stt(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._stt - @@ -536,6 +650,20 @@ class Agent: - expires_in: typing.Optional[int] = None, - skip_vendor_validation: bool = False, - ) -> StartAgentsRequestProperties: - + # Validate the MLLM + enabled-avatar combination BEFORE generating the - + # RTC token so callers 
get a clear, actionable error first (matches the - + # TypeScript and Go SDKs' fail-fast contract). - + mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - + is_mllm_mode = bool(mllm_flag or self._mllm is not None) - + avatar_enabled = ( - + isinstance(self._avatar, dict) and self._avatar.get("enable") is not False - + ) - + if is_mllm_mode and avatar_enabled: - + raise ValueError( - + "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - + "Remove the avatar configuration when using MLLM, or switch to a cascading session." - + ) - + - if token is None: - if app_id is None or app_certificate is None: - raise ValueError("Either token or app_id+app_certificate must be provided") - @@ -553,9 +681,6 @@ class Agent: - **token_kwargs, - ) - - - mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - - is_mllm_mode = bool(mllm_flag or self._mllm is not None) - - - base_kwargs: typing.Dict[str, typing.Any] = { - "channel": channel, - "token": token, - @@ -579,11 +704,12 @@ class Agent: - base_kwargs["avatar"] = self._avatar - if self._advanced_features is not None: - base_kwargs["advanced_features"] = self._advanced_features - - if self._parameters is not None: - - if isinstance(self._parameters, dict): - - base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**self._parameters) - + parameters = self._resolved_parameters() - + if parameters is not None: - + if isinstance(parameters, dict): - + base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**parameters) - else: - - base_kwargs["parameters"] = self._parameters - + base_kwargs["parameters"] = parameters - if self._geofence is not None: - base_kwargs["geofence"] = self._geofence - if self._labels is not None: - @@ -596,12 +722,10 @@ class Agent: - if is_mllm_mode: - if self._mllm is not None: - mllm_config = dict(self._mllm) - - if self._greeting: - + if self._greeting is not None: - 
mllm_config.setdefault("greeting_message", self._greeting) - - if self._failure_message: - + if self._failure_message is not None: - mllm_config.setdefault("failure_message", self._failure_message) - - if self._max_history is not None: - - mllm_config.setdefault("max_history", self._max_history) - base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) - - @@ -617,14 +741,14 @@ class Agent: - llm_config = dict(self._llm) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. - - if self._instructions: - + if self._instructions is not None: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - - if self._greeting: - - llm_config.setdefault("greeting_message", self._greeting) - - if self._failure_message: - - llm_config.setdefault("failure_message", self._failure_message) - + if self._greeting is not None: - + llm_config["greeting_message"] = self._greeting - + if self._failure_message is not None: - + llm_config["failure_message"] = self._failure_message - if self._max_history is not None: - - llm_config.setdefault("max_history", self._max_history) - + llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index 2408659..e41a399 100644 - --- a/src/agora_agent/agentkit/agent_session.py - +++ b/src/agora_agent/agentkit/agent_session.py - @@ -14,13 +14,16 @@ from ..agent_management.types.agent_think_agent_management_request_on_thinking_a - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent 
import Agent - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - + is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - + is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - @@ -182,17 +185,29 @@ class _AgentSessionBase: - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - + if self._is_mllm_mode(): - + raise ValueError( - + "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - + "Remove the avatar configuration when using MLLM, or switch to a cascading session." - + ) + """Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" + new_agent = self._clone() + diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py + index 5a9f39e..1f1b354 100644 + --- a/src/agora_agent/agentkit/vendors/llm.py + +++ b/src/agora_agent/agentkit/vendors/llm.py + @@ -2,6 +2,9 @@ from typing import Any, Dict, List, Optional, Union - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - + or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) + from pydantic import BaseModel, ConfigDict, Field, model_validator - tts_params = tts.get("params") if isinstance(tts, dict) else None - - sample_rate = tts_params.get("sample_rate") if isinstance(tts_params, dict) else None - + sample_rate = self._agent.tts_sample_rate - + if sample_rate is None and isinstance(tts_params, dict): - + sample_rate = ( - + tts_params.get("sample_rate") - + or tts_params.get("sample_rate_hertz") - + or tts_params.get("samplingRate") - + ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - @@ -211,6 +226,50 @@ class _AgentSessionBase: - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." 
- ) + +from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( + + StartAgentsRequestPropertiesLlmGreetingConfigs, + +) + from .base import BaseLLM - + def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - + avatar = properties.get("avatar") - + if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - + return - + - + params = avatar.get("params") - + if not isinstance(params, dict): - + params = {} - + avatar["params"] = params - + - + if is_generic_avatar(avatar): - + if not params.get("agora_appid"): - + params["agora_appid"] = self._app_id - + if not params.get("agora_channel"): - + params["agora_channel"] = self._channel - + - + if not is_rtc_avatar(avatar): - + validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - + return - + - + if not params.get("agora_token"): - + if not self._app_certificate: - + raise ValueError( - + "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - + "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." - + ) - + token_kwargs: typing.Dict[str, typing.Any] = {} - + if self._expires_in is not None: - + token_kwargs["token_expire"] = self._expires_in - + params["agora_token"] = generate_convo_ai_token( - + app_id=self._app_id, - + app_certificate=self._app_certificate, - + channel_name=self._channel, - + account=str(params["agora_uid"]), - + **token_kwargs, - + ) - + - + if str(params.get("agora_uid")) == self._agent_uid: - + self._warn( - + "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." 
- + ) - + - + validate_avatar_config(avatar, require_session_fields=True) - + - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - @@ -238,12 +297,17 @@ class _AgentSessionBase: - **token_opts, - ) - properties = self._dump_model(base_properties) - + self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - - mllm = dict(self._agent.mllm) - - if self._agent.greeting: - + mllm = self._dump_model(self._agent.mllm) - + if not isinstance(mllm, dict): - + mllm = {} - + if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - + if self._agent.failure_message is not None: - + mllm.setdefault("failure_message", self._agent.failure_message) - properties["mllm"] = mllm - return properties - - @@ -251,20 +315,41 @@ class _AgentSessionBase: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - - if self._agent.instructions: - + if self._agent.instructions is not None: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - - if self._agent.greeting: - - llm.setdefault("greeting_message", self._agent.greeting) - - if self._agent.failure_message: - - llm.setdefault("failure_message", self._agent.failure_message) - + if self._agent.greeting is not None: - + llm["greeting_message"] = self._agent.greeting - + if self._agent.failure_message is not None: - + llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: - - llm.setdefault("max_history", self._agent.max_history) - + llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - + @staticmethod - + def _page_value(pagination: typing.Any, field: str) -> typing.Any: - + if pagination is None: - + return None - + if 
isinstance(pagination, dict): - + return pagination.get(field) - + return getattr(pagination, field, None) - + - + @staticmethod - + def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - + turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - + return list(turns or []) - + - + @classmethod - + def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - + data = cls._dump_model(first_response) - + if not isinstance(data, dict): - + data = {} - + data["turns"] = turns - + return GetTurnsAgentsResponse(**data) - + - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - @@ -484,7 +569,12 @@ class AgentSession(_AgentSessionBase): - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - - """Inject a custom text instruction into the current session pipeline.""" - + """Inject a custom text instruction into the current session pipeline. - + - + In API v2.7, omitting ``on_listening_action`` uses the server default - + ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - + preserve the pre-v2.7 behavior. 
- + """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - @@ -547,15 +637,68 @@ class AgentSession(_AgentSessionBase): - self._app_id, self._agent_id, request_options=self._request_options() - ) - - - def get_turns(self) -> typing.Any: - + def get_turns( - + self, - + *, - + page_index: typing.Optional[int] = None, - + page_size: typing.Optional[int] = None, - + ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_turns( - - self._app_id, self._agent_id, request_options=self._request_options() - + self._app_id, - + self._agent_id, - + page_index=page_index, - + page_size=page_size, - + request_options=self._request_options(), - ) - - + def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - + """Get all turn analytics pages for this session. - + - + Raises ``RuntimeError`` if the server's pagination metadata is missing - + the fields required to advance, or if requesting the next page returns - + a page index that did not advance. - + """ - + response = self.get_turns(page_index=1, page_size=page_size) - + all_turns = self._response_turns(response) - + pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - + current_page = self._page_value(pagination, "page_index") or 1 - + while pagination is not None and self._page_value(pagination, "is_last_page") is False: - + total_pages = self._page_value(pagination, "total_pages") - + returned_index = self._page_value(pagination, "page_index") - + if returned_index is None and total_pages is None: - + raise RuntimeError( - + "get_all_turns pagination cannot continue: response must include " - + "page_index, total_pages, or is_last_page=true." 
- + ) - + if total_pages is not None and current_page >= total_pages: - + break - + next_page = current_page + 1 - + response = self.get_turns(page_index=next_page, page_size=page_size) - + all_turns.extend(self._response_turns(response)) - + pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - + returned_index = self._page_value(pagination, "page_index") if pagination else None - + if returned_index is not None: - + if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - + raise RuntimeError( - + f"get_all_turns pagination did not advance: requested page {next_page}, " - + f"received page {returned_index}." - + ) - + current_page = returned_index - + else: - + total_pages = self._page_value(pagination, "total_pages") if pagination else None - + is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - + if total_pages is None and is_last_page is not True: - + raise RuntimeError( - + "get_all_turns pagination cannot continue: response must include " - + "page_index, total_pages, or is_last_page=true." - + ) - + current_page = next_page - + return self._with_all_turns(response, all_turns) - + - - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. - @@ -734,7 +877,12 @@ class AsyncAgentSession(_AgentSessionBase): - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - - """Inject a custom text instruction into the current session pipeline.""" - + """Inject a custom text instruction into the current session pipeline. - + - + In API v2.7, omitting ``on_listening_action`` uses the server default - + ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - + preserve the pre-v2.7 behavior. 
- + """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - @@ -797,11 +945,64 @@ class AsyncAgentSession(_AgentSessionBase): - self._app_id, self._agent_id, request_options=self._request_options() - ) - - - async def get_turns(self) -> typing.Any: - + async def get_turns( - + self, - + *, - + page_index: typing.Optional[int] = None, - + page_size: typing.Optional[int] = None, - + ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_turns( - - self._app_id, self._agent_id, request_options=self._request_options() - + self._app_id, - + self._agent_id, - + page_index=page_index, - + page_size=page_size, - + request_options=self._request_options(), - ) - + - + async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - + """Get all turn analytics pages for this session. - + - + Raises ``RuntimeError`` if the server's pagination metadata is missing - + the fields required to advance, or if requesting the next page returns - + a page index that did not advance. - + """ - + response = await self.get_turns(page_index=1, page_size=page_size) - + all_turns = self._response_turns(response) - + pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - + current_page = self._page_value(pagination, "page_index") or 1 - + while pagination is not None and self._page_value(pagination, "is_last_page") is False: - + total_pages = self._page_value(pagination, "total_pages") - + returned_index = self._page_value(pagination, "page_index") - + if returned_index is None and total_pages is None: - + raise RuntimeError( - + "get_all_turns pagination cannot continue: response must include " - + "page_index, total_pages, or is_last_page=true." 
- + ) - + if total_pages is not None and current_page >= total_pages: - + break - + next_page = current_page + 1 - + response = await self.get_turns(page_index=next_page, page_size=page_size) - + all_turns.extend(self._response_turns(response)) - + pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - + returned_index = self._page_value(pagination, "page_index") if pagination else None - + if returned_index is not None: - + if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - + raise RuntimeError( - + f"get_all_turns pagination did not advance: requested page {next_page}, " - + f"received page {returned_index}." - + ) - + current_page = returned_index - + else: - + total_pages = self._page_value(pagination, "total_pages") if pagination else None - + is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - + if total_pages is None and is_last_page is not True: - + raise RuntimeError( - + "get_all_turns pagination cannot continue: response must include " - + "page_index, total_pages, or is_last_page=true." 
- + ) - + current_page = next_page - + return self._with_all_turns(response, all_turns) - -- - 2.52.0 - + LlmGreetingConfigs = Dict[str, Any] theirs_snapshot: src/agora_agent/agentkit/agent.py: | from __future__ import annotations @@ -2673,23 +1345,19 @@ patches: import time import typing import typing_extensions + import warnings if typing.TYPE_CHECKING: from .agent_session import AgentSession, AsyncAgentSession from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from ..agents.types.start_agents_request_properties_asr import StartAgentsRequestPropertiesAsr - from ..agents.types.start_agents_request_properties_asr_vendor import StartAgentsRequestPropertiesAsrVendor from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - from ..agents.types.start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm - from ..agents.types.start_agents_request_properties_llm_style import StartAgentsRequestPropertiesLlmStyle - from ..agents.types.start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm - from ..agents.types.start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties from ..agents.types.get_agents_response import GetAgentsResponse from ..agents.types.list_agents_response import ListAgentsResponse from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem + from ..agents.types.list_agents_response_data_list_item_status import ListAgentsResponseDataListItemStatus from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem from 
..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole @@ -2721,11 +1389,6 @@ patches: from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption from ..agents.types.start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode - from ..agents.types.start_agents_request_properties_mllm_turn_detection import StartAgentsRequestPropertiesMllmTurnDetection - from ..agents.types.start_agents_request_properties_mllm_turn_detection_mode import StartAgentsRequestPropertiesMllmTurnDetectionMode - from ..agents.types.start_agents_request_properties_llm_greeting_configs import StartAgentsRequestPropertiesLlmGreetingConfigs - from ..agents.types.start_agents_request_properties_llm_greeting_configs_mode import StartAgentsRequestPropertiesLlmGreetingConfigsMode - from ..agents.types.start_agents_request_properties_llm_mcp_servers_item import StartAgentsRequestPropertiesLlmMcpServersItem from ..agents.types.start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence from ..agents.types.start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc from ..agents.types.start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures @@ -2736,17 +1399,39 @@ patches: from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule from ..types.tts import Tts - from .token import generate_convo_ai_token, _validate_expires_in + from 
..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule + from ..types.tts import Tts + from ..types.asr import Asr + from ..types.llm import Llm + from ..types.llm_style import LlmStyle as GeneratedLlmStyle + from ..types.mllm import Mllm + from ..types.mllm_turn_detection import MllmTurnDetection + from ..types.mllm_turn_detection_mode import MllmTurnDetectionMode as GeneratedMllmTurnDetectionMode + from ..types.mllm_vendor import MllmVendor as GeneratedMllmVendor + from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( + AgentThinkAgentManagementRequestOnListeningAction, + ) + from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( + AgentThinkAgentManagementRequestOnThinkingAction, + ) + from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( + AgentThinkAgentManagementRequestOnSpeakingAction, + ) + from ..agent_management.types.agent_think_agent_management_response import ( + AgentThinkAgentManagementResponse, + ) + from ..core.pydantic_utilities import parse_obj_as from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS # Top-level aliases - LlmConfig = StartAgentsRequestPropertiesLlm - LlmStyle = StartAgentsRequestPropertiesLlmStyle - SttConfig = StartAgentsRequestPropertiesAsr - SttVendor = StartAgentsRequestPropertiesAsrVendor + LlmConfig = Llm + LlmStyle = GeneratedLlmStyle + SttConfig = Asr + AsrConfig = SttConfig + SttVendor = typing.Any TtsConfig = Tts - MllmConfig = StartAgentsRequestPropertiesMllm - MllmVendor = StartAgentsRequestPropertiesMllmVendor + MllmConfig = Mllm + MllmVendor = GeneratedMllmVendor AvatarConfig = StartAgentsRequestPropertiesAvatar AvatarVendor = StartAgentsRequestPropertiesAvatarVendor TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection @@ -2790,13 +1475,14 @@ 
patches: ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario InterruptionConfig = StartAgentsRequestPropertiesInterruption InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection - MllmTurnDetectionMode = StartAgentsRequestPropertiesMllmTurnDetectionMode + MllmTurnDetectionConfig = MllmTurnDetection + MllmTurnDetectionMode = GeneratedMllmTurnDetectionMode AgentConfig = StartAgentsRequestProperties AgentConfigUpdate = UpdateAgentsRequestProperties SessionInfo = GetAgentsResponse SessionListResponse = ListAgentsResponse SessionSummary = ListAgentsResponseDataListItem + SessionStatus = ListAgentsResponseDataListItemStatus ConversationHistory = GetHistoryAgentsResponse ConversationTurn = GetHistoryAgentsResponseContentsItem ConversationRole = GetHistoryAgentsResponseContentsItemRole @@ -2814,10 +1500,50 @@ patches: enable_error_message: bool audio_scenario: ParametersAudioScenario + + class ThinkOptions(typing_extensions.TypedDict, total=False): + on_listening_action: AgentThinkAgentManagementRequestOnListeningAction + on_thinking_action: AgentThinkAgentManagementRequestOnThinkingAction + on_speaking_action: AgentThinkAgentManagementRequestOnSpeakingAction + interruptable: bool + metadata: typing.Dict[str, str] + + + class GetTurnsOptions(typing_extensions.TypedDict, total=False): + page_index: int + page_size: int + + + class SayOptions(typing_extensions.TypedDict, total=False): + priority: SpeakAgentsRequestPriority + interruptable: bool + + + class SessionOptions(typing_extensions.TypedDict, total=False): + name: str + channel: str + token: str + agent_uid: str + remote_uids: typing.List[str] + idle_timeout: int + enable_string_uid: bool + preset: typing.Union[str, typing.Sequence[str]] + pipeline_id: str + expires_in: int + debug: bool + warn: typing.Callable[[str], None] + + + def _start_properties_from_mapping( + properties: typing.Mapping[str, typing.Any], 
+ ) -> StartAgentsRequestProperties: + return parse_obj_as(StartAgentsRequestProperties, dict(properties)) + + # LLM sub-type aliases - LlmGreetingConfigs = StartAgentsRequestPropertiesLlmGreetingConfigs - LlmGreetingConfigsMode = StartAgentsRequestPropertiesLlmGreetingConfigsMode - McpServersItem = StartAgentsRequestPropertiesLlmMcpServersItem + LlmGreetingConfigs = typing.Dict[str, typing.Any] + LlmGreetingConfigsMode = typing.Any + McpServersItem = typing.Dict[str, typing.Any] # Additional top-level config aliases GeofenceConfig = StartAgentsRequestPropertiesGeofence @@ -2829,6 +1555,104 @@ patches: FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule + # Think type aliases and response + ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction + ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction + ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction + ThinkResponse = AgentThinkAgentManagementResponse + + from .token import generate_convo_ai_token, _parse_numeric_uid, _validate_expires_in + + TurnDetectionLanguage = typing_extensions.Literal[ + "ar-EG", + "ar-JO", + "ar-SA", + "ar-AE", + "bn-IN", + "zh-CN", + "zh-HK", + "zh-TW", + "nl-NL", + "en-IN", + "en-US", + "fil-PH", + "fr-FR", + "de-DE", + "gu-IN", + "he-IL", + "hi-IN", + "id-ID", + "it-IT", + "ja-JP", + "kn-IN", + "ko-KR", + "ms-MY", + "fa-IR", + "pt-PT", + "ru-RU", + "es-ES", + "ta-IN", + "te-IN", + "th-TH", + "tr-TR", + "vi-VN", + ] + + DEFAULT_TURN_DETECTION_LANGUAGE: TurnDetectionLanguage = "en-US" + TURN_DETECTION_LANGUAGE_VALUES: typing.Tuple[TurnDetectionLanguage, ...] 
= ( + "ar-EG", + "ar-JO", + "ar-SA", + "ar-AE", + "bn-IN", + "zh-CN", + "zh-HK", + "zh-TW", + "nl-NL", + "en-IN", + "en-US", + "fil-PH", + "fr-FR", + "de-DE", + "gu-IN", + "he-IL", + "hi-IN", + "id-ID", + "it-IT", + "ja-JP", + "kn-IN", + "ko-KR", + "ms-MY", + "fa-IR", + "pt-PT", + "ru-RU", + "es-ES", + "ta-IN", + "te-IN", + "th-TH", + "tr-TR", + "vi-VN", + ) + _TURN_DETECTION_LANGUAGES = set(TURN_DETECTION_LANGUAGE_VALUES) + + + def _dump_optional_model(value: typing.Any) -> typing.Any: + if hasattr(value, "model_dump"): + return value.model_dump(exclude_none=True) + if hasattr(value, "dict"): + return value.dict(exclude_none=True) + return value + + + def _is_turn_detection_language(value: typing.Any) -> bool: + return isinstance(value, str) and value in _TURN_DETECTION_LANGUAGES + + + def _validate_turn_detection_language(value: typing.Any) -> TurnDetectionLanguage: + if not _is_turn_detection_language(value): + raise ValueError(f"Invalid turn_detection.language: {value}") + return value # type: ignore[return-value] + class Agent: """A reusable agent definition. @@ -2836,16 +1660,21 @@ patches: Use the fluent builder methods (.with_llm(), .with_tts(), .with_stt(), .with_mllm()) to configure vendor settings after construction. + Deprecated: + The Agent-level ``instructions``, ``greeting``, ``failure_message``, + ``max_history``, and ``greeting_configs`` convenience fields are kept + for compatibility. Configure those values on the LLM or MLLM vendor + instead. + Examples -------- - >>> from agora_agent.agentkit import Agent - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT + >>> from agora_agent import Agent, OpenAI, ElevenLabsTTS, DeepgramSTT >>> >>> agent = Agent(instructions="You are a helpful voice assistant.") >>> agent = ( ... agent - ... .with_llm(OpenAI(api_key="...", model="gpt-4")) - ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", sample_rate=24000)) + ... 
.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")) + ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1", sample_rate=24000)) ... .with_stt(DeepgramSTT(api_key="...", model="nova-2")) ... ) """ @@ -2866,8 +1695,11 @@ patches: labels: typing.Optional[typing.Dict[str, str]] = None, rtc: typing.Optional[RtcConfig] = None, filler_words: typing.Optional[FillerWordsConfig] = None, + greeting_configs: typing.Optional[LlmGreetingConfigs] = None, + pipeline_id: typing.Optional[str] = None, ): self._name = name + self._pipeline_id = pipeline_id self._instructions = instructions self._greeting = greeting self._failure_message = failure_message @@ -2888,6 +1720,7 @@ patches: self._labels = labels self._rtc = rtc self._filler_words = filler_words + self._greeting_configs = greeting_configs def with_llm(self, vendor: BaseLLM) -> "Agent": new_agent = self._clone() @@ -2975,15 +1808,23 @@ patches: return new_agent def with_instructions(self, instructions: str) -> "Agent": + """Deprecated. Configure system messages on the LLM vendor instead.""" new_agent = self._clone() new_agent._instructions = instructions return new_agent def with_greeting(self, greeting: str) -> "Agent": + """Deprecated. Configure the greeting on the LLM or MLLM vendor instead.""" new_agent = self._clone() new_agent._greeting = greeting return new_agent + def with_greeting_configs(self, configs: LlmGreetingConfigs) -> "Agent": + """Deprecated. Configure greeting playback on the LLM vendor instead.""" + new_agent = self._clone() + new_agent._greeting_configs = configs + return new_agent + def with_name(self, name: str) -> "Agent": new_agent = self._clone() new_agent._name = name @@ -3047,17 +1888,31 @@ patches: ) return new_agent - def with_failure_message(self, message: str) -> "Agent": - """Returns a new Agent with the specified failure message. 
+ def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": + """Returns a new Agent with the specified RTC audio scenario.""" + new_agent = self._clone() + if new_agent._parameters is None: + new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) + elif isinstance(new_agent._parameters, dict): + new_agent._parameters = typing.cast( + SessionParamsInput, + {**new_agent._parameters, "audio_scenario": audio_scenario}, + ) + else: + new_agent._parameters = self._copy_model_update( + new_agent._parameters, + {"audio_scenario": audio_scenario}, + ) + return new_agent - The failure message is played via TTS when the LLM call fails. - """ + def with_failure_message(self, message: str) -> "Agent": + """Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() new_agent._failure_message = message return new_agent def with_max_history(self, max_history: int) -> "Agent": - """Returns a new Agent with the specified maximum conversation history length.""" + """Deprecated. 
Configure max history on the LLM vendor instead.""" new_agent = self._clone() new_agent._max_history = max_history return new_agent @@ -3126,6 +1981,11 @@ patches: def name(self) -> typing.Optional[str]: return self._name + @property + def pipeline_id(self) -> typing.Optional[str]: + """Published AI Studio pipeline ID used as this agent's base configuration.""" + return self._pipeline_id + @property def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: return self._llm @@ -3162,6 +2022,10 @@ patches: def greeting(self) -> typing.Optional[str]: return self._greeting + @property + def greeting_configs(self) -> typing.Optional[LlmGreetingConfigs]: + return self._greeting_configs + @property def failure_message(self) -> typing.Optional[str]: return self._failure_message @@ -3206,6 +2070,7 @@ patches: def config(self) -> typing.Dict[str, typing.Any]: return { "name": self._name, + "pipeline_id": self._pipeline_id, "instructions": self._instructions, "greeting": self._greeting, "failure_message": self._failure_message, @@ -3224,6 +2089,7 @@ patches: "labels": self._labels, "rtc": self._rtc, "filler_words": self._filler_words, + "greeting_configs": self._greeting_configs, } def create_session( @@ -3319,6 +2185,8 @@ patches: app_certificate: typing.Optional[str] = None, expires_in: typing.Optional[int] = None, skip_vendor_validation: bool = False, + skip_vendor_validation_categories: typing.Optional[typing.AbstractSet[str]] = None, + allow_missing_vendor_categories: typing.Optional[typing.AbstractSet[str]] = None, ) -> StartAgentsRequestProperties: # Validate the MLLM + enabled-avatar combination BEFORE generating the # RTC token so callers get a clear, actionable error first (matches the @@ -3347,7 +2215,7 @@ patches: app_id=app_id, app_certificate=app_certificate, channel_name=channel, - account=agent_uid, + uid=_parse_numeric_uid(agent_uid, "agent_uid"), **token_kwargs, ) @@ -3397,13849 +2265,1195 @@ patches: if self._failure_message is not None: 
mllm_config.setdefault("failure_message", self._failure_message) base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) - - if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) - - if self._tts is None: - raise ValueError("TTS configuration is required. Use with_tts() to set it.") - - if self._llm is None: - raise ValueError("LLM configuration is required. Use with_llm() to set it.") - - llm_config = dict(self._llm) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. - if self._instructions is not None: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: - llm_config["greeting_message"] = self._greeting - if self._failure_message is not None: - llm_config["failure_message"] = self._failure_message - if self._max_history is not None: - llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - if self._stt is not None: - base_kwargs["asr"] = self._stt - - return StartAgentsRequestProperties(**base_kwargs) - - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - new_agent._mllm = self._mllm - new_agent._tts_sample_rate = self._tts_sample_rate - new_agent._avatar = self._avatar - new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate - new_agent._turn_detection = self._turn_detection - new_agent._interruption = self._interruption - new_agent._sal = self._sal - new_agent._advanced_features = self._advanced_features - new_agent._parameters = self._parameters - new_agent._instructions = self._instructions - new_agent._greeting = self._greeting - new_agent._failure_message = self._failure_message - new_agent._max_history = self._max_history - 
new_agent._geofence = self._geofence - new_agent._labels = self._labels - new_agent._rtc = self._rtc - new_agent._filler_words = self._filler_words - return new_agent - src/agora_agent/agentkit/agent_session.py: | - import typing - import warnings - - from ..core.api_error import ApiError - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .presets import resolve_session_presets - from .token import generate_convo_ai_token - - - class _AgentSessionRequiredOptions(typing.TypedDict, total=True): - """Required fields shared by both sync and async session constructors.""" - - client: typing.Any - agent: Agent - app_id: str - name: str - channel: str - agent_uid: str - remote_uids: typing.List[str] - - - class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - """Configuration options for creating an agent session. 
- - Required fields - --------------- - client, agent, app_id, name, channel, agent_uid, remote_uids - - Optional fields - --------------- - app_certificate, token, idle_timeout, enable_string_uid, expires_in - """ - - app_certificate: str - token: str - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - class _AgentSessionBase: - """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. - - Not intended for direct use — instantiate one of the concrete subclasses or - call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. - """ - - def __init__( - self, - client: typing.Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - app_certificate: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ): - self._client = client - self._agent = agent - self._app_id = app_id - self._app_certificate = app_certificate - self._name = name - self._channel = channel - self._token = token - self._agent_uid = agent_uid - self._remote_uids = remote_uids - self._idle_timeout = idle_timeout - self._enable_string_uid = enable_string_uid - self._preset = preset - self._pipeline_id = pipeline_id - self._expires_in = expires_in - self._debug = debug - self._warn = warn or warnings.warn - self._agent_id: typing.Optional[str] = None - self._status: str = "idle" - self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - - # 
------------------------------------------------------------------ - # Public read-only properties - # ------------------------------------------------------------------ - - @property - def id(self) -> typing.Optional[str]: - return self._agent_id - - @property - def status(self) -> str: - return self._status - - @property - def agent(self) -> Agent: - return self._agent - - @property - def app_id(self) -> str: - return self._app_id - - @property - def raw(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentsClient. - - Use this to access any new endpoints that Fern generates without - waiting for agentkit method updates. - """ - return self._client.agents - - @property - def raw_agent_management(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentManagement client.""" - return self._client.agent_management - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: - """Return per-request auth headers when client is in app-credentials mode. - - In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated - for every request and returned as ``Authorization: agora token=``. - In basic-auth mode this returns ``None`` (the client-level header is used). 
- """ - if getattr(self._client, "auth_mode", None) != "app-credentials": - return None - app_id: str = getattr(self._client, "app_id", self._app_id) - app_certificate: typing.Optional[str] = getattr( - self._client, "app_certificate", self._app_certificate - ) - if not app_certificate: - raise RuntimeError("app_certificate is required for app-credentials auth mode") - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=self._channel, - account=self._agent_uid, - ) - return {"Authorization": f"agora token={token}"} - - def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - """Build request_options dict with per-request auth headers if needed.""" - headers = self._convo_ai_headers() - if headers is None: - return None - return {"additional_headers": headers} - - def _validate_avatar_config(self) -> None: - avatar = self._agent.avatar - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - if self._is_mllm_mode(): - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) - - tts_params = tts.get("params") if isinstance(tts, dict) else None - sample_rate = self._agent.tts_sample_rate - if sample_rate is None and isinstance(tts_params, dict): - sample_rate = ( - tts_params.get("sample_rate") - or tts_params.get("sample_rate_hertz") - or tts_params.get("samplingRate") - ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - self._warn( - "Warning: HeyGen avatar detected but TTS sample_rate is not explicitly set. " - "HeyGen requires 24,000 Hz. 
Please ensure your TTS provider is configured for 24kHz." - ) - elif is_live_avatar_avatar(avatar): - self._warn( - "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " - "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_akool_avatar(avatar): - self._warn( - "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." - ) - - def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - avatar = properties.get("avatar") - if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - return - - params = avatar.get("params") - if not isinstance(params, dict): - params = {} - avatar["params"] = params - - if is_generic_avatar(avatar): - if not params.get("agora_appid"): - params["agora_appid"] = self._app_id - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - if not is_rtc_avatar(avatar): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_token"): - if not self._app_certificate: - raise ValueError( - "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." - ) - token_kwargs: typing.Dict[str, typing.Any] = {} - if self._expires_in is not None: - token_kwargs["token_expire"] = self._expires_in - params["agora_token"] = generate_convo_ai_token( - app_id=self._app_id, - app_certificate=self._app_certificate, - channel_name=self._channel, - account=str(params["agora_uid"]), - **token_kwargs, - ) - - if str(params.get("agora_uid")) == self._agent_uid: - self._warn( - "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." 
- ) - - validate_avatar_config(avatar, require_session_fields=True) - - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if isinstance(value, dict): - return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} - if isinstance(value, list): - return [_AgentSessionBase._dump_model(item) for item in value] - return value - - def _is_mllm_mode(self) -> bool: - mllm = self._agent.mllm - if isinstance(mllm, dict) and mllm.get("enable") is True: - return True - return mllm is not None - - def _build_start_properties(self, token_opts: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - skip_vendor_validation=True, - **token_opts, - ) - properties = self._dump_model(base_properties) - self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - mllm = self._dump_model(self._agent.mllm) - if not isinstance(mllm, dict): - mllm = {} - if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - if self._agent.failure_message is not None: - mllm.setdefault("failure_message", self._agent.failure_message) - properties["mllm"] = mllm - return properties - - if self._agent.tts is not None: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - if self._agent.instructions is not None: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: - llm["greeting_message"] = self._agent.greeting - if self._agent.failure_message is not None: - llm["failure_message"] = self._agent.failure_message - if self._agent.max_history 
is not None: - llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - @staticmethod - def _page_value(pagination: typing.Any, field: str) -> typing.Any: - if pagination is None: - return None - if isinstance(pagination, dict): - return pagination.get(field) - return getattr(pagination, field, None) - - @staticmethod - def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - return list(turns or []) - - @classmethod - def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - data = cls._dump_model(first_response) - if not isinstance(data, dict): - data = {} - data["turns"] = turns - return GetTurnsAgentsResponse(**data) - - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - - def on(self, event: str, handler: typing.Callable[..., None]) -> None: - """Register an event handler. - - Parameters - ---------- - event : str - The event type (``started``, ``stopped``, ``error``). - handler : callable - The event handler to invoke when the event fires. 
- """ - if event not in self._event_handlers: - self._event_handlers[event] = [] - self._event_handlers[event].append(handler) - - def off(self, event: str, handler: typing.Callable[..., None]) -> None: - """Unregister a previously registered event handler.""" - handlers = self._event_handlers.get(event) - if handlers and handler in handlers: - handlers.remove(handler) - - def _emit(self, event: str, data: typing.Any) -> None: - handlers = self._event_handlers.get(event) - if handlers: - for handler in handlers: - try: - handler(data) - except Exception as exc: - # Prevent a misbehaving handler from blocking other handlers or - # the session lifecycle. Warn so the error is not silently lost. - warnings.warn( - f"Event handler for '{event}' raised an exception: {exc}", - stacklevel=2, - ) - - - class AgentSession(_AgentSessionBase): - """Manages the lifecycle of an agent session (synchronous). - - This class provides a high-level interface for managing agent sessions, - including starting, stopping, and interacting with the agent. - - Use :meth:`Agent.create_session` to create a session — this is the - recommended entry point. - - Examples - -------- - >>> from agora_agent import Agora, Area - >>> from agora_agent.agentkit import Agent - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - >>> session.say("Hello!") - >>> session.stop() - """ - - def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. 
- ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=self._pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. - """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn 
analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_turns( - self._app_id, - self._agent_id, - page_index=page_index, - page_size=page_size, - request_options=self._request_options(), - ) - - def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." 
- ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - current_page = next_page - return self._with_all_turns(response, all_turns) - - - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. - - Use :meth:`Agent.create_async_session` to create a session — this is the - recommended entry point. - - Examples - -------- - >>> from agora_agent import AsyncAgora, Area - >>> from agora_agent.agentkit import Agent - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - >>> await session.say("Hello!") - >>> await session.stop() - """ - - async def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. 
- """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = await self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=self._pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - await self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - await self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - async def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return await self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. - """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - async def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - ) -> 
GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_turns( - self._app_id, - self._agent_id, - page_index=page_index, - page_size=page_size, - request_options=self._request_options(), - ) - - async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = await self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = await self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) - status: unresolved - - id: patch-eaec58eb - content_hash: sha256:8390ced175326080fc76021a97d315e71229bbc9ad70eef35a63eb9968df7830 - original_commit: eaec58eb2edfe03b1311a32dd137a867edf5d096 - original_message: "refactor(agentkit): align deprecated vendor aliases with canonical names" - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - src/agora_agent/agentkit/vendors/__init__.py - - src/agora_agent/agentkit/vendors/avatar.py - - src/agora_agent/agentkit/vendors/mllm.py - patch_content: |+ - diff --git a/src/agora_agent/agentkit/vendors/__init__.py b/src/agora_agent/agentkit/vendors/__init__.py - index 689eab1..8e2042e 100644 - --- a/src/agora_agent/agentkit/vendors/__init__.py - +++ b/src/agora_agent/agentkit/vendors/__init__.py - @@ -13,7 +13,7 @@ from .base import ( - ) - from .avatar import AkoolAvatar, AnamAvatar, GenericAvatar, HeyGenAvatar, LiveAvatarAvatar - from .llm import Anthropic, AzureOpenAI, Gemini, OpenAI - -from .mllm import GeminiLive, OpenAIRealtime, VertexAI, XaiGrok, XaiRealtime - +from .mllm import GeminiLive, OpenAIRealtime, VertexAI, XaiGrok - from .stt import ( - AmazonSTT, - AresSTT, - @@ -83,7 +83,6 @@ __all__ = [ - "GeminiLive", - "VertexAI", - "XaiGrok", - - "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py - index 00cad8f..50bdd08 100644 - --- a/src/agora_agent/agentkit/vendors/avatar.py - +++ b/src/agora_agent/agentkit/vendors/avatar.py - @@ -5,19 +5,19 @@ from pydantic import BaseModel, ConfigDict, Field, field_validator - - from .base import BaseAvatar - - -HEYGEN_SAMPLE_RATE = 24000 - LIVEAVATAR_SAMPLE_RATE = 24000 - +HEYGEN_SAMPLE_RATE = LIVEAVATAR_SAMPLE_RATE - AKOOL_SAMPLE_RATE = 16000 - - - -class HeyGenAvatarOptions(BaseModel): - +class 
LiveAvatarAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - - api_key: str = Field(..., description="HeyGen API key") - + api_key: str = Field(..., description="LiveAvatar API key") - quality: str = Field(..., description="Avatar quality: low, medium, or high") - agora_uid: str = Field(..., description="Agora UID for the avatar stream") - agora_token: Optional[str] = Field(default=None, description="RTC token for avatar authentication") - - avatar_id: Optional[str] = Field(default=None, description="HeyGen avatar ID") - + avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - disable_idle_timeout: Optional[bool] = Field(default=None, description="Whether to disable idle timeout") - activity_idle_timeout: Optional[int] = Field(default=None, description="Idle timeout in seconds") - @@ -31,20 +31,14 @@ class HeyGenAvatarOptions(BaseModel): - raise ValueError(f"Invalid quality '{v}'. Must be one of: {', '.join(valid)}") - return v - - -class HeyGenAvatar(BaseAvatar): - - """Deprecated: HeyGen has been renamed to LiveAvatar. 
Use LiveAvatarAvatar instead.""" - - +class LiveAvatarAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - - warnings.warn( - - "HeyGenAvatar is deprecated; use LiveAvatarAvatar instead.", - - DeprecationWarning, - - stacklevel=2, - - ) - - self.options = HeyGenAvatarOptions(**kwargs) - + self.options = LiveAvatarAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - - return HEYGEN_SAMPLE_RATE - + return LIVEAVATAR_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - @@ -65,71 +59,79 @@ class HeyGenAvatar(BaseAvatar): - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - - return {"enable": enable, "vendor": "heygen", "params": params} - + return {"enable": enable, "vendor": "liveavatar", "params": params} - - - -class AkoolAvatarOptions(BaseModel): - - model_config = ConfigDict(extra="forbid") - +class HeyGenAvatarOptions(LiveAvatarAvatarOptions): - + """Deprecated: use :class:`LiveAvatarAvatarOptions` instead.""" - - - api_key: str = Field(..., description="Akool API key") - - avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - -class AkoolAvatar(BaseAvatar): - +class HeyGenAvatar(BaseAvatar): - + """Deprecated: HeyGen has been renamed to LiveAvatar. 
Use LiveAvatarAvatar instead.""" - + - def __init__(self, **kwargs: Any): - - self.options = AkoolAvatarOptions(**kwargs) - + warnings.warn( - + "HeyGenAvatar is deprecated; use LiveAvatarAvatar instead.", - + DeprecationWarning, - + stacklevel=2, - + ) - + self.options = HeyGenAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - - return AKOOL_SAMPLE_RATE - + return HEYGEN_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - + "quality": self.options.quality, - + "agora_uid": self.options.agora_uid, - } - - + if self.options.agora_token is not None: - + params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - + if self.options.disable_idle_timeout is not None: - + params["disable_idle_timeout"] = self.options.disable_idle_timeout - + if self.options.activity_idle_timeout is not None: - + params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - - return {"enable": enable, "vendor": "akool", "params": params} - + return {"enable": enable, "vendor": "heygen", "params": params} - - - -class LiveAvatarAvatarOptions(HeyGenAvatarOptions): - - pass - +class AkoolAvatarOptions(BaseModel): - + model_config = ConfigDict(extra="forbid") - + - + api_key: str = Field(..., description="Akool API key") - + avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - + enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - - -class LiveAvatarAvatar(BaseAvatar): - +class AkoolAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - - 
self.options = LiveAvatarAvatarOptions(**kwargs) - + self.options = AkoolAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - - return LIVEAVATAR_SAMPLE_RATE - + return AKOOL_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - - "quality": self.options.quality, - - "agora_uid": self.options.agora_uid, - } - - - if self.options.agora_token is not None: - - params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - - if self.options.disable_idle_timeout is not None: - - params["disable_idle_timeout"] = self.options.disable_idle_timeout - - if self.options.activity_idle_timeout is not None: - - params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - - return {"enable": enable, "vendor": "liveavatar", "params": params} - + return {"enable": enable, "vendor": "akool", "params": params} - - - class GenericAvatarOptions(BaseModel): - @@ -145,6 +147,7 @@ class GenericAvatarOptions(BaseModel): - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - + - class GenericAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = GenericAvatarOptions(**kwargs) - @@ -178,10 +181,11 @@ class AnamAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Anam API key") - - persona_id: Optional[str] = Field(default=None, description="Persona ID") - + persona_id: Optional[str] = Field(default=None, description="Anam persona ID") - enable: Optional[bool] = Field(default=None, 
description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - + - class AnamAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = AnamAvatarOptions(**kwargs) - diff --git a/src/agora_agent/agentkit/vendors/mllm.py b/src/agora_agent/agentkit/vendors/mllm.py - index cd6cd07..b58f040 100644 - --- a/src/agora_agent/agentkit/vendors/mllm.py - +++ b/src/agora_agent/agentkit/vendors/mllm.py - @@ -1,4 +1,3 @@ - -import warnings - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - @@ -119,30 +118,6 @@ class XaiGrok(BaseMLLM): - return config - - - -class XaiRealtimeOptions(XaiGrokOptions): - - """Deprecated: use :class:`XaiGrokOptions` instead.""" - - - - def __init__(self, **data: Any): - - warnings.warn( - - "XaiRealtimeOptions is deprecated; use XaiGrokOptions instead.", - - DeprecationWarning, - - stacklevel=2, - - ) - - super().__init__(**data) - - - - - -class XaiRealtime(XaiGrok): - - """Deprecated: use :class:`XaiGrok` instead.""" - - - - def __init__(self, **kwargs: Any): - - warnings.warn( - - "XaiRealtime is deprecated; use XaiGrok instead.", - - DeprecationWarning, - - stacklevel=2, - - ) - - super().__init__(**kwargs) - - - - - class VertexAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - theirs_snapshot: - src/agora_agent/agentkit/vendors/__init__.py: | - from .base import ( - BaseAvatar, - BaseLLM, - BaseMLLM, - BaseSTT, - BaseTTS, - CartesiaSampleRate, - ElevenLabsSampleRate, - GoogleTTSSampleRate, - MicrosoftSampleRate, - OpenAISampleRate, - SampleRate, - ) - from .avatar import AkoolAvatar, AnamAvatar, GenericAvatar, HeyGenAvatar, LiveAvatarAvatar - from .llm import Anthropic, AzureOpenAI, Gemini, OpenAI - from .mllm import GeminiLive, OpenAIRealtime, VertexAI, XaiGrok - from .stt import ( - AmazonSTT, - AresSTT, - AssemblyAISTT, - DeepgramSTT, - GoogleSTT, - 
MicrosoftSTT, - OpenAISTT, - SarvamSTT, - SpeechmaticsSTT, - ) - from .tts import ( - AmazonTTS, - CartesiaTTS, - DeepgramTTS, - ElevenLabsTTS, - FishAudioTTS, - GoogleTTS, - HumeAITTS, - MicrosoftTTS, - MiniMaxTTS, - MurfTTS, - OpenAITTS, - RimeTTS, - SarvamTTS, - ) - - __all__ = [ - "BaseLLM", - "BaseTTS", - "BaseSTT", - "BaseMLLM", - "BaseAvatar", - "SampleRate", - "ElevenLabsSampleRate", - "MicrosoftSampleRate", - "OpenAISampleRate", - "CartesiaSampleRate", - "GoogleTTSSampleRate", - "OpenAI", - "AzureOpenAI", - "Anthropic", - "Gemini", - "ElevenLabsTTS", - "MicrosoftTTS", - "OpenAITTS", - "CartesiaTTS", - "DeepgramTTS", - "GoogleTTS", - "AmazonTTS", - "HumeAITTS", - "RimeTTS", - "FishAudioTTS", - "MiniMaxTTS", - "MurfTTS", - "SarvamTTS", - "SpeechmaticsSTT", - "DeepgramSTT", - "MicrosoftSTT", - "OpenAISTT", - "GoogleSTT", - "AmazonSTT", - "AssemblyAISTT", - "AresSTT", - "SarvamSTT", - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - "XaiGrok", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - "GenericAvatar", - ] - src/agora_agent/agentkit/vendors/avatar.py: | - import warnings - from typing import Any, Dict, Optional - - from pydantic import BaseModel, ConfigDict, Field, field_validator - - from .base import BaseAvatar - - LIVEAVATAR_SAMPLE_RATE = 24000 - HEYGEN_SAMPLE_RATE = LIVEAVATAR_SAMPLE_RATE - AKOOL_SAMPLE_RATE = 16000 - - - class LiveAvatarAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="LiveAvatar API key") - quality: str = Field(..., description="Avatar quality: low, medium, or high") - agora_uid: str = Field(..., description="Agora UID for the avatar stream") - agora_token: Optional[str] = Field(default=None, description="RTC token for avatar authentication") - avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - disable_idle_timeout: Optional[bool] 
= Field(default=None, description="Whether to disable idle timeout") - activity_idle_timeout: Optional[int] = Field(default=None, description="Idle timeout in seconds") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - @field_validator("quality") - @classmethod - def validate_quality(cls, v: str) -> str: - valid = ("low", "medium", "high") - if v not in valid: - raise ValueError(f"Invalid quality '{v}'. Must be one of: {', '.join(valid)}") - return v - - - class LiveAvatarAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = LiveAvatarAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return LIVEAVATAR_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "quality": self.options.quality, - "agora_uid": self.options.agora_uid, - } - - if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - if self.options.disable_idle_timeout is not None: - params["disable_idle_timeout"] = self.options.disable_idle_timeout - if self.options.activity_idle_timeout is not None: - params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "liveavatar", "params": params} - - - class HeyGenAvatarOptions(LiveAvatarAvatarOptions): - """Deprecated: use :class:`LiveAvatarAvatarOptions` instead.""" - - - class HeyGenAvatar(BaseAvatar): - """Deprecated: HeyGen has been renamed to LiveAvatar. 
Use LiveAvatarAvatar instead.""" - - def __init__(self, **kwargs: Any): - warnings.warn( - "HeyGenAvatar is deprecated; use LiveAvatarAvatar instead.", - DeprecationWarning, - stacklevel=2, - ) - self.options = HeyGenAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return HEYGEN_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "quality": self.options.quality, - "agora_uid": self.options.agora_uid, - } - - if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.avatar_id is not None: - params["avatar_id"] = self.options.avatar_id - if self.options.disable_idle_timeout is not None: - params["disable_idle_timeout"] = self.options.disable_idle_timeout - if self.options.activity_idle_timeout is not None: - params["activity_idle_timeout"] = self.options.activity_idle_timeout - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "heygen", "params": params} - - - class AkoolAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Akool API key") - avatar_id: Optional[str] = Field(default=None, description="Avatar ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - - class AkoolAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = AkoolAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return AKOOL_SAMPLE_RATE - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - } - - if self.options.avatar_id is not None: - params["avatar_id"] = 
self.options.avatar_id - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "akool", "params": params} - - - class GenericAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Generic avatar provider API key") - api_base_url: str = Field(..., description="Avatar provider API base URL") - avatar_id: str = Field(..., description="Avatar ID") - agora_uid: str = Field(..., description="Agora UID for the avatar video stream") - agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") - agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") - agora_channel: Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - - class GenericAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = GenericAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return 0 - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "api_base_url": self.options.api_base_url, - "avatar_id": self.options.avatar_id, - "agora_uid": self.options.agora_uid, - } - - if self.options.agora_appid is not None: - params["agora_appid"] = self.options.agora_appid - if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.agora_channel is not None: - params["agora_channel"] = self.options.agora_channel - if self.options.additional_params is not None: - params = 
{**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "generic", "params": params} - - - class AnamAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Anam API key") - persona_id: Optional[str] = Field(default=None, description="Anam persona ID") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific parameters") - - - class AnamAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = AnamAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return 0 - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - } - - if self.options.persona_id is not None: - params["persona_id"] = self.options.persona_id - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "anam", "params": params} - src/agora_agent/agentkit/vendors/mllm.py: | - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - - from ...agents.types.start_agents_request_properties_mllm_turn_detection import ( - StartAgentsRequestPropertiesMllmTurnDetection, - ) - from .base import BaseMLLM - - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection - - - class OpenAIRealtimeOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="OpenAI API key") - model: Optional[str] = Field(default=None, description="Model name (e.g., gpt-4o-realtime-preview)") - url: Optional[str] = Field(default=None, description="WebSocket URL") - greeting_message: 
Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - class OpenAIRealtime(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = OpenAIRealtimeOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - config: Dict[str, Any] = { - "vendor": "openai", - "api_key": self.options.api_key, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.model is not None: - params = {"model": self.options.model} - if self.options.params is not None: - params.update(self.options.params) - config["params"] = params - elif self.options.params is not None: - config["params"] = self.options.params - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - # xAI MLLM: use XaiGrok (product name, mllm.vendor "xai"). 
Do not use XaiRealtime—that name - # is deprecated and reserved naming for future XaiSTT / XaiTTS cascading vendors. - - - class XaiGrokOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="xAI API key") - url: str = Field(default="wss://api.x.ai/v1/realtime", description="xAI Realtime WebSocket URL") - voice: Optional[str] = Field(default=None, description="Voice identifier (e.g., eve or rex)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en)") - sample_rate: Optional[int] = Field(default=None, description="Audio sample rate in Hz") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional xAI parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - - class XaiGrok(BaseMLLM): - """xAI Grok MLLM vendor (`mllm.vendor`: ``xai``).""" - - def __init__(self, **kwargs: Any): - self.options = XaiGrokOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.params or {}) - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.language is not None: - params["language"] = self.options.language - if self.options.sample_rate is not None: - params["sample_rate"] = self.options.sample_rate - - config: Dict[str, Any] = { - "vendor": "xai", - "api_key": self.options.api_key, - "url": self.options.url, - "params": params, - } - - if 
self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - class VertexAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - model: str = Field(..., description="Model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - project_id: str = Field(..., description="Google Cloud project ID") - location: str = Field(..., description="Google Cloud location/region") - adc_credentials_string: str = Field(..., description="Application Default Credentials JSON string") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name (e.g., Aoede, Charon)") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - class 
VertexAI(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = VertexAIOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - # additional_params spread first so that explicit fields always win, - # matching the TypeScript SDK. - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params["model"] = self.options.model - params["project_id"] = self.options.project_id - params["location"] = self.options.location - params["adc_credentials_string"] = self.options.adc_credentials_string - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - - config: Dict[str, Any] = { - "vendor": "vertexai", - "params": params, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - class GeminiLiveOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Google API key") - model: str = Field(..., description="Gemini Live model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name") - greeting_message: Optional[str] = Field(default=None, 
description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - class GeminiLive(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = GeminiLiveOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = {} - if self.options.additional_params is not None: - params.update(self.options.additional_params) - params["model"] = self.options.model - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - - config: Dict[str, Any] = { - "vendor": "gemini", - "api_key": self.options.api_key, - "params": params, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - status: unresolved - 
- id: patch-20245632 - content_hash: sha256:a22e4a3b114ba8105c8129ccd6222570dc1f231daf9ac6037a00bcd4e11c425b - original_commit: 20245632afd066efe5a453665b29c5ba0e13e4f8 - original_message: "feat(agentkit): export type aliases and avatar token helpers" - original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - src/agora_agent/agentkit/__init__.py - - src/agora_agent/agentkit/agent.py - - src/agora_agent/agentkit/agent_session.py - - src/agora_agent/agentkit/avatar_types.py - - src/agora_agent/agentkit/constants.py - patch_content: |+ - From 20245632afd066efe5a453665b29c5ba0e13e4f8 Mon Sep 17 00:00:00 2001 - From: digitallysavvy - Date: Thu, 21 May 2026 15:17:27 -0400 - Subject: [PATCH] feat(agentkit): export type aliases and avatar token helpers - - --- - src/agora_agent/agentkit/__init__.py | 49 ++++++++++++++++------- - src/agora_agent/agentkit/agent.py | 22 +++++++++- - src/agora_agent/agentkit/agent_session.py | 8 +++- - src/agora_agent/agentkit/avatar_types.py | 23 +++++++++-- - src/agora_agent/agentkit/constants.py | 10 +++++ - 5 files changed, 90 insertions(+), 22 deletions(-) - - diff --git a/src/agora_agent/agentkit/__init__.py b/src/agora_agent/agentkit/__init__.py - index 5ceda66..e9ab221 100644 - --- a/src/agora_agent/agentkit/__init__.py - +++ b/src/agora_agent/agentkit/__init__.py - @@ -2,6 +2,7 @@ from .agent import ( - Agent, - AgentConfig, - AgentConfigUpdate, - + AsrConfig, - ConversationHistory, - ConversationRole, - ConversationSessionTurn, - @@ -62,23 +63,23 @@ from .agent import ( - SessionListResponse, - SessionSummary, - SpeakPriority, - + ThinkOnListeningAction, - + ThinkOnSpeakingAction, - + ThinkOnThinkingAction, - + ThinkResponse, - ) - -from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession - -from ..agent_management.types.agent_think_agent_management_response import ( - - AgentThinkAgentManagementResponse as AgentThinkResponse, - -) - -from 
..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - - AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, - -) - -from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - -) - -from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - +# Deprecated think type aliases (prefer ThinkOn* names). - +from .agent import ( - + ThinkOnListeningAction as AgentThinkRequestOnListeningAction, - + ThinkOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - + ThinkOnThinkingAction as AgentThinkRequestOnThinkingAction, - + ThinkResponse as AgentThinkResponse, - ) - +from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - + is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - @@ -94,6 +95,13 @@ from .constants import ( - GeofenceArea, - GeofenceExcludeArea, - FillerWordsSelectionRule, - + ThinkOnListeningActionIgnore, - + ThinkOnListeningActionInject, - + ThinkOnListeningActionInterrupt, - + ThinkOnSpeakingActionIgnore, - + ThinkOnSpeakingActionInterrupt, - + ThinkOnThinkingActionIgnore, - + ThinkOnThinkingActionInterrupt, - TurnDetectionTypeValues, - ) - from .token import ( - @@ -158,7 +166,6 @@ from .vendors import ( - SpeechmaticsSTT, - VertexAI, - XaiGrok, - - XaiRealtime, - LiveAvatarAvatar, - ) - - @@ -172,6 +179,7 @@ __all__ = [ - "LlmConfig", - "LlmStyle", - "SttConfig", - + "AsrConfig", - "SttVendor", - "TtsConfig", - "MllmConfig", - @@ -230,6 +238,13 @@ __all__ = [ - "GeofenceExcludeArea", - "FillerWordsSelectionRule", - "TurnDetectionTypeValues", - + "ThinkOnListeningActionInject", - + 
"ThinkOnListeningActionInterrupt", - + "ThinkOnListeningActionIgnore", - + "ThinkOnThinkingActionInterrupt", - + "ThinkOnThinkingActionIgnore", - + "ThinkOnSpeakingActionInterrupt", - + "ThinkOnSpeakingActionIgnore", - # LLM sub-types - "LlmGreetingConfigs", - "LlmGreetingConfigsMode", - @@ -246,10 +261,16 @@ __all__ = [ - "ConversationTurns", - "ConversationSessionTurn", - "SpeakPriority", - + "ThinkResponse", - + "ThinkOnListeningAction", - + "ThinkOnThinkingAction", - + "ThinkOnSpeakingAction", - "AgentThinkResponse", - "AgentThinkRequestOnListeningAction", - "AgentThinkRequestOnThinkingAction", - "AgentThinkRequestOnSpeakingAction", - + "is_avatar_token_managed", - + "is_rtc_avatar", - "AgentPresets", - "DeepgramPresetModels", - "OpenAIPresetModels", - @@ -303,7 +324,6 @@ __all__ = [ - "GeminiLive", - "VertexAI", - "XaiGrok", - - "XaiRealtime", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - @@ -314,7 +334,6 @@ __all__ = [ - "is_akool_avatar", - "is_anam_avatar", - "is_generic_avatar", - - "is_rtc_avatar", - "validate_avatar_config", - "validate_tts_sample_rate", - ] - diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index 86a958e..14933a2 100644 - --- a/src/agora_agent/agentkit/agent.py - +++ b/src/agora_agent/agentkit/agent.py - @@ -66,13 +66,25 @@ from ..agents.types.start_agents_request_properties_filler_words_content import - from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - -from .token import generate_convo_ai_token, _validate_expires_in - +from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - + AgentThinkAgentManagementRequestOnListeningAction, - +) - 
+from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - + AgentThinkAgentManagementRequestOnThinkingAction, - +) - +from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - + AgentThinkAgentManagementRequestOnSpeakingAction, - +) - +from ..agent_management.types.agent_think_agent_management_response import ( - + AgentThinkAgentManagementResponse, - +) - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - LlmConfig = StartAgentsRequestPropertiesLlm - LlmStyle = StartAgentsRequestPropertiesLlmStyle - SttConfig = StartAgentsRequestPropertiesAsr - +AsrConfig = SttConfig - SttVendor = StartAgentsRequestPropertiesAsrVendor - TtsConfig = Tts - MllmConfig = StartAgentsRequestPropertiesMllm - @@ -159,6 +171,14 @@ FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - +# Think type aliases and response - +ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction - +ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction - +ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction - +ThinkResponse = AgentThinkAgentManagementResponse - + - +from .token import generate_convo_ai_token, _validate_expires_in - + - - class Agent: - """A reusable agent definition. 
- diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index e41a399..269619e 100644 - --- a/src/agora_agent/agentkit/agent_session.py - +++ b/src/agora_agent/agentkit/agent_session.py - @@ -20,10 +20,10 @@ from .agent import Agent - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - + is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - - is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - @@ -242,7 +242,11 @@ class _AgentSessionBase: - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - - if not is_rtc_avatar(avatar): - + if not is_avatar_token_managed(avatar): - + validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - + return - + - + if not params.get("agora_uid"): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - diff --git a/src/agora_agent/agentkit/avatar_types.py b/src/agora_agent/agentkit/avatar_types.py - index a04809c..aea9da1 100644 - --- a/src/agora_agent/agentkit/avatar_types.py - +++ b/src/agora_agent/agentkit/avatar_types.py - @@ -1,3 +1,4 @@ - +import warnings - import typing - - - @@ -21,11 +22,25 @@ def is_generic_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "generic" - - - +def is_avatar_token_managed(config: typing.Dict[str, typing.Any]) -> bool: - + """Return True when AgentKit manages the avatar RTC publisher identity.""" - + return ( - + is_heygen_avatar(config) - + or is_live_avatar_avatar(config) - + or is_generic_avatar(config) - + ) - + - + - def is_rtc_avatar(config: typing.Dict[str, typing.Any]) -> bool: - - params = config.get("params", {}) - - return isinstance(params, dict) and bool(params.get("agora_uid")) and ( - - is_heygen_avatar(config) or is_live_avatar_avatar(config) or is_generic_avatar(config) - + """Deprecated: use :func:`is_avatar_token_managed` for 
vendor gating.""" - + warnings.warn( - + "is_rtc_avatar is deprecated; use is_avatar_token_managed for vendor gating " - + "and keep agora_uid checks in session enrichment.", - + DeprecationWarning, - + stacklevel=2, - ) - + params = config.get("params", {}) - + return isinstance(params, dict) and bool(params.get("agora_uid")) and is_avatar_token_managed(config) - - - def validate_avatar_config( - @@ -95,7 +110,7 @@ def validate_tts_sample_rate( - """Validates that TTS sample rate is compatible with the avatar vendor. - - Different avatar vendors have specific sample rate requirements: - - - HeyGen: ONLY supports 24,000 Hz - + - HeyGen/LiveAvatar: ONLY supports 24,000 Hz - - Akool: ONLY supports 16,000 Hz - - Parameters - diff --git a/src/agora_agent/agentkit/constants.py b/src/agora_agent/agentkit/constants.py - index f86e4d3..c0a852e 100644 - --- a/src/agora_agent/agentkit/constants.py - +++ b/src/agora_agent/agentkit/constants.py - @@ -58,3 +58,13 @@ class TurnDetectionTypeValues: - AGORA_VAD = "agora_vad" - SERVER_VAD = "server_vad" - SEMANTIC_VAD = "semantic_vad" - + - + - +# Think action value constants (match Fern wire values) - +ThinkOnListeningActionInject = "inject" - +ThinkOnListeningActionInterrupt = "interrupt" - +ThinkOnListeningActionIgnore = "ignore" - +ThinkOnThinkingActionInterrupt = "interrupt" - +ThinkOnThinkingActionIgnore = "ignore" - +ThinkOnSpeakingActionInterrupt = "interrupt" - +ThinkOnSpeakingActionIgnore = "ignore" - -- - 2.52.0 - - theirs_snapshot: - src/agora_agent/agentkit/__init__.py: | - from .agent import ( - Agent, - AgentConfig, - AgentConfigUpdate, - AsrConfig, - ConversationHistory, - ConversationRole, - ConversationSessionTurn, - ConversationTurn, - ConversationTurns, - StartAgentsRequestProperties, - AvatarConfig, - AvatarVendor, - GeofenceConfig, - LlmConfig, - LlmStyle, - MllmConfig, - MllmVendor, - RtcConfig, - SttConfig, - SttVendor, - TtsConfig, - FillerWordsConfig, - FillerWordsTrigger, - 
FillerWordsTriggerFixedTimeConfig, - FillerWordsContent, - FillerWordsContentStaticConfig, - FillerWordsContentSelectionRule, - TurnDetectionConfig, - TurnDetectionNestedConfig, - StartOfSpeechConfig, - StartOfSpeechMode, - StartOfSpeechVadConfig, - StartOfSpeechKeywordsConfig, - StartOfSpeechDisabledConfig, - StartOfSpeechDisabledConfigStrategy, - EndOfSpeechConfig, - EndOfSpeechMode, - EndOfSpeechVadConfig, - EndOfSpeechSemanticConfig, - TurnDetectionType, - InterruptMode, - Eagerness, - SalConfig, - SalMode, - AdvancedFeatures, - SessionParams, - SessionParamsInput, - SilenceConfig, - SilenceAction, - FarewellConfig, - ParametersDataChannel, - ParametersAudioScenario, - InterruptionConfig, - InterruptionMode, - MllmTurnDetectionConfig, - MllmTurnDetectionMode, - Labels, - LlmGreetingConfigs, - LlmGreetingConfigsMode, - McpServersItem, - SessionInfo, - SessionListResponse, - SessionSummary, - SpeakPriority, - ThinkOnListeningAction, - ThinkOnSpeakingAction, - ThinkOnThinkingAction, - ThinkResponse, - ) - # Deprecated think type aliases (prefer ThinkOn* names). 
- from .agent import ( - ThinkOnListeningAction as AgentThinkRequestOnListeningAction, - ThinkOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ThinkOnThinkingAction as AgentThinkRequestOnThinkingAction, - ThinkResponse as AgentThinkResponse, - ) - from .agent_session import AgentSession, AgentSessionOptions, AsyncAgentSession - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .constants import ( - DataChannel, - AudioScenario, - SilenceActionValues, - SalModeValues, - GeofenceArea, - GeofenceExcludeArea, - FillerWordsSelectionRule, - ThinkOnListeningActionIgnore, - ThinkOnListeningActionInject, - ThinkOnListeningActionInterrupt, - ThinkOnSpeakingActionIgnore, - ThinkOnSpeakingActionInterrupt, - ThinkOnThinkingActionIgnore, - ThinkOnThinkingActionInterrupt, - TurnDetectionTypeValues, - ) - from .token import ( - GenerateConvoAITokenOptions, - GenerateTokenOptions, - MAX_EXPIRY_SECONDS, - generate_convo_ai_token, - generate_rtc_token, - expires_in_hours, - expires_in_minutes, - ) - from .presets import ( - AgentPresets, - DeepgramPresetModels, - MiniMaxPresetModels, - OpenAIPresetModels, - OpenAITtsPresetModels, - normalize_preset_input, - ) - from .vendors import ( - AkoolAvatar, - AmazonSTT, - AmazonTTS, - AnamAvatar, - Anthropic, - AresSTT, - AssemblyAISTT, - AzureOpenAI, - BaseAvatar, - BaseLLM, - BaseMLLM, - BaseSTT, - BaseTTS, - CartesiaSampleRate, - CartesiaTTS, - DeepgramSTT, - DeepgramTTS, - ElevenLabsSampleRate, - ElevenLabsTTS, - FishAudioTTS, - Gemini, - GeminiLive, - GenericAvatar, - GoogleSTT, - GoogleTTS, - HeyGenAvatar, - HumeAITTS, - MicrosoftSampleRate, - MicrosoftSTT, - MicrosoftTTS, - MiniMaxTTS, - MurfTTS, - OpenAI, - OpenAIRealtime, - OpenAISampleRate, - OpenAISTT, - OpenAITTS, - RimeTTS, - SampleRate, - SarvamSTT, - SarvamTTS, - SpeechmaticsSTT, - 
VertexAI, - XaiGrok, - LiveAvatarAvatar, - ) - - __all__ = [ - "Agent", - "AgentConfig", - "AgentConfigUpdate", - # Return type of Agent.to_properties() - "StartAgentsRequestProperties", - # Top-level config types - "LlmConfig", - "LlmStyle", - "SttConfig", - "AsrConfig", - "SttVendor", - "TtsConfig", - "MllmConfig", - "MllmVendor", - "AvatarConfig", - "AvatarVendor", - "GeofenceConfig", - "RtcConfig", - "FillerWordsConfig", - "FillerWordsTrigger", - "FillerWordsTriggerFixedTimeConfig", - "FillerWordsContent", - "FillerWordsContentStaticConfig", - "FillerWordsContentSelectionRule", - # Turn detection types - "TurnDetectionConfig", - "TurnDetectionNestedConfig", - "StartOfSpeechConfig", - "StartOfSpeechMode", - "StartOfSpeechVadConfig", - "StartOfSpeechKeywordsConfig", - "StartOfSpeechDisabledConfig", - "StartOfSpeechDisabledConfigStrategy", - "EndOfSpeechConfig", - "EndOfSpeechMode", - "EndOfSpeechVadConfig", - "EndOfSpeechSemanticConfig", - # Deprecated turn detection types - "TurnDetectionType", - "InterruptMode", - "Eagerness", - # SAL types - "SalConfig", - "SalMode", - # Advanced features - "AdvancedFeatures", - # Session parameters types - "SessionParams", - "SessionParamsInput", - "SilenceConfig", - "SilenceAction", - "FarewellConfig", - "ParametersDataChannel", - "ParametersAudioScenario", - "InterruptionConfig", - "InterruptionMode", - "MllmTurnDetectionConfig", - "MllmTurnDetectionMode", - "Labels", - # Type-safe constants - "DataChannel", - "AudioScenario", - "SilenceActionValues", - "SalModeValues", - "GeofenceArea", - "GeofenceExcludeArea", - "FillerWordsSelectionRule", - "TurnDetectionTypeValues", - "ThinkOnListeningActionInject", - "ThinkOnListeningActionInterrupt", - "ThinkOnListeningActionIgnore", - "ThinkOnThinkingActionInterrupt", - "ThinkOnThinkingActionIgnore", - "ThinkOnSpeakingActionInterrupt", - "ThinkOnSpeakingActionIgnore", - # LLM sub-types - "LlmGreetingConfigs", - "LlmGreetingConfigsMode", - "McpServersItem", - "AgentSession", - 
"AsyncAgentSession", - "AgentSessionOptions", - "SessionInfo", - "SessionListResponse", - "SessionSummary", - "ConversationHistory", - "ConversationTurn", - "ConversationRole", - "ConversationTurns", - "ConversationSessionTurn", - "SpeakPriority", - "ThinkResponse", - "ThinkOnListeningAction", - "ThinkOnThinkingAction", - "ThinkOnSpeakingAction", - "AgentThinkResponse", - "AgentThinkRequestOnListeningAction", - "AgentThinkRequestOnThinkingAction", - "AgentThinkRequestOnSpeakingAction", - "is_avatar_token_managed", - "is_rtc_avatar", - "AgentPresets", - "DeepgramPresetModels", - "OpenAIPresetModels", - "OpenAITtsPresetModels", - "MiniMaxPresetModels", - "normalize_preset_input", - "generate_rtc_token", - "GenerateTokenOptions", - "generate_convo_ai_token", - "GenerateConvoAITokenOptions", - "MAX_EXPIRY_SECONDS", - "expires_in_hours", - "expires_in_minutes", - "BaseLLM", - "BaseTTS", - "BaseSTT", - "BaseMLLM", - "BaseAvatar", - "SampleRate", - "ElevenLabsSampleRate", - "MicrosoftSampleRate", - "OpenAISampleRate", - "CartesiaSampleRate", - "OpenAI", - "AzureOpenAI", - "Anthropic", - "Gemini", - "ElevenLabsTTS", - "MicrosoftTTS", - "OpenAITTS", - "CartesiaTTS", - "DeepgramTTS", - "GoogleTTS", - "AmazonTTS", - "HumeAITTS", - "RimeTTS", - "FishAudioTTS", - "MiniMaxTTS", - "MurfTTS", - "SarvamTTS", - "SpeechmaticsSTT", - "DeepgramSTT", - "MicrosoftSTT", - "OpenAISTT", - "GoogleSTT", - "AmazonSTT", - "AssemblyAISTT", - "AresSTT", - "SarvamSTT", - "OpenAIRealtime", - "GeminiLive", - "VertexAI", - "XaiGrok", - "HeyGenAvatar", - "LiveAvatarAvatar", - "AkoolAvatar", - "AnamAvatar", - "GenericAvatar", - "is_heygen_avatar", - "is_live_avatar_avatar", - "is_akool_avatar", - "is_anam_avatar", - "is_generic_avatar", - "validate_avatar_config", - "validate_tts_sample_rate", - ] - src/agora_agent/agentkit/agent.py: | - from __future__ import annotations - - import time - import typing - import typing_extensions - - if typing.TYPE_CHECKING: - from .agent_session import AgentSession, 
AsyncAgentSession - - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from ..agents.types.start_agents_request_properties_asr import StartAgentsRequestPropertiesAsr - from ..agents.types.start_agents_request_properties_asr_vendor import StartAgentsRequestPropertiesAsrVendor - from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar - from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - from ..agents.types.start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm - from ..agents.types.start_agents_request_properties_llm_style import StartAgentsRequestPropertiesLlmStyle - from ..agents.types.start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm - from ..agents.types.start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor - from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties - from ..agents.types.get_agents_response import GetAgentsResponse - from ..agents.types.list_agents_response import ListAgentsResponse - from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem - from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse - from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem - from ..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem - from ..agents.types.speak_agents_request_priority import SpeakAgentsRequestPriority - from ..agents.types.start_agents_request_properties_turn_detection import StartAgentsRequestPropertiesTurnDetection - from 
..agents.types.start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_keywords_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config_strategy import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - from ..agents.types.start_agents_request_properties_turn_detection_type import 
StartAgentsRequestPropertiesTurnDetectionType - from ..agents.types.start_agents_request_properties_turn_detection_interrupt_mode import StartAgentsRequestPropertiesTurnDetectionInterruptMode - from ..agents.types.start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness - from ..agents.types.start_agents_request_properties_sal import StartAgentsRequestPropertiesSal - from ..agents.types.start_agents_request_properties_sal_sal_mode import StartAgentsRequestPropertiesSalSalMode - from ..agents.types.start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters - from ..agents.types.start_agents_request_properties_parameters_silence_config import StartAgentsRequestPropertiesParametersSilenceConfig - from ..agents.types.start_agents_request_properties_parameters_silence_config_action import StartAgentsRequestPropertiesParametersSilenceConfigAction - from ..agents.types.start_agents_request_properties_parameters_farewell_config import StartAgentsRequestPropertiesParametersFarewellConfig - from ..agents.types.start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel - from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario - from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption - from ..agents.types.start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode - from ..agents.types.start_agents_request_properties_mllm_turn_detection import StartAgentsRequestPropertiesMllmTurnDetection - from ..agents.types.start_agents_request_properties_mllm_turn_detection_mode import StartAgentsRequestPropertiesMllmTurnDetectionMode - from ..agents.types.start_agents_request_properties_llm_greeting_configs import StartAgentsRequestPropertiesLlmGreetingConfigs - from 
..agents.types.start_agents_request_properties_llm_greeting_configs_mode import StartAgentsRequestPropertiesLlmGreetingConfigsMode - from ..agents.types.start_agents_request_properties_llm_mcp_servers_item import StartAgentsRequestPropertiesLlmMcpServersItem - from ..agents.types.start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence - from ..agents.types.start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc - from ..agents.types.start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures - from ..agents.types.start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords - from ..agents.types.start_agents_request_properties_filler_words_trigger import StartAgentsRequestPropertiesFillerWordsTrigger - from ..agents.types.start_agents_request_properties_filler_words_trigger_fixed_time_config import StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - from ..agents.types.start_agents_request_properties_filler_words_content import StartAgentsRequestPropertiesFillerWordsContent - from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction, - ) - from 
..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse, - ) - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - LlmConfig = StartAgentsRequestPropertiesLlm - LlmStyle = StartAgentsRequestPropertiesLlmStyle - SttConfig = StartAgentsRequestPropertiesAsr - AsrConfig = SttConfig - SttVendor = StartAgentsRequestPropertiesAsrVendor - TtsConfig = Tts - MllmConfig = StartAgentsRequestPropertiesMllm - MllmVendor = StartAgentsRequestPropertiesMllmVendor - AvatarConfig = StartAgentsRequestPropertiesAvatar - AvatarVendor = StartAgentsRequestPropertiesAvatarVendor - TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection - SalConfig = StartAgentsRequestPropertiesSal - SalMode = StartAgentsRequestPropertiesSalSalMode - AdvancedFeatures = StartAgentsRequestPropertiesAdvancedFeatures - SessionParams = StartAgentsRequestPropertiesParameters - - # SOS/EOS turn detection aliases (preferred) - TurnDetectionNestedConfig = StartAgentsRequestPropertiesTurnDetectionConfig - StartOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - StartOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - StartOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - StartOfSpeechKeywordsConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - StartOfSpeechDisabledConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - StartOfSpeechDisabledConfigStrategy = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - EndOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - EndOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - EndOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - EndOfSpeechSemanticConfig = 
StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - - # Deprecated turn detection aliases - # Deprecated: Use TurnDetectionConfig with TurnDetectionNestedConfig.start_of_speech - # and .end_of_speech instead. The `type` field and agora_vad/server_vad/semantic_vad - # values will be removed in a future release. - TurnDetectionType = StartAgentsRequestPropertiesTurnDetectionType - - # Deprecated: Use StartOfSpeechConfig with mode="vad"|"keywords"|"disabled" and the - # corresponding vad_config, keywords_config, or disabled_config instead. - InterruptMode = StartAgentsRequestPropertiesTurnDetectionInterruptMode - - # Deprecated: Only applies to server_vad/semantic_vad modes with OpenAI Realtime - # (MLLM). Has no equivalent in the ASR + LLM + TTS pipeline. - Eagerness = StartAgentsRequestPropertiesTurnDetectionEagerness - - # Parameters (SessionParams) sub-type aliases - SilenceConfig = StartAgentsRequestPropertiesParametersSilenceConfig - SilenceAction = StartAgentsRequestPropertiesParametersSilenceConfigAction - FarewellConfig = StartAgentsRequestPropertiesParametersFarewellConfig - ParametersDataChannel = StartAgentsRequestPropertiesParametersDataChannel - ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario - InterruptionConfig = StartAgentsRequestPropertiesInterruption - InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection - MllmTurnDetectionMode = StartAgentsRequestPropertiesMllmTurnDetectionMode - AgentConfig = StartAgentsRequestProperties - AgentConfigUpdate = UpdateAgentsRequestProperties - SessionInfo = GetAgentsResponse - SessionListResponse = ListAgentsResponse - SessionSummary = ListAgentsResponseDataListItem - ConversationHistory = GetHistoryAgentsResponse - ConversationTurn = GetHistoryAgentsResponseContentsItem - ConversationRole = GetHistoryAgentsResponseContentsItemRole - ConversationTurns = GetTurnsAgentsResponse - 
ConversationSessionTurn = GetTurnsAgentsResponseTurnsItem - SpeakPriority = SpeakAgentsRequestPriority - Labels = typing.Dict[str, str] - - - class SessionParamsInput(typing_extensions.TypedDict, total=False): - silence_config: StartAgentsRequestPropertiesParametersSilenceConfig - farewell_config: StartAgentsRequestPropertiesParametersFarewellConfig - data_channel: StartAgentsRequestPropertiesParametersDataChannel - enable_metrics: bool - enable_error_message: bool - audio_scenario: ParametersAudioScenario - - # LLM sub-type aliases - LlmGreetingConfigs = StartAgentsRequestPropertiesLlmGreetingConfigs - LlmGreetingConfigsMode = StartAgentsRequestPropertiesLlmGreetingConfigsMode - McpServersItem = StartAgentsRequestPropertiesLlmMcpServersItem - - # Additional top-level config aliases - GeofenceConfig = StartAgentsRequestPropertiesGeofence - RtcConfig = StartAgentsRequestPropertiesRtc - FillerWordsConfig = StartAgentsRequestPropertiesFillerWords - FillerWordsTrigger = StartAgentsRequestPropertiesFillerWordsTrigger - FillerWordsTriggerFixedTimeConfig = StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - # Think type aliases and response - ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction - ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction - ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction - ThinkResponse = AgentThinkAgentManagementResponse - - from .token import generate_convo_ai_token, _validate_expires_in - - - class Agent: - """A reusable agent definition. - - Use the fluent builder methods (.with_llm(), .with_tts(), .with_stt(), .with_mllm()) - to configure vendor settings after construction. 
- - Examples - -------- - >>> from agora_agent.agentkit import Agent - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT - >>> - >>> agent = Agent(instructions="You are a helpful voice assistant.") - >>> agent = ( - ... agent - ... .with_llm(OpenAI(api_key="...", model="gpt-4")) - ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", sample_rate=24000)) - ... .with_stt(DeepgramSTT(api_key="...", model="nova-2")) - ... ) - """ - - def __init__( - self, - name: typing.Optional[str] = None, - instructions: typing.Optional[str] = None, - turn_detection: typing.Optional[TurnDetectionConfig] = None, - interruption: typing.Optional[InterruptionConfig] = None, - sal: typing.Optional[SalConfig] = None, - advanced_features: typing.Optional[AdvancedFeatures] = None, - parameters: typing.Optional[typing.Union[SessionParams, SessionParamsInput]] = None, - greeting: typing.Optional[str] = None, - failure_message: typing.Optional[str] = None, - max_history: typing.Optional[int] = None, - geofence: typing.Optional[GeofenceConfig] = None, - labels: typing.Optional[typing.Dict[str, str]] = None, - rtc: typing.Optional[RtcConfig] = None, - filler_words: typing.Optional[FillerWordsConfig] = None, - ): - self._name = name - self._instructions = instructions - self._greeting = greeting - self._failure_message = failure_message - self._max_history = max_history - self._llm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts: typing.Optional[typing.Dict[str, typing.Any]] = None - self._stt: typing.Optional[typing.Dict[str, typing.Any]] = None - self._mllm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts_sample_rate: typing.Optional[int] = None - self._avatar: typing.Optional[typing.Dict[str, typing.Any]] = None - self._avatar_required_sample_rate: typing.Optional[int] = None - self._turn_detection = turn_detection - self._interruption = interruption - self._sal = sal - self._advanced_features = advanced_features 
- self._parameters = parameters - self._geofence = geofence - self._labels = labels - self._rtc = rtc - self._filler_words = filler_words - - def with_llm(self, vendor: BaseLLM) -> "Agent": - new_agent = self._clone() - new_agent._llm = vendor.to_config() - return new_agent - - def with_tts(self, vendor: BaseTTS) -> "Agent": - sample_rate = vendor.sample_rate - if ( - self._avatar_required_sample_rate not in (None, 0) - and sample_rate is not None - and sample_rate != self._avatar_required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {self._avatar_required_sample_rate} Hz, " - f"but TTS is configured with {sample_rate} Hz. " - f"Please update your TTS sample_rate to {self._avatar_required_sample_rate}." - ) - new_agent = self._clone() - new_agent._tts = vendor.to_config() - new_agent._tts_sample_rate = sample_rate - return new_agent - - def with_stt(self, vendor: BaseSTT) -> "Agent": - new_agent = self._clone() - new_agent._stt = vendor.to_config() - return new_agent - - def with_mllm(self, vendor: BaseMLLM) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` so callers can still - # configure both for tests, debugging, or disabled-avatar use cases. 
- new_agent = self._clone() - new_agent._mllm = vendor.to_config() - if isinstance(new_agent._mllm, dict): - new_agent._mllm["enable"] = True - if isinstance(new_agent._advanced_features, dict): - advanced_features = {key: value for key, value in new_agent._advanced_features.items() if key != "enable_mllm"} - new_agent._advanced_features = typing.cast(AdvancedFeatures, advanced_features) if advanced_features else None - elif isinstance(new_agent._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures): - advanced_features_model = self._copy_model_update( - new_agent._advanced_features, - {"enable_mllm": None}, - ) - if ( - advanced_features_model.enable_rtm is None - and advanced_features_model.enable_sal is None - and advanced_features_model.enable_tools is None - ): - new_agent._advanced_features = None - else: - new_agent._advanced_features = advanced_features_model - return new_agent - - def with_avatar(self, vendor: BaseAvatar) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` (only when the avatar is - # enabled) so callers may still combine the two for testing or for the - # disabled-avatar pattern. - required_sample_rate = vendor.required_sample_rate - if ( - required_sample_rate not in (None, 0) - and self._tts_sample_rate is not None - and self._tts_sample_rate != required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {required_sample_rate} Hz, " - f"but TTS is configured with {self._tts_sample_rate} Hz. " - f"Please update your TTS sample_rate to {required_sample_rate}." 
- ) - new_agent = self._clone() - new_agent._avatar = vendor.to_config() - new_agent._avatar_required_sample_rate = required_sample_rate - return new_agent - - def with_turn_detection(self, config: TurnDetectionConfig) -> "Agent": - new_agent = self._clone() - new_agent._turn_detection = config - return new_agent - - def with_interruption(self, config: InterruptionConfig) -> "Agent": - """Returns a new Agent with unified interruption control configured.""" - new_agent = self._clone() - new_agent._interruption = config - return new_agent - - def with_instructions(self, instructions: str) -> "Agent": - new_agent = self._clone() - new_agent._instructions = instructions - return new_agent - - def with_greeting(self, greeting: str) -> "Agent": - new_agent = self._clone() - new_agent._greeting = greeting - return new_agent - - def with_name(self, name: str) -> "Agent": - new_agent = self._clone() - new_agent._name = name - return new_agent - - def with_sal(self, config: SalConfig) -> "Agent": - """Returns a new Agent with the specified SAL (Selective Attention Locking) configuration.""" - new_agent = self._clone() - new_agent._sal = config - return new_agent - - def with_advanced_features(self, features: AdvancedFeatures) -> "Agent": - """Returns a new Agent with the specified advanced features configuration. - - Use this to enable RTM and other advanced features. 
- """ - new_agent = self._clone() - new_agent._advanced_features = features - return new_agent - - def with_tools(self, enabled: bool = True) -> "Agent": - """Returns a new Agent with MCP tool invocation enabled or disabled.""" - new_agent = self._clone() - if new_agent._advanced_features is None: - new_agent._advanced_features = StartAgentsRequestPropertiesAdvancedFeatures(enable_tools=enabled) - elif isinstance(new_agent._advanced_features, dict): - new_agent._advanced_features = typing.cast( - AdvancedFeatures, - {**new_agent._advanced_features, "enable_tools": enabled}, - ) - else: - new_agent._advanced_features = self._copy_model_update( - new_agent._advanced_features, - {"enable_tools": enabled}, - ) - return new_agent - - def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent": - """Returns a new Agent with the specified session parameters. - - Use this to configure silence behaviour, graceful hang-up, data channel, and more. - """ - new_agent = self._clone() - new_agent._parameters = parameters - return new_agent - - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - - def with_failure_message(self, message: str) -> "Agent": - """Returns a new Agent with the specified failure message. - - The failure message is played via TTS when the LLM call fails. 
- """ - new_agent = self._clone() - new_agent._failure_message = message - return new_agent - - def with_max_history(self, max_history: int) -> "Agent": - """Returns a new Agent with the specified maximum conversation history length.""" - new_agent = self._clone() - new_agent._max_history = max_history - return new_agent - - def with_geofence(self, geofence: GeofenceConfig) -> "Agent": - """Returns a new Agent with the specified geofence configuration. - - Restricts which geographic regions the agent's backend servers may run in. - """ - new_agent = self._clone() - new_agent._geofence = geofence - return new_agent - - def with_labels(self, labels: typing.Dict[str, str]) -> "Agent": - """Returns a new Agent with the specified custom labels. - - Labels are key-value pairs attached to the agent and returned in notification callbacks. - """ - new_agent = self._clone() - new_agent._labels = dict(labels) - return new_agent - - def with_rtc(self, rtc: RtcConfig) -> "Agent": - """Returns a new Agent with the specified RTC configuration.""" - new_agent = self._clone() - new_agent._rtc = rtc - return new_agent - - def with_filler_words(self, filler_words: FillerWordsConfig) -> "Agent": - """Returns a new Agent with the specified filler words configuration. - - Filler words are played while the agent waits for the LLM to respond. 
- """ - new_agent = self._clone() - new_agent._filler_words = filler_words - return new_agent - - @staticmethod - def _field_value(value: typing.Any, field: str) -> typing.Any: - if value is None: - return None - if isinstance(value, dict): - return value.get(field) - return getattr(value, field, None) - - @staticmethod - def _copy_model_update(value: typing.Any, update: typing.Dict[str, typing.Any]) -> typing.Any: - if hasattr(value, "model_copy"): - return value.model_copy(update=update) - if hasattr(value, "copy"): - return value.copy(update=update) - raise TypeError(f"Object of type {type(value).__name__} does not support model copying") - - def _resolved_parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - enable_rtm = self._field_value(self._advanced_features, "enable_rtm") is True - data_channel = self._field_value(self._parameters, "data_channel") - if not enable_rtm or data_channel is not None: - return self._parameters - if self._parameters is None: - return StartAgentsRequestPropertiesParameters(data_channel="rtm") - if isinstance(self._parameters, dict): - return typing.cast(SessionParamsInput, {**self._parameters, "data_channel": "rtm"}) - return self._copy_model_update(self._parameters, {"data_channel": "rtm"}) - - @property - def name(self) -> typing.Optional[str]: - return self._name - - @property - def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._llm - - @property - def tts(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._tts - - @property - def tts_sample_rate(self) -> typing.Optional[int]: - return self._tts_sample_rate - - @property - def stt(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._stt - - @property - def mllm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._mllm - - @property - def turn_detection(self) -> typing.Optional[TurnDetectionConfig]: - return self._turn_detection - - @property - def 
interruption(self) -> typing.Optional[InterruptionConfig]: - return self._interruption - - @property - def instructions(self) -> typing.Optional[str]: - return self._instructions - - @property - def greeting(self) -> typing.Optional[str]: - return self._greeting - - @property - def failure_message(self) -> typing.Optional[str]: - return self._failure_message - - @property - def max_history(self) -> typing.Optional[int]: - return self._max_history - - @property - def avatar(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._avatar - - @property - def sal(self) -> typing.Optional[SalConfig]: - return self._sal - - @property - def advanced_features(self) -> typing.Optional[AdvancedFeatures]: - return self._advanced_features - - @property - def parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - return self._parameters - - @property - def geofence(self) -> typing.Optional[GeofenceConfig]: - return self._geofence - - @property - def labels(self) -> typing.Optional[typing.Dict[str, str]]: - return self._labels - - @property - def rtc(self) -> typing.Optional[RtcConfig]: - return self._rtc - - @property - def filler_words(self) -> typing.Optional[FillerWordsConfig]: - return self._filler_words - - @property - def config(self) -> typing.Dict[str, typing.Any]: - return { - "name": self._name, - "instructions": self._instructions, - "greeting": self._greeting, - "failure_message": self._failure_message, - "max_history": self._max_history, - "llm": self._llm, - "tts": self._tts, - "stt": self._stt, - "mllm": self._mllm, - "turn_detection": self._turn_detection, - "interruption": self._interruption, - "sal": self._sal, - "avatar": self._avatar, - "advanced_features": self._advanced_features, - "parameters": self._parameters, - "geofence": self._geofence, - "labels": self._labels, - "rtc": self._rtc, - "filler_words": self._filler_words, - } - - def create_session( - self, - client: typing.Any, - channel: str, - agent_uid: 
str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AgentSession": - from .agent_session import AgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def create_async_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AsyncAgentSession": - """Create an async session for use with :class:`~agora_agent.AsyncAgora`. - - Equivalent to :meth:`create_session` but returns an - :class:`~agora_agent.agentkit.AsyncAgentSession`. 
- """ - from .agent_session import AsyncAgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AsyncAgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def to_properties( - self, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - token: typing.Optional[str] = None, - app_id: typing.Optional[str] = None, - app_certificate: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - skip_vendor_validation: bool = False, - ) -> StartAgentsRequestProperties: - # Validate the MLLM + enabled-avatar combination BEFORE generating the - # RTC token so callers get a clear, actionable error first (matches the - # TypeScript and Go SDKs' fail-fast contract). - mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - is_mllm_mode = bool(mllm_flag or self._mllm is not None) - avatar_enabled = ( - isinstance(self._avatar, dict) and self._avatar.get("enable") is not False - ) - if is_mllm_mode and avatar_enabled: - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." 
- ) - - if token is None: - if app_id is None or app_certificate is None: - raise ValueError("Either token or app_id+app_certificate must be provided") - validated_expires_in = _validate_expires_in(expires_in) if expires_in is not None else None - # Use generate_convo_ai_token (RTC + RTM) so the token works whether or - # not the caller enables advanced_features.enable_rtm. - token_kwargs: typing.Dict[str, typing.Any] = {} - if validated_expires_in is not None: - token_kwargs["token_expire"] = validated_expires_in - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=channel, - account=agent_uid, - **token_kwargs, - ) - - base_kwargs: typing.Dict[str, typing.Any] = { - "channel": channel, - "token": token, - "agent_rtc_uid": agent_uid, - "remote_rtc_uids": remote_uids, - } - - if idle_timeout is not None: - base_kwargs["idle_timeout"] = idle_timeout - if enable_string_uid is not None: - base_kwargs["enable_string_uid"] = enable_string_uid - if self._mllm is not None: - base_kwargs["mllm"] = self._mllm - if self._turn_detection is not None: - base_kwargs["turn_detection"] = self._turn_detection - if self._interruption is not None: - base_kwargs["interruption"] = self._interruption - if self._sal is not None: - base_kwargs["sal"] = self._sal - if self._avatar is not None: - base_kwargs["avatar"] = self._avatar - if self._advanced_features is not None: - base_kwargs["advanced_features"] = self._advanced_features - parameters = self._resolved_parameters() - if parameters is not None: - if isinstance(parameters, dict): - base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**parameters) - else: - base_kwargs["parameters"] = parameters - if self._geofence is not None: - base_kwargs["geofence"] = self._geofence - if self._labels is not None: - base_kwargs["labels"] = self._labels - if self._rtc is not None: - base_kwargs["rtc"] = self._rtc - if self._filler_words is not None: - base_kwargs["filler_words"] = 
self._filler_words - - if is_mllm_mode: - if self._mllm is not None: - mllm_config = dict(self._mllm) - if self._greeting is not None: - mllm_config.setdefault("greeting_message", self._greeting) - if self._failure_message is not None: - mllm_config.setdefault("failure_message", self._failure_message) - base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) - - if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) - - if self._tts is None: - raise ValueError("TTS configuration is required. Use with_tts() to set it.") - - if self._llm is None: - raise ValueError("LLM configuration is required. Use with_llm() to set it.") - - llm_config = dict(self._llm) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. - if self._instructions is not None: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: - llm_config["greeting_message"] = self._greeting - if self._failure_message is not None: - llm_config["failure_message"] = self._failure_message - if self._max_history is not None: - llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - if self._stt is not None: - base_kwargs["asr"] = self._stt - - return StartAgentsRequestProperties(**base_kwargs) - - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - new_agent._mllm = self._mllm - new_agent._tts_sample_rate = self._tts_sample_rate - new_agent._avatar = self._avatar - new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate - new_agent._turn_detection = self._turn_detection - new_agent._interruption = self._interruption - new_agent._sal = self._sal - new_agent._advanced_features = self._advanced_features - 
new_agent._parameters = self._parameters - new_agent._instructions = self._instructions - new_agent._greeting = self._greeting - new_agent._failure_message = self._failure_message - new_agent._max_history = self._max_history - new_agent._geofence = self._geofence - new_agent._labels = self._labels - new_agent._rtc = self._rtc - new_agent._filler_words = self._filler_words - return new_agent - src/agora_agent/agentkit/agent_session.py: | - import typing - import warnings - - from ..core.api_error import ApiError - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .presets import resolve_session_presets - from .token import generate_convo_ai_token - - - class _AgentSessionRequiredOptions(typing.TypedDict, total=True): - """Required fields shared by both sync and async session constructors.""" - - client: typing.Any - agent: Agent - app_id: str - name: str - channel: str - agent_uid: str - remote_uids: 
typing.List[str] - - - class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - """Configuration options for creating an agent session. - - Required fields - --------------- - client, agent, app_id, name, channel, agent_uid, remote_uids - - Optional fields - --------------- - app_certificate, token, idle_timeout, enable_string_uid, expires_in - """ - - app_certificate: str - token: str - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - class _AgentSessionBase: - """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. - - Not intended for direct use — instantiate one of the concrete subclasses or - call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. - """ - - def __init__( - self, - client: typing.Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - app_certificate: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ): - self._client = client - self._agent = agent - self._app_id = app_id - self._app_certificate = app_certificate - self._name = name - self._channel = channel - self._token = token - self._agent_uid = agent_uid - self._remote_uids = remote_uids - self._idle_timeout = idle_timeout - self._enable_string_uid = enable_string_uid - self._preset = preset - self._pipeline_id = pipeline_id - self._expires_in = expires_in - self._debug = debug - self._warn = warn or warnings.warn - self._agent_id: typing.Optional[str] = None - 
self._status: str = "idle" - self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - - # ------------------------------------------------------------------ - # Public read-only properties - # ------------------------------------------------------------------ - - @property - def id(self) -> typing.Optional[str]: - return self._agent_id - - @property - def status(self) -> str: - return self._status - - @property - def agent(self) -> Agent: - return self._agent - - @property - def app_id(self) -> str: - return self._app_id - - @property - def raw(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentsClient. - - Use this to access any new endpoints that Fern generates without - waiting for agentkit method updates. - """ - return self._client.agents - - @property - def raw_agent_management(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentManagement client.""" - return self._client.agent_management - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: - """Return per-request auth headers when client is in app-credentials mode. - - In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated - for every request and returned as ``Authorization: agora token=``. - In basic-auth mode this returns ``None`` (the client-level header is used). 
- """ - if getattr(self._client, "auth_mode", None) != "app-credentials": - return None - app_id: str = getattr(self._client, "app_id", self._app_id) - app_certificate: typing.Optional[str] = getattr( - self._client, "app_certificate", self._app_certificate - ) - if not app_certificate: - raise RuntimeError("app_certificate is required for app-credentials auth mode") - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=self._channel, - account=self._agent_uid, - ) - return {"Authorization": f"agora token={token}"} - - def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - """Build request_options dict with per-request auth headers if needed.""" - headers = self._convo_ai_headers() - if headers is None: - return None - return {"additional_headers": headers} - - def _validate_avatar_config(self) -> None: - avatar = self._agent.avatar - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - if self._is_mllm_mode(): - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) - - tts_params = tts.get("params") if isinstance(tts, dict) else None - sample_rate = self._agent.tts_sample_rate - if sample_rate is None and isinstance(tts_params, dict): - sample_rate = ( - tts_params.get("sample_rate") - or tts_params.get("sample_rate_hertz") - or tts_params.get("samplingRate") - ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - self._warn( - "Warning: HeyGen avatar detected but TTS sample_rate is not explicitly set. " - "HeyGen requires 24,000 Hz. 
Please ensure your TTS provider is configured for 24kHz." - ) - elif is_live_avatar_avatar(avatar): - self._warn( - "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " - "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_akool_avatar(avatar): - self._warn( - "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." - ) - - def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - avatar = properties.get("avatar") - if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - return - - params = avatar.get("params") - if not isinstance(params, dict): - params = {} - avatar["params"] = params - - if is_generic_avatar(avatar): - if not params.get("agora_appid"): - params["agora_appid"] = self._app_id - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - if not is_avatar_token_managed(avatar): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_uid"): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_token"): - if not self._app_certificate: - raise ValueError( - "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." 
- ) - token_kwargs: typing.Dict[str, typing.Any] = {} - if self._expires_in is not None: - token_kwargs["token_expire"] = self._expires_in - params["agora_token"] = generate_convo_ai_token( - app_id=self._app_id, - app_certificate=self._app_certificate, - channel_name=self._channel, - account=str(params["agora_uid"]), - **token_kwargs, - ) - - if str(params.get("agora_uid")) == self._agent_uid: - self._warn( - "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." - ) - - validate_avatar_config(avatar, require_session_fields=True) - - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if isinstance(value, dict): - return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} - if isinstance(value, list): - return [_AgentSessionBase._dump_model(item) for item in value] - return value - - def _is_mllm_mode(self) -> bool: - mllm = self._agent.mllm - if isinstance(mllm, dict) and mllm.get("enable") is True: - return True - return mllm is not None - - def _build_start_properties(self, token_opts: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - skip_vendor_validation=True, - **token_opts, - ) - properties = self._dump_model(base_properties) - self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - mllm = self._dump_model(self._agent.mllm) - if not isinstance(mllm, dict): - mllm = {} - if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - if self._agent.failure_message is not None: - mllm.setdefault("failure_message", self._agent.failure_message) - properties["mllm"] = mllm - return 
properties - - if self._agent.tts is not None: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - if self._agent.instructions is not None: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: - llm["greeting_message"] = self._agent.greeting - if self._agent.failure_message is not None: - llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: - llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - @staticmethod - def _page_value(pagination: typing.Any, field: str) -> typing.Any: - if pagination is None: - return None - if isinstance(pagination, dict): - return pagination.get(field) - return getattr(pagination, field, None) - - @staticmethod - def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - return list(turns or []) - - @classmethod - def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - data = cls._dump_model(first_response) - if not isinstance(data, dict): - data = {} - data["turns"] = turns - return GetTurnsAgentsResponse(**data) - - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - - def on(self, event: str, handler: typing.Callable[..., None]) -> None: - """Register an event handler. - - Parameters - ---------- - event : str - The event type (``started``, ``stopped``, ``error``). - handler : callable - The event handler to invoke when the event fires. 
- """ - if event not in self._event_handlers: - self._event_handlers[event] = [] - self._event_handlers[event].append(handler) - - def off(self, event: str, handler: typing.Callable[..., None]) -> None: - """Unregister a previously registered event handler.""" - handlers = self._event_handlers.get(event) - if handlers and handler in handlers: - handlers.remove(handler) - - def _emit(self, event: str, data: typing.Any) -> None: - handlers = self._event_handlers.get(event) - if handlers: - for handler in handlers: - try: - handler(data) - except Exception as exc: - # Prevent a misbehaving handler from blocking other handlers or - # the session lifecycle. Warn so the error is not silently lost. - warnings.warn( - f"Event handler for '{event}' raised an exception: {exc}", - stacklevel=2, - ) - - - class AgentSession(_AgentSessionBase): - """Manages the lifecycle of an agent session (synchronous). - - This class provides a high-level interface for managing agent sessions, - including starting, stopping, and interacting with the agent. - - Use :meth:`Agent.create_session` to create a session — this is the - recommended entry point. - - Examples - -------- - >>> from agora_agent import Agora, Area - >>> from agora_agent.agentkit import Agent - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - >>> session.say("Hello!") - >>> session.stop() - """ - - def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. 
- ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=self._pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. - """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn 
analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_turns( - self._app_id, - self._agent_id, - page_index=page_index, - page_size=page_size, - request_options=self._request_options(), - ) - - def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." 
- ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - current_page = next_page - return self._with_all_turns(response, all_turns) - - - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. - - Use :meth:`Agent.create_async_session` to create a session — this is the - recommended entry point. - - Examples - -------- - >>> from agora_agent import AsyncAgora, Area - >>> from agora_agent.agentkit import Agent - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - >>> await session.say("Hello!") - >>> await session.stop() - """ - - async def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. 
- """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = await self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=self._pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - await self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - await self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - async def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return await self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. - """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - async def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - ) -> 
GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_turns( - self._app_id, - self._agent_id, - page_index=page_index, - page_size=page_size, - request_options=self._request_options(), - ) - - async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = await self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = await self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = response.get("pagination") if isinstance(response, dict) else response.pagination - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) - src/agora_agent/agentkit/avatar_types.py: | - import warnings - import typing - - - def is_heygen_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "heygen" - - - def is_live_avatar_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "liveavatar" - - - def is_akool_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "akool" - - - def is_anam_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "anam" - - - def is_generic_avatar(config: typing.Dict[str, typing.Any]) -> bool: - return config.get("vendor") == "generic" - - - def is_avatar_token_managed(config: typing.Dict[str, typing.Any]) -> bool: - """Return True when AgentKit manages the avatar RTC publisher identity.""" - return ( - is_heygen_avatar(config) - or is_live_avatar_avatar(config) - or is_generic_avatar(config) - ) - - - def is_rtc_avatar(config: typing.Dict[str, typing.Any]) -> bool: - """Deprecated: use :func:`is_avatar_token_managed` for vendor gating.""" - warnings.warn( - "is_rtc_avatar is deprecated; use is_avatar_token_managed for vendor gating " - "and keep agora_uid checks in session enrichment.", - DeprecationWarning, - stacklevel=2, - ) - params = config.get("params", {}) - return isinstance(params, dict) and bool(params.get("agora_uid")) and is_avatar_token_managed(config) - - - def validate_avatar_config( - config: typing.Dict[str, typing.Any], - require_session_fields: bool = False, - ) -> None: - """Validates avatar configuration at runtime. - - Parameters - ---------- - config : dict - The avatar configuration dictionary. - - Raises - ------ - ValueError - If the configuration is invalid. 
- """ - if is_heygen_avatar(config) or is_live_avatar_avatar(config): - label = "HeyGen" if is_heygen_avatar(config) else "LiveAvatar" - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError(f"{label} avatar requires api_key") - if not params.get("quality"): - raise ValueError(f"{label} avatar requires quality (low, medium, or high)") - if not params.get("agora_uid"): - raise ValueError(f"{label} avatar requires agora_uid") - valid_qualities = ("low", "medium", "high") - if params.get("quality") not in valid_qualities: - raise ValueError( - f"Invalid quality for {label}: {params.get('quality')}. " - f"Must be one of: {', '.join(valid_qualities)}" - ) - if require_session_fields and not params.get("agora_token"): - raise ValueError(f"{label} avatar requires agora_token after session enrichment") - elif is_akool_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Akool avatar requires api_key") - elif is_anam_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Anam avatar requires api_key") - elif is_generic_avatar(config): - params = config.get("params", {}) - if not params.get("api_key"): - raise ValueError("Generic avatar requires api_key") - if not params.get("api_base_url"): - raise ValueError("Generic avatar requires api_base_url") - if not params.get("avatar_id"): - raise ValueError("Generic avatar requires avatar_id") - if not params.get("agora_uid"): - raise ValueError("Generic avatar requires agora_uid") - if require_session_fields: - if not params.get("agora_token"): - raise ValueError("Generic avatar requires agora_token after session enrichment") - if not params.get("agora_appid"): - raise ValueError("Generic avatar requires agora_appid after session enrichment") - if not params.get("agora_channel"): - raise ValueError("Generic avatar requires agora_channel after session enrichment") - - - def validate_tts_sample_rate( - 
avatar_config: typing.Dict[str, typing.Any], - tts_sample_rate: int, - ) -> None: - """Validates that TTS sample rate is compatible with the avatar vendor. - - Different avatar vendors have specific sample rate requirements: - - HeyGen/LiveAvatar: ONLY supports 24,000 Hz - - Akool: ONLY supports 16,000 Hz - - Parameters - ---------- - avatar_config : dict - The avatar configuration dictionary. - tts_sample_rate : int - The sample rate from your TTS configuration (in Hz). - - Raises - ------ - ValueError - If TTS sample rate is incompatible with the avatar vendor. - """ - if is_heygen_avatar(avatar_config) or is_live_avatar_avatar(avatar_config): - if tts_sample_rate != 24000: - label = "HeyGen" if is_heygen_avatar(avatar_config) else "LiveAvatar" - raise ValueError( - f"{label} avatars ONLY support 24,000 Hz sample rate. " - f"Your TTS is configured with {tts_sample_rate} Hz. " - f"Please update your TTS configuration to use 24kHz sample rate. " - f"See: https://docs.agora.io/en/conversational-ai/models/avatar/overview" - ) - elif is_akool_avatar(avatar_config): - if tts_sample_rate != 16000: - raise ValueError( - f"Akool avatars ONLY support 16,000 Hz sample rate. " - f"Your TTS is configured with {tts_sample_rate} Hz. " - f"Please update your TTS configuration to use 16kHz sample rate. " - f"See: https://docs.agora.io/en/conversational-ai/models/avatar/akool" - ) - src/agora_agent/agentkit/constants.py: | - """ - Type-safe constants for agent configuration values. - Use these instead of raw strings to avoid typos and get IDE autocomplete. 
- """ - - # Data channel: "rtm" | "datastream" - class DataChannel: - RTM = "rtm" - DATASTREAM = "datastream" - - class AudioScenario: - DEFAULT = "default" - CHORUS = "chorus" - AISERVER = "aiserver" - - - # Silence action when timeout elapses: "speak" | "think" - # (Use for parameters.silence_config.action — avoids shadowing SilenceAction type) - class SilenceActionValues: - SPEAK = "speak" - THINK = "think" - - - # SAL mode: "locking" | "recognition" - # (Use for sal.sal_mode — avoids shadowing SalMode type) - class SalModeValues: - LOCKING = "locking" - RECOGNITION = "recognition" - - - # Geofence area: "GLOBAL" | "NORTH_AMERICA" | "EUROPE" | "ASIA" | "INDIA" | "JAPAN" - class GeofenceArea: - GLOBAL = "GLOBAL" - NORTH_AMERICA = "NORTH_AMERICA" - EUROPE = "EUROPE" - ASIA = "ASIA" - INDIA = "INDIA" - JAPAN = "JAPAN" - - - # Geofence exclude area (when area is GLOBAL) - class GeofenceExcludeArea: - NORTH_AMERICA = "NORTH_AMERICA" - EUROPE = "EUROPE" - ASIA = "ASIA" - INDIA = "INDIA" - JAPAN = "JAPAN" - - - # Filler word selection rule: "shuffle" | "round_robin" - class FillerWordsSelectionRule: - SHUFFLE = "shuffle" - ROUND_ROBIN = "round_robin" - - - # Turn detection type (deprecated; use TurnDetectionNestedConfig.EndOfSpeech instead) - class TurnDetectionTypeValues: - AGORA_VAD = "agora_vad" - SERVER_VAD = "server_vad" - SEMANTIC_VAD = "semantic_vad" - - - # Think action value constants (match Fern wire values) - ThinkOnListeningActionInject = "inject" - ThinkOnListeningActionInterrupt = "interrupt" - ThinkOnListeningActionIgnore = "ignore" - ThinkOnThinkingActionInterrupt = "interrupt" - ThinkOnThinkingActionIgnore = "ignore" - ThinkOnSpeakingActionInterrupt = "interrupt" - ThinkOnSpeakingActionIgnore = "ignore" - status: unresolved - - id: patch-972dd5bd - content_hash: sha256:10f86db20e0b5a3800efce4913b736ff338dee29eb18cb31e89658e0293b848e - original_commit: 972dd5bdafc09b3981ab2ce4e0d02beae165c626 - original_message: updated docs - original_author: 
digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/reference/agent.md - patch_content: |+ - From 972dd5bdafc09b3981ab2ce4e0d02beae165c626 Mon Sep 17 00:00:00 2001 - From: digitallysavvy - Date: Thu, 21 May 2026 16:13:35 -0400 - Subject: [PATCH] updated docs - - --- - docs/reference/agent.md | 15 +++++++++++++++ - 1 file changed, 15 insertions(+) - - diff --git a/docs/reference/agent.md b/docs/reference/agent.md - index 1e88b8b..3163f9c 100644 - --- a/docs/reference/agent.md - +++ b/docs/reference/agent.md - @@ -264,3 +264,18 @@ to_properties( - | `rtc` | `Optional[RtcConfig]` | RTC configuration | - | `filler_words` | `Optional[FillerWordsConfig]` | Filler words configuration | - | `config` | `Dict[str, Any]` | Full configuration dict | - + - +## Type aliases - + - +Public aliases over Fern-generated types: `LlmConfig`, `SttConfig`, `AsrConfig` (= `SttConfig`), `MllmConfig`, `AvatarConfig`, session/conversation types, and think types (`ThinkOnListeningAction`, etc.). - + - +Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. - + - +## Cross-SDK discovery map - + - +| Concept | Python | TypeScript | Go | - +|---|---|---|---| - +| STT payload alias (wire: `asr`) | `SttConfig` / `AsrConfig` | `SttConfig` / `AsrConfig` | `AsrConfig` / `SttConfig` | - +| xAI MLLM (primary) | `XaiGrok` | `XaiGrok` | `XaiGrok` / `NewXaiGrok` | - +| Avatar token helper | `is_avatar_token_managed` | `isAvatarTokenManaged` | `IsAvatarTokenManaged` | - +| Think inject constant | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | - -- - 2.52.0 - - theirs_snapshot: - docs/reference/agent.md: | - --- - sidebar_position: 2 - title: Agent - description: Full API reference for the Python Agent builder class. 
- --- - - # Agent Reference - - **Import:** `from agora_agent.agentkit import Agent` or `from agora_agent import Agent` - - ## Constructor - - - ```python - Agent( - name: Optional[str] = None, - instructions: Optional[str] = None, - turn_detection: Optional[TurnDetectionConfig] = None, - interruption: Optional[InterruptionConfig] = None, - sal: Optional[SalConfig] = None, - advanced_features: Optional[Dict[str, Any]] = None, - parameters: Optional[SessionParams] = None, - greeting: Optional[str] = None, - failure_message: Optional[str] = None, - max_history: Optional[int] = None, - geofence: Optional[GeofenceConfig] = None, - labels: Optional[Dict[str, str]] = None, - rtc: Optional[RtcConfig] = None, - filler_words: Optional[FillerWordsConfig] = None, - ) - ``` - - | Parameter | Type | Default | Description | - |---|---|---|---| - | `name` | `Optional[str]` | `None` | Agent name, used as default session name | - | `instructions` | `Optional[str]` | `None` | System prompt for the LLM | - | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Turn detection configuration | - | `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration | - | `sal` | `Optional[SalConfig]` | `None` | Speech Activity Level configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | `None` | Advanced features dict (e.g., `{'enable_rtm': True}`) | - | `parameters` | `Optional[SessionParams]` | `None` | Additional session parameters | - | `greeting` | `Optional[str]` | `None` | Auto-spoken greeting when agent joins | - | `failure_message` | `Optional[str]` | `None` | Spoken on error | - | `max_history` | `Optional[int]` | `None` | Max conversation history length | - | `geofence` | `Optional[GeofenceConfig]` | `None` | Regional access restriction | - | `labels` | `Optional[Dict[str, str]]` | `None` | Custom key-value labels (returned in callbacks) | - | `rtc` | `Optional[RtcConfig]` | `None` | RTC media encryption | - | 
`filler_words` | `Optional[FillerWordsConfig]` | `None` | Filler words while waiting for LLM | - - ## Builder Methods - - All builder methods return a new `Agent` instance (immutable pattern). - - ### `with_llm(vendor: BaseLLM) -> Agent` - - Set the LLM vendor for cascading flow. - - - ```python - from agora_agent.agentkit.vendors import OpenAI - agent = Agent().with_llm(OpenAI(api_key='your-key', model='gpt-4o-mini')) - ``` - - ### `with_tts(vendor: BaseTTS) -> Agent` - - Set the TTS vendor. Records the vendor's `sample_rate` for avatar validation. - - - ```python - from agora_agent.agentkit.vendors import ElevenLabsTTS - agent = Agent().with_tts(ElevenLabsTTS(key='your-key', model_id='eleven_flash_v2_5', voice_id='your-voice-id')) - ``` - - ### `with_stt(vendor: BaseSTT) -> Agent` - - Set the STT (ASR) vendor. - - - ```python - from agora_agent.agentkit.vendors import DeepgramSTT - agent = Agent().with_stt(DeepgramSTT(api_key='your-key', language='en-US')) - ``` - - ### `with_mllm(vendor: BaseMLLM) -> Agent` - - Set the MLLM vendor for multimodal flow. Calling `with_mllm()` automatically sets `mllm.enable = True`. MLLM sessions do not require TTS, STT, or LLM vendors. - - - ```python - from agora_agent.agentkit.vendors import OpenAIRealtime - agent = Agent().with_mllm(OpenAIRealtime(api_key='your-key')) - ``` - - ### `with_avatar(vendor: BaseAvatar) -> Agent` - - Set the avatar vendor for the cascading ASR + LLM + TTS pipeline. Avatars are not supported when MLLM is enabled — combining `with_mllm()` and an enabled `with_avatar()` is rejected at `to_properties()` and `AgentSession.start()`. A disabled avatar (`enable=False`) is allowed alongside MLLM. - - Raises `ValueError` if the TTS sample rate does not match the avatar's `required_sample_rate`. 
- - - ```python - from agora_agent.agentkit.vendors import HeyGenAvatar - agent = agent.with_avatar(HeyGenAvatar(api_key='your-key', quality='medium', agora_uid='2')) - ``` - - **Raises:** `ValueError` — `"Avatar requires TTS sample rate of {required} Hz, but TTS is configured with {actual} Hz. Please update your TTS sample_rate to {required}."` - - ### `with_turn_detection(config: TurnDetectionConfig) -> Agent` - - Override cascading-flow turn detection settings. Use `config.start_of_speech` and `config.end_of_speech` for SOS/EOS detection. Use `with_interruption()` for interruption behavior and MLLM vendor `turn_detection` for MLLM turn detection. - - Pause-state detection is configured under semantic end-of-speech: - - ```python - agent = agent.with_turn_detection({ - "mode": "default", - "config": { - "end_of_speech": { - "mode": "semantic", - "semantic_config": { - "pause_state_enabled": True, - }, - }, - }, - }) - ``` - - ### `with_interruption(config: InterruptionConfig) -> Agent` - - Configure unified interruption behavior using the top-level `interruption` object. Use this for `start_of_speech` and `keywords` interruption modes. - - ### `with_instructions(instructions: str) -> Agent` - - Override the system prompt. - - ### `with_greeting(greeting: str) -> Agent` - - Override the greeting message. - - ### `with_name(name: str) -> Agent` - - Override the agent name. - - ### `with_sal(config: SalConfig) -> Agent` - - Set SAL (Selective Attention Locking) configuration. - - ### `with_advanced_features(features: AdvancedFeatures) -> Agent` - - Set advanced features (e.g. `{'enable_rtm': True}`). - - When `enable_rtm=True`, AgentKit defaults `parameters.data_channel` to `"rtm"` unless you explicitly set another data channel. - - ### `with_tools(enabled: bool = True) -> Agent` - - Enable or disable MCP tool invocation by setting `advanced_features.enable_tools`. 
- - ### `with_parameters(parameters: SessionParams) -> Agent` - - Set session parameters (silence config, farewell config, data channel, audio scenario, etc.). - - ### `with_audio_scenario(audio_scenario: ParametersAudioScenario) -> Agent` - - Set `parameters.audio_scenario` without replacing existing session parameters. - - ### `with_failure_message(message: str) -> Agent` - - Set the message spoken via TTS when the LLM call fails. - - ### `with_max_history(max_history: int) -> Agent` - - Set the maximum conversation history length for the standard ASR + LLM + TTS pipeline. The v2.7 MLLM core type does not expose `max_history`. - - ### `with_geofence(geofence: GeofenceConfig) -> Agent` - - Set geofence configuration (restricts backend server regions). - - ### `with_labels(labels: Dict[str, str]) -> Agent` - - Set custom labels (key-value pairs returned in notification callbacks). - - ### `with_rtc(rtc: RtcConfig) -> Agent` - - Set RTC configuration. - - ### `with_filler_words(filler_words: FillerWordsConfig) -> Agent` - - Set filler words configuration (played while waiting for LLM response). - - ## `create_session()` - - - ```python - create_session( - client: Any, - channel: str, - agent_uid: str, - remote_uids: List[str], - name: Optional[str] = None, - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - expires_in: Optional[int] = None, - ) -> AgentSession - ``` - - Creates an `AgentSession` bound to the given client and channel. 
- - | Parameter | Type | Required | Description | - |---|---|---|---| - | `client` | `Agora` or `AsyncAgora` | Yes | Authenticated client | - | `channel` | `str` | Yes | Channel name | - | `agent_uid` | `str` | Yes | UID for the agent | - | `remote_uids` | `List[str]` | Yes | UIDs of remote participants | - | `name` | `Optional[str]` | No | Session name (defaults to agent name) | - | `token` | `Optional[str]` | No | Pre-built RTC+RTM token | - | `expires_in` | `Optional[int]` | No | Token lifetime in seconds (default: `86400` = 24 h, Agora max). Only applies when the token is auto-generated. Use `expires_in_hours()` or `expires_in_minutes()` for clarity. Valid range: 1–86400. | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - - **Returns:** `AgentSession` - - ## `to_properties()` - - Converts the agent configuration into a `StartAgentsRequestProperties` object for the Agora API. Called internally by `AgentSession.start()`. - - - ```python - to_properties( - channel: str, - agent_uid: str, - remote_uids: List[str], - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - token: Optional[str] = None, - app_id: Optional[str] = None, - app_certificate: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> StartAgentsRequestProperties - ``` - - **Raises:** `ValueError` if neither `token` nor `app_id`+`app_certificate` is provided, or if required vendors (LLM, TTS) are missing in cascading mode. 
- - ## Properties - - | Property | Type | Description | - |---|---|---| - | `name` | `Optional[str]` | Agent name | - | `instructions` | `Optional[str]` | System prompt | - | `greeting` | `Optional[str]` | Greeting message | - | `failure_message` | `Optional[str]` | Message spoken when LLM fails | - | `max_history` | `Optional[int]` | Max conversation history length | - | `llm` | `Optional[Dict[str, Any]]` | LLM config dict (from `to_config()`) | - | `tts` | `Optional[Dict[str, Any]]` | TTS config dict | - | `stt` | `Optional[Dict[str, Any]]` | STT config dict | - | `mllm` | `Optional[Dict[str, Any]]` | MLLM config dict | - | `avatar` | `Optional[Dict[str, Any]]` | Avatar config dict | - | `turn_detection` | `Optional[TurnDetectionConfig]` | Turn detection settings | - | `sal` | `Optional[SalConfig]` | SAL configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | Advanced features | - | `parameters` | `Optional[SessionParams]` | Session parameters | - | `geofence` | `Optional[GeofenceConfig]` | Geofence configuration | - | `labels` | `Optional[Dict[str, str]]` | Custom labels | - | `rtc` | `Optional[RtcConfig]` | RTC configuration | - | `filler_words` | `Optional[FillerWordsConfig]` | Filler words configuration | - | `config` | `Dict[str, Any]` | Full configuration dict | - - ## Type aliases - - Public aliases over Fern-generated types: `LlmConfig`, `SttConfig`, `AsrConfig` (= `SttConfig`), `MllmConfig`, `AvatarConfig`, session/conversation types, and think types (`ThinkOnListeningAction`, etc.). - - Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. 
- - ## Cross-SDK discovery map - - | Concept | Python | TypeScript | Go | - |---|---|---|---| - | STT payload alias (wire: `asr`) | `SttConfig` / `AsrConfig` | `SttConfig` / `AsrConfig` | `AsrConfig` / `SttConfig` | - | xAI MLLM (primary) | `XaiGrok` | `XaiGrok` | `XaiGrok` / `NewXaiGrok` | - | Avatar token helper | `is_avatar_token_managed` | `isAvatarTokenManaged` | `IsAvatarTokenManaged` | - | Think inject constant | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | `ThinkOnListeningActionInject` | - status: unresolved - - id: patch-7465fada - content_hash: sha256:a2f90f66c927424018f2c3304742f097e8594dec9cb2f783264c7b11679a14ac - original_commit: 7465fadafa0f1e62051d99b42d0eeda85f31eeee - original_message: "fix(agentkit): resolve Python session typing issues" - original_author: digitallysavvy - base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c - files: - - src/agora_agent/agentkit/agent_session.py - patch_content: | - diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index 2900c18..745c465 100644 - --- a/src/agora_agent/agentkit/agent_session.py - +++ b/src/agora_agent/agentkit/agent_session.py - @@ -15,6 +15,7 @@ from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - +from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping - from .avatar_types import ( - is_akool_avatar, - theirs_snapshot: - src/agora_agent/agentkit/agent_session.py: | - import typing - import warnings - - from ..core.api_error import ApiError - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, - ) - from 
..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - is_rtc_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .presets import ( - get_preset_category, - infer_asr_preset, - infer_llm_preset, - infer_tts_preset, - normalize_preset_input, - resolve_session_presets, - ) - from .token import generate_convo_ai_token, _parse_numeric_uid - - - class _AgentSessionRequiredOptions(typing.TypedDict, total=True): - """Required fields shared by both sync and async session constructors.""" - - client: typing.Any - agent: Agent - app_id: str - name: str - channel: str - agent_uid: str - remote_uids: typing.List[str] - - - class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - """Configuration options for creating an agent session. 
- - Required fields - --------------- - client, agent, app_id, name, channel, agent_uid, remote_uids - - Optional fields - --------------- - app_certificate, token, idle_timeout, enable_string_uid, preset, - pipeline_id, expires_in, debug, warn - """ - - app_certificate: str - token: str - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - class _AgentSessionBase: - """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. - - Not intended for direct use — instantiate one of the concrete subclasses or - call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. - """ - - def __init__( - self, - client: typing.Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - app_certificate: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ): - self._client = client - self._agent = agent - self._app_id = app_id - self._app_certificate = app_certificate - self._name = name - self._channel = channel - self._token = token - self._agent_uid = agent_uid - self._remote_uids = remote_uids - self._idle_timeout = idle_timeout - self._enable_string_uid = enable_string_uid - self._preset = preset - self._pipeline_id = pipeline_id - self._expires_in = expires_in - self._debug = debug - self._warn = warn or warnings.warn - self._agent_id: typing.Optional[str] = None - self._status: str = "idle" - self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - 
- # ------------------------------------------------------------------ - # Public read-only properties - # ------------------------------------------------------------------ - - @property - def id(self) -> typing.Optional[str]: - return self._agent_id - - @property - def status(self) -> str: - return self._status - - @property - def agent(self) -> Agent: - return self._agent - - @property - def app_id(self) -> str: - return self._app_id - - @property - def raw(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentsClient. - - Use this to access any new endpoints that Fern generates without - waiting for agentkit method updates. - """ - return self._client.agents - - @property - def raw_agent_management(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentManagement client.""" - return self._client.agent_management - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: - """Return per-request auth headers when client is in app-credentials mode. - - In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated - for every request and returned as ``Authorization: agora token=``. - In basic-auth mode this returns ``None`` (the client-level header is used). 
- """ - if getattr(self._client, "auth_mode", None) != "app-credentials": - return None - app_id: str = getattr(self._client, "app_id", self._app_id) - app_certificate: typing.Optional[str] = getattr( - self._client, "app_certificate", self._app_certificate - ) - if not app_certificate: - raise RuntimeError("app_certificate is required for app-credentials auth mode") - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(self._agent_uid, "agent_uid"), - ) - return {"Authorization": f"agora token={token}"} - - def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - """Build request_options dict with per-request auth headers if needed.""" - headers = self._convo_ai_headers() - if headers is None: - return None - return {"additional_headers": headers} - - def _validate_avatar_config(self) -> None: - avatar = self._agent.avatar - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - if self._is_mllm_mode(): - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) - - tts_params = tts.get("params") if isinstance(tts, dict) else None - sample_rate = self._agent.tts_sample_rate - if sample_rate is None and isinstance(tts_params, dict): - sample_rate = ( - tts_params.get("sample_rate") - or tts_params.get("sample_rate_hertz") - or tts_params.get("samplingRate") - ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - self._warn( - "Warning: HeyGen avatar detected but TTS sample_rate is not explicitly set. " - "HeyGen requires 24,000 Hz. 
Please ensure your TTS provider is configured for 24kHz." - ) - elif is_live_avatar_avatar(avatar): - self._warn( - "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " - "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_akool_avatar(avatar): - self._warn( - "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." - ) - - def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - avatar = properties.get("avatar") - if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - return - - params = avatar.get("params") - if not isinstance(params, dict): - params = {} - avatar["params"] = params - - if is_generic_avatar(avatar): - if not params.get("agora_appid"): - params["agora_appid"] = self._app_id - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - if not is_avatar_token_managed(avatar): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_uid"): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_token"): - if not self._app_certificate: - raise ValueError( - "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." 
- ) - token_kwargs: typing.Dict[str, typing.Any] = {} - if self._expires_in is not None: - token_kwargs["token_expire"] = self._expires_in - params["agora_token"] = generate_convo_ai_token( - app_id=self._app_id, - app_certificate=self._app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(str(params["agora_uid"]), "avatar agora_uid"), - **token_kwargs, - ) - - if str(params.get("agora_uid")) == self._agent_uid: - self._warn( - "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." - ) - - validate_avatar_config(avatar, require_session_fields=True) - - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if isinstance(value, dict): - return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} - if isinstance(value, list): - return [_AgentSessionBase._dump_model(item) for item in value] - return value - - def _is_mllm_mode(self) -> bool: - mllm = self._agent.mllm - if isinstance(mllm, dict) and mllm.get("enable") is True: - return True - return mllm is not None - - def _build_start_properties( - self, - token_opts: typing.Dict[str, typing.Any], - skip_vendor_validation_categories: typing.AbstractSet[str], - allow_missing_vendor_categories: typing.AbstractSet[str], - ) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - skip_vendor_validation_categories=skip_vendor_validation_categories, - allow_missing_vendor_categories=allow_missing_vendor_categories, - **token_opts, - ) - properties = self._dump_model(base_properties) - self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - mllm = self._dump_model(self._agent.mllm) - if not 
isinstance(mllm, dict): - mllm = {} - if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - if self._agent.failure_message is not None: - mllm.setdefault("failure_message", self._agent.failure_message) - properties["mllm"] = mllm - return properties - - if self._agent.tts is not None: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - if self._agent.instructions is not None and "system_messages" not in llm: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None and "greeting_message" not in llm: - llm["greeting_message"] = self._agent.greeting - if self._agent.greeting_configs is not None and "greeting_configs" not in llm: - llm["greeting_configs"] = self._dump_model(self._agent.greeting_configs) - if self._agent.failure_message is not None and "failure_message" not in llm: - llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None and "max_history" not in llm: - llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - @staticmethod - def _request_properties_for_start( - resolved_properties: typing.Dict[str, typing.Any], - *, - resolved_preset: typing.Optional[str], - pipeline_id: typing.Optional[str], - ) -> typing.Any: - try: - return _start_properties_from_mapping(resolved_properties) - except Exception as exc: - if pipeline_id: - return resolved_properties - if resolved_preset: - normalized_preset = normalize_preset_input(resolved_preset) - if not normalized_preset: - raise - preset_categories = { - category - for item in normalized_preset.split(",") - for category in [get_preset_category(item)] - if category is not None - } - error_categories = _AgentSessionBase._validation_error_categories(exc) - if 
error_categories and error_categories.issubset(preset_categories): - return resolved_properties - raise - - @staticmethod - def _validation_error_categories(exc: Exception) -> typing.Set[str]: - errors = getattr(exc, "errors", None) - if not callable(errors): - return set() - categories: typing.Set[str] = set() - for error in errors(): - loc = error.get("loc") if isinstance(error, dict) else None - if isinstance(loc, tuple) and loc: - field = loc[0] - if field in {"asr", "llm", "tts"}: - categories.add(typing.cast(str, field)) - return categories - - def _vendor_validation_categories( - self, - pipeline_id: typing.Optional[str], - ) -> typing.Tuple[typing.Set[str], typing.Set[str]]: - skip_categories: typing.Set[str] = set() - allow_missing_categories: typing.Set[str] = {"asr", "llm", "tts"} if pipeline_id else set() - - preset = normalize_preset_input(self._preset) - if preset: - for item in preset.split(","): - category = get_preset_category(item) - if category is not None: - skip_categories.add(category) - allow_missing_categories.add(category) - - if infer_asr_preset(self._agent.stt): - skip_categories.add("asr") - if infer_llm_preset(self._agent.llm): - skip_categories.add("llm") - if infer_tts_preset(self._agent.tts): - skip_categories.add("tts") - return skip_categories, allow_missing_categories - - @staticmethod - def _page_value(pagination: typing.Any, field: str) -> typing.Any: - if pagination is None: - return None - if isinstance(pagination, dict): - return pagination.get(field) - return getattr(pagination, field, None) - - @staticmethod - def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - return list(turns or []) - - @staticmethod - def _response_pagination(response: typing.Any) -> typing.Any: - if isinstance(response, dict): - return response.get("pagination") - return getattr(response, "pagination", None) - - @classmethod - def 
_with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - data = cls._dump_model(first_response) - if not isinstance(data, dict): - data = {} - data["turns"] = turns - return GetTurnsAgentsResponse(**data) - - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - - def on(self, event: str, handler: typing.Callable[..., None]) -> None: - """Register an event handler. - - Parameters - ---------- - event : str - The event type (``started``, ``stopped``, ``error``). - handler : callable - The event handler to invoke when the event fires. - """ - if event not in self._event_handlers: - self._event_handlers[event] = [] - self._event_handlers[event].append(handler) - - def off(self, event: str, handler: typing.Callable[..., None]) -> None: - """Unregister a previously registered event handler.""" - handlers = self._event_handlers.get(event) - if handlers and handler in handlers: - handlers.remove(handler) - - def _emit(self, event: str, data: typing.Any) -> None: - handlers = self._event_handlers.get(event) - if handlers: - for handler in handlers: - try: - handler(data) - except Exception as exc: - # Prevent a misbehaving handler from blocking other handlers or - # the session lifecycle. Warn so the error is not silently lost. - warnings.warn( - f"Event handler for '{event}' raised an exception: {exc}", - stacklevel=2, - ) - - - class AgentSession(_AgentSessionBase): - """Manages the lifecycle of an agent session (synchronous). - - This class provides a high-level interface for managing agent sessions, - including starting, stopping, and interacting with the agent. - - Use :meth:`Agent.create_session` to create a session — this is the - recommended entry point. 
- - Examples - -------- - >>> from agora_agent import Agora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - >>> session.say("Hello!") - >>> session.stop() - """ - - def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - skip_categories, allow_missing_categories = self._vendor_validation_categories(pipeline_id) - properties = self._build_start_properties( - token_opts, - skip_vendor_validation_categories=skip_categories, - allow_missing_vendor_categories=allow_missing_categories, - ) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - 
"pipeline_id": pipeline_id, - "properties": resolved_properties, - }) - - request_properties = self._request_properties_for_start( - resolved_properties, - resolved_preset=resolved_preset, - pipeline_id=pipeline_id, - ) - - response = self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. - """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. 
- priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. - """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not None: - kwargs["page_size"] = page_size - - return self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. 
- """ - response = self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - current_page = next_page - return self._with_all_turns(response, all_turns) - - - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. - - Use :meth:`Agent.create_async_session` to create a session — this is the - recommended entry point. 
- - Examples - -------- - >>> from agora_agent import AsyncAgora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - >>> await session.say("Hello!") - >>> await session.stop() - """ - - async def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - skip_categories, allow_missing_categories = self._vendor_validation_categories(pipeline_id) - properties = self._build_start_properties( - token_opts, - skip_vendor_validation_categories=skip_categories, - allow_missing_vendor_categories=allow_missing_categories, - ) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": 
resolved_preset, - "pipeline_id": pipeline_id, - "properties": resolved_properties, - }) - - request_properties = self._request_properties_for_start( - resolved_properties, - resolved_preset=resolved_preset, - pipeline_id=pipeline_id, - ) - - response = await self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. - """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - await self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. 
- priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. - """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - await self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - async def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return await self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - async def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not None: - kwargs["page_size"] = page_size - - return await self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. 
- """ - response = await self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = await self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) - - id: patch-d29165c4 - content_hash: sha256:be59d1d3efc435d5e0b83305b2cd39ce3dad4534a4125de18028c137e692e659 - original_commit: d29165c4ddd8296af703a4e9ed848516f563dd1b - original_message: make python compat package publishable - original_author: chenyuguo - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - compat/agora-agent-server-sdk/README.md - - compat/agora-agent-server-sdk/pyproject.toml - - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - patch_content: |+ - From d29165c4ddd8296af703a4e9ed848516f563dd1b Mon Sep 17 00:00:00 2001 - From: chenyuguo - Date: Wed, 27 May 2026 17:24:50 +0800 - Subject: [PATCH] make python compat package publishable - - --- - compat/agora-agent-server-sdk/README.md | 2 ++ - compat/agora-agent-server-sdk/pyproject.toml | 3 +++ - .../src/agora_agent_server_sdk_compat/__init__.py | 1 + - 3 files changed, 6 insertions(+) - create mode 100644 compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - - diff --git a/compat/agora-agent-server-sdk/README.md b/compat/agora-agent-server-sdk/README.md - index 1388836..cff3cfe 100644 - --- a/compat/agora-agent-server-sdk/README.md - +++ b/compat/agora-agent-server-sdk/README.md - @@ -9,3 +9,5 @@ pip install agora-agents - ``` - - This compatibility package is kept only to preserve the legacy distribution name during the migration window. It depends on `agora-agents`, which continues to provide the `agora_agent` Python import path. - + - +It intentionally contains only a minimal compatibility module so the distribution can be built and published cleanly with Poetry. 
- diff --git a/compat/agora-agent-server-sdk/pyproject.toml b/compat/agora-agent-server-sdk/pyproject.toml - index 8efbe53..ac93128 100644 - --- a/compat/agora-agent-server-sdk/pyproject.toml - +++ b/compat/agora-agent-server-sdk/pyproject.toml - @@ -26,6 +26,9 @@ classifiers = [ - "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed" - ] - +packages = [ - + { include = "agora_agent_server_sdk_compat", from = "src"} - +] - - [tool.poetry.urls] - Repository = 'https://github.com/AgoraIO-Conversational-AI/agent-server-sdk-python' - diff --git a/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - new file mode 100644 - index 0000000..55522c6 - --- /dev/null - +++ b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - @@ -0,0 +1 @@ - +"""Compatibility package for the renamed agora-agents distribution.""" - -- - 2.52.0 - - theirs_snapshot: - compat/agora-agent-server-sdk/README.md: | - # agora-agent-server-sdk - - This package has been renamed to `agora-agents`. - - New projects should install: - - ```sh - pip install agora-agents - ``` - - This compatibility package is kept only to preserve the legacy distribution name during the migration window. It depends on `agora-agents`, which continues to provide the `agora_agent` Python import path. - - It intentionally contains only a minimal compatibility module so the distribution can be built and published cleanly with Poetry. - compat/agora-agent-server-sdk/pyproject.toml: | - [project] - name = "agora-agent-server-sdk" - - [tool.poetry] - name = "agora-agent-server-sdk" - version = "v2.0.0" - description = "Compatibility shim for the renamed agora-agents package." 
- readme = "README.md" - authors = [] - keywords = [] - - classifiers = [ - "Intended Audience :: Developers", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", - "Operating System :: POSIX", - "Operating System :: MacOS", - "Operating System :: POSIX :: Linux", - "Operating System :: Microsoft :: Windows", - "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed" - ] - packages = [ - { include = "agora_agent_server_sdk_compat", from = "src"} - ] - - [tool.poetry.urls] - Repository = 'https://github.com/AgoraIO-Conversational-AI/agent-server-sdk-python' - - [tool.poetry.dependencies] - python = "^3.8" - agora-agents = ">=2.0.0,<3.0.0" - - [build-system] - requires = ["poetry-core"] - build-backend = "poetry.core.masonry.api" - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py: | - """Compatibility package for the renamed agora-agents distribution.""" - status: unresolved - - id: patch-fae1249a - content_hash: sha256:01bf21f3cc4c784dfcff80a48c9c7bb3123af4327a567b7c990b528e9780e9a2 - original_commit: fae1249a20c53761a2eb5515a1bf92ca666760d1 - original_message: Re-export agora-agents API from legacy PyPI compatibility package The compat distribution delegates to agora_agent via __getattr__ and documents both import paths in its README. 
- original_author: digitallysavvy - base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c - files: - - compat/agora-agent-server-sdk/README.md - - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - patch_content: |+ - From fae1249a20c53761a2eb5515a1bf92ca666760d1 Mon Sep 17 00:00:00 2001 - From: digitallysavvy - Date: Wed, 27 May 2026 16:58:18 -0400 - Subject: [PATCH] Re-export agora-agents API from legacy PyPI compatibility - package The compat distribution delegates to agora_agent via __getattr__ and - documents both import paths in its README. - - --- - compat/agora-agent-server-sdk/README.md | 7 +++++-- - .../src/agora_agent_server_sdk_compat/__init__.py | 14 +++++++++++++- - 2 files changed, 18 insertions(+), 3 deletions(-) - - diff --git a/compat/agora-agent-server-sdk/README.md b/compat/agora-agent-server-sdk/README.md - index cff3cfe..e43d1d8 100644 - --- a/compat/agora-agent-server-sdk/README.md - +++ b/compat/agora-agent-server-sdk/README.md - @@ -8,6 +8,9 @@ New projects should install: - pip install agora-agents - ``` - - -This compatibility package is kept only to preserve the legacy distribution name during the migration window. It depends on `agora-agents`, which continues to provide the `agora_agent` Python import path. - +This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: - - -It intentionally contains only a minimal compatibility module so the distribution can be built and published cleanly with Poetry. 
- +```python - +from agora_agent import Agora, Area - +from agora_agent_server_sdk_compat import Agora, Area - +``` - diff --git a/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - index 55522c6..6283244 100644 - --- a/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - +++ b/compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py - @@ -1 +1,13 @@ - -"""Compatibility package for the renamed agora-agents distribution.""" - +"""Compatibility re-exports for the renamed agora-agents package.""" - + - +import agora_agent as _agora_agent - + - +__all__ = getattr(_agora_agent, "__all__", []) - + - + - +def __getattr__(name: str): - + return getattr(_agora_agent, name) - + - + - +def __dir__(): - + return dir(_agora_agent) - -- - 2.52.0 - - theirs_snapshot: - compat/agora-agent-server-sdk/README.md: | - # agora-agent-server-sdk - - This package has been renamed to `agora-agents`. - - New projects should install: - - ```sh - pip install agora-agents - ``` - - This compatibility package re-exports the public API from `agora-agents` to support existing installs during the migration window. 
The primary import path remains `agora_agent`; you can also import from `agora_agent_server_sdk_compat`: - - ```python - from agora_agent import Agora, Area - from agora_agent_server_sdk_compat import Agora, Area - ``` - compat/agora-agent-server-sdk/src/agora_agent_server_sdk_compat/__init__.py: | - """Compatibility re-exports for the renamed agora-agents package.""" - - import agora_agent as _agora_agent - - __all__ = getattr(_agora_agent, "__all__", []) - - - def __getattr__(name: str): - return getattr(_agora_agent, name) - - - def __dir__(): - return dir(_agora_agent) - user_owned: true - - id: patch-44c21c14 - content_hash: sha256:920a8a5905a3bbb134edb28b007c5c0b1b4b2c1f75753140fef305b14a64e3e0 - original_commit: 44c21c14a14aa7ad469a18ce86024ff14ca2bf9b - original_message: Re-export AgentKit symbols from agora_agent package root Extend __getattr__ and __all__ so vendor classes, presets, and helpers are importable via `from agora_agent import ...`. Add tests and update class docstring examples to use the root import path. 
- original_author: digitallysavvy - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - src/agora_agent/agentkit/agent.py - - src/agora_agent/agentkit/agent_session.py - - tests/custom/test_root_exports.py - patch_content: | - diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index a820291..f84862c 100644 - --- a/src/agora_agent/agentkit/agent.py - +++ b/src/agora_agent/agentkit/agent.py - @@ -231,8 +231,7 @@ class Agent: - - Examples - -------- - - >>> from agora_agent.agentkit import Agent - - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS, DeepgramSTT - + >>> from agora_agent import Agent, OpenAI, ElevenLabsTTS, DeepgramSTT - >>> - >>> agent = Agent(instructions="You are a helpful voice assistant.") - >>> agent = ( - diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index fb8e548..a749d1e 100644 - --- a/src/agora_agent/agentkit/agent_session.py - +++ b/src/agora_agent/agentkit/agent_session.py - @@ -412,12 +412,10 @@ class AgentSession(_AgentSessionBase): - - Examples - -------- - - >>> from agora_agent import Agora, Area - - >>> from agora_agent.agentkit import Agent - + >>> from agora_agent import Agora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - @@ -735,12 +733,10 @@ class AsyncAgentSession(_AgentSessionBase): - - Examples - -------- - - >>> from agora_agent import AsyncAgora, Area - - >>> from agora_agent.agentkit import Agent - + >>> from 
agora_agent import AsyncAgora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - - >>> from agora_agent.agentkit.vendors import OpenAI, ElevenLabsTTS - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - diff --git a/tests/custom/test_root_exports.py b/tests/custom/test_root_exports.py - new file mode 100644 - index 0000000..9b2f508 - --- /dev/null - +++ b/tests/custom/test_root_exports.py - @@ -0,0 +1,29 @@ - +import pytest - + - +import agora_agent - +import agora_agent.agentkit as agentkit - + - + - +def test_root_exports_match_agentkit_for_common_symbols() -> None: - + for name in ("Agent", "DeepgramSTT", "OpenAI", "AgentPresets", "generate_rtc_token", "DataChannel"): - + assert getattr(agora_agent, name) is getattr(agentkit, name) - + - + - +def test_root_exports_fern_client_symbols() -> None: - + assert agora_agent.Agora is not None - + assert agora_agent.Area is not None - + assert agora_agent.AsyncAgora is not None - + - + - +def test_unknown_root_export_raises_attribute_error() -> None: - + with pytest.raises(AttributeError): - + _ = agora_agent.NotARealExportName - + - + - +def test_dir_includes_agentkit_vendor_exports() -> None: - + assert "DeepgramSTT" in dir(agora_agent) - + - + - +def test_all_includes_agentkit_vendor_exports() -> None: - + assert "DeepgramSTT" in agora_agent.__all__ - + assert "OpenAI" in agora_agent.__all__ - theirs_snapshot: - src/agora_agent/agentkit/agent.py: | - from __future__ import annotations - - import time - import typing - import typing_extensions - - if typing.TYPE_CHECKING: - from .agent_session import AgentSession, AsyncAgentSession - - from 
..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from ..agents.types.start_agents_request_properties_asr import StartAgentsRequestPropertiesAsr - from ..agents.types.start_agents_request_properties_asr_vendor import StartAgentsRequestPropertiesAsrVendor - from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar - from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - from ..agents.types.start_agents_request_properties_llm import StartAgentsRequestPropertiesLlm - from ..agents.types.start_agents_request_properties_llm_style import StartAgentsRequestPropertiesLlmStyle - from ..agents.types.start_agents_request_properties_mllm import StartAgentsRequestPropertiesMllm - from ..agents.types.start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor - from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties - from ..agents.types.get_agents_response import GetAgentsResponse - from ..agents.types.list_agents_response import ListAgentsResponse - from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem - from ..agents.types.list_agents_response_data_list_item_status import ListAgentsResponseDataListItemStatus - from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse - from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem - from ..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem - from ..agents.types.speak_agents_request_priority import SpeakAgentsRequestPriority - from ..agents.types.start_agents_request_properties_turn_detection 
import StartAgentsRequestPropertiesTurnDetection - from ..agents.types.start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_keywords_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config_strategy import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - from 
..agents.types.start_agents_request_properties_turn_detection_type import StartAgentsRequestPropertiesTurnDetectionType - from ..agents.types.start_agents_request_properties_turn_detection_interrupt_mode import StartAgentsRequestPropertiesTurnDetectionInterruptMode - from ..agents.types.start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness - from ..agents.types.start_agents_request_properties_sal import StartAgentsRequestPropertiesSal - from ..agents.types.start_agents_request_properties_sal_sal_mode import StartAgentsRequestPropertiesSalSalMode - from ..agents.types.start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters - from ..agents.types.start_agents_request_properties_parameters_silence_config import StartAgentsRequestPropertiesParametersSilenceConfig - from ..agents.types.start_agents_request_properties_parameters_silence_config_action import StartAgentsRequestPropertiesParametersSilenceConfigAction - from ..agents.types.start_agents_request_properties_parameters_farewell_config import StartAgentsRequestPropertiesParametersFarewellConfig - from ..agents.types.start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel - from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario - from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption - from ..agents.types.start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode - from ..agents.types.start_agents_request_properties_mllm_turn_detection import StartAgentsRequestPropertiesMllmTurnDetection - from ..agents.types.start_agents_request_properties_mllm_turn_detection_mode import StartAgentsRequestPropertiesMllmTurnDetectionMode - from 
..agents.types.start_agents_request_properties_llm_greeting_configs import StartAgentsRequestPropertiesLlmGreetingConfigs - from ..agents.types.start_agents_request_properties_llm_greeting_configs_mode import StartAgentsRequestPropertiesLlmGreetingConfigsMode - from ..agents.types.start_agents_request_properties_llm_mcp_servers_item import StartAgentsRequestPropertiesLlmMcpServersItem - from ..agents.types.start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence - from ..agents.types.start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc - from ..agents.types.start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures - from ..agents.types.start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords - from ..agents.types.start_agents_request_properties_filler_words_trigger import StartAgentsRequestPropertiesFillerWordsTrigger - from ..agents.types.start_agents_request_properties_filler_words_trigger_fixed_time_config import StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - from ..agents.types.start_agents_request_properties_filler_words_content import StartAgentsRequestPropertiesFillerWordsContent - from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction, - ) - from 
..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse, - ) - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - LlmConfig = StartAgentsRequestPropertiesLlm - LlmStyle = StartAgentsRequestPropertiesLlmStyle - SttConfig = StartAgentsRequestPropertiesAsr - AsrConfig = SttConfig - SttVendor = StartAgentsRequestPropertiesAsrVendor - TtsConfig = Tts - MllmConfig = StartAgentsRequestPropertiesMllm - MllmVendor = StartAgentsRequestPropertiesMllmVendor - AvatarConfig = StartAgentsRequestPropertiesAvatar - AvatarVendor = StartAgentsRequestPropertiesAvatarVendor - TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection - SalConfig = StartAgentsRequestPropertiesSal - SalMode = StartAgentsRequestPropertiesSalSalMode - AdvancedFeatures = StartAgentsRequestPropertiesAdvancedFeatures - SessionParams = StartAgentsRequestPropertiesParameters - - # SOS/EOS turn detection aliases (preferred) - TurnDetectionNestedConfig = StartAgentsRequestPropertiesTurnDetectionConfig - StartOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - StartOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - StartOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - StartOfSpeechKeywordsConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - StartOfSpeechDisabledConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - StartOfSpeechDisabledConfigStrategy = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - EndOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - EndOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - 
EndOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - EndOfSpeechSemanticConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - - # Deprecated turn detection aliases - # Deprecated: Use TurnDetectionConfig with TurnDetectionNestedConfig.start_of_speech - # and .end_of_speech instead. The `type` field and agora_vad/server_vad/semantic_vad - # values will be removed in a future release. - TurnDetectionType = StartAgentsRequestPropertiesTurnDetectionType - - # Deprecated: Use StartOfSpeechConfig with mode="vad"|"keywords"|"disabled" and the - # corresponding vad_config, keywords_config, or disabled_config instead. - InterruptMode = StartAgentsRequestPropertiesTurnDetectionInterruptMode - - # Deprecated: Only applies to server_vad/semantic_vad modes with OpenAI Realtime - # (MLLM). Has no equivalent in the ASR + LLM + TTS pipeline. - Eagerness = StartAgentsRequestPropertiesTurnDetectionEagerness - - # Parameters (SessionParams) sub-type aliases - SilenceConfig = StartAgentsRequestPropertiesParametersSilenceConfig - SilenceAction = StartAgentsRequestPropertiesParametersSilenceConfigAction - FarewellConfig = StartAgentsRequestPropertiesParametersFarewellConfig - ParametersDataChannel = StartAgentsRequestPropertiesParametersDataChannel - ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario - InterruptionConfig = StartAgentsRequestPropertiesInterruption - InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = StartAgentsRequestPropertiesMllmTurnDetection - MllmTurnDetectionMode = StartAgentsRequestPropertiesMllmTurnDetectionMode - AgentConfig = StartAgentsRequestProperties - AgentConfigUpdate = UpdateAgentsRequestProperties - SessionInfo = GetAgentsResponse - SessionListResponse = ListAgentsResponse - SessionSummary = ListAgentsResponseDataListItem - SessionStatus = ListAgentsResponseDataListItemStatus - ConversationHistory = 
GetHistoryAgentsResponse - ConversationTurn = GetHistoryAgentsResponseContentsItem - ConversationRole = GetHistoryAgentsResponseContentsItemRole - ConversationTurns = GetTurnsAgentsResponse - ConversationSessionTurn = GetTurnsAgentsResponseTurnsItem - SpeakPriority = SpeakAgentsRequestPriority - Labels = typing.Dict[str, str] - - - class SessionParamsInput(typing_extensions.TypedDict, total=False): - silence_config: StartAgentsRequestPropertiesParametersSilenceConfig - farewell_config: StartAgentsRequestPropertiesParametersFarewellConfig - data_channel: StartAgentsRequestPropertiesParametersDataChannel - enable_metrics: bool - enable_error_message: bool - audio_scenario: ParametersAudioScenario - - - class ThinkOptions(typing_extensions.TypedDict, total=False): - on_listening_action: AgentThinkAgentManagementRequestOnListeningAction - on_thinking_action: AgentThinkAgentManagementRequestOnThinkingAction - on_speaking_action: AgentThinkAgentManagementRequestOnSpeakingAction - interruptable: bool - metadata: typing.Dict[str, str] - - - class GetTurnsOptions(typing_extensions.TypedDict, total=False): - page_index: int - page_size: int - - - class SayOptions(typing_extensions.TypedDict, total=False): - priority: SpeakAgentsRequestPriority - interruptable: bool - - - class SessionOptions(typing_extensions.TypedDict, total=False): - name: str - channel: str - token: str - agent_uid: str - remote_uids: typing.List[str] - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - # LLM sub-type aliases - LlmGreetingConfigs = StartAgentsRequestPropertiesLlmGreetingConfigs - LlmGreetingConfigsMode = StartAgentsRequestPropertiesLlmGreetingConfigsMode - McpServersItem = StartAgentsRequestPropertiesLlmMcpServersItem - - # Additional top-level config aliases - GeofenceConfig = StartAgentsRequestPropertiesGeofence - RtcConfig = 
StartAgentsRequestPropertiesRtc - FillerWordsConfig = StartAgentsRequestPropertiesFillerWords - FillerWordsTrigger = StartAgentsRequestPropertiesFillerWordsTrigger - FillerWordsTriggerFixedTimeConfig = StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - # Think type aliases and response - ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction - ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction - ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction - ThinkResponse = AgentThinkAgentManagementResponse - - from .token import generate_convo_ai_token, _parse_numeric_uid, _validate_expires_in - - - def _dump_optional_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value - - - class Agent: - """A reusable agent definition. - - Use the fluent builder methods (.with_llm(), .with_tts(), .with_stt(), .with_mllm()) - to configure vendor settings after construction. - - Examples - -------- - >>> from agora_agent import Agent, OpenAI, ElevenLabsTTS, DeepgramSTT - >>> - >>> agent = Agent(instructions="You are a helpful voice assistant.") - >>> agent = ( - ... agent - ... .with_llm(OpenAI(api_key="...", model="gpt-4")) - ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", sample_rate=24000)) - ... .with_stt(DeepgramSTT(api_key="...", model="nova-2")) - ... 
) - """ - - def __init__( - self, - name: typing.Optional[str] = None, - instructions: typing.Optional[str] = None, - turn_detection: typing.Optional[TurnDetectionConfig] = None, - interruption: typing.Optional[InterruptionConfig] = None, - sal: typing.Optional[SalConfig] = None, - advanced_features: typing.Optional[AdvancedFeatures] = None, - parameters: typing.Optional[typing.Union[SessionParams, SessionParamsInput]] = None, - greeting: typing.Optional[str] = None, - failure_message: typing.Optional[str] = None, - max_history: typing.Optional[int] = None, - geofence: typing.Optional[GeofenceConfig] = None, - labels: typing.Optional[typing.Dict[str, str]] = None, - rtc: typing.Optional[RtcConfig] = None, - filler_words: typing.Optional[FillerWordsConfig] = None, - greeting_configs: typing.Optional[LlmGreetingConfigs] = None, - ): - self._name = name - self._instructions = instructions - self._greeting = greeting - self._failure_message = failure_message - self._max_history = max_history - self._llm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts: typing.Optional[typing.Dict[str, typing.Any]] = None - self._stt: typing.Optional[typing.Dict[str, typing.Any]] = None - self._mllm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts_sample_rate: typing.Optional[int] = None - self._avatar: typing.Optional[typing.Dict[str, typing.Any]] = None - self._avatar_required_sample_rate: typing.Optional[int] = None - self._turn_detection = turn_detection - self._interruption = interruption - self._sal = sal - self._advanced_features = advanced_features - self._parameters = parameters - self._geofence = geofence - self._labels = labels - self._rtc = rtc - self._filler_words = filler_words - self._greeting_configs = greeting_configs - - def with_llm(self, vendor: BaseLLM) -> "Agent": - new_agent = self._clone() - new_agent._llm = vendor.to_config() - return new_agent - - def with_tts(self, vendor: BaseTTS) -> "Agent": - sample_rate = 
vendor.sample_rate - if ( - self._avatar_required_sample_rate not in (None, 0) - and sample_rate is not None - and sample_rate != self._avatar_required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {self._avatar_required_sample_rate} Hz, " - f"but TTS is configured with {sample_rate} Hz. " - f"Please update your TTS sample_rate to {self._avatar_required_sample_rate}." - ) - new_agent = self._clone() - new_agent._tts = vendor.to_config() - new_agent._tts_sample_rate = sample_rate - return new_agent - - def with_stt(self, vendor: BaseSTT) -> "Agent": - new_agent = self._clone() - new_agent._stt = vendor.to_config() - return new_agent - - def with_mllm(self, vendor: BaseMLLM) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` so callers can still - # configure both for tests, debugging, or disabled-avatar use cases. - new_agent = self._clone() - new_agent._mllm = vendor.to_config() - if isinstance(new_agent._mllm, dict): - new_agent._mllm["enable"] = True - if isinstance(new_agent._advanced_features, dict): - advanced_features = {key: value for key, value in new_agent._advanced_features.items() if key != "enable_mllm"} - new_agent._advanced_features = typing.cast(AdvancedFeatures, advanced_features) if advanced_features else None - elif isinstance(new_agent._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures): - advanced_features_model = self._copy_model_update( - new_agent._advanced_features, - {"enable_mllm": None}, - ) - if ( - advanced_features_model.enable_rtm is None - and advanced_features_model.enable_sal is None - and advanced_features_model.enable_tools is None - ): - new_agent._advanced_features = None - else: - new_agent._advanced_features = advanced_features_model - return new_agent - - def with_avatar(self, vendor: BaseAvatar) -> "Agent": - # Note: avatars are not supported with MLLM. 
The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` (only when the avatar is - # enabled) so callers may still combine the two for testing or for the - # disabled-avatar pattern. - required_sample_rate = vendor.required_sample_rate - if ( - required_sample_rate not in (None, 0) - and self._tts_sample_rate is not None - and self._tts_sample_rate != required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {required_sample_rate} Hz, " - f"but TTS is configured with {self._tts_sample_rate} Hz. " - f"Please update your TTS sample_rate to {required_sample_rate}." - ) - new_agent = self._clone() - new_agent._avatar = vendor.to_config() - new_agent._avatar_required_sample_rate = required_sample_rate - return new_agent - - def with_turn_detection(self, config: TurnDetectionConfig) -> "Agent": - new_agent = self._clone() - new_agent._turn_detection = config - return new_agent - - def with_interruption(self, config: InterruptionConfig) -> "Agent": - """Returns a new Agent with unified interruption control configured.""" - new_agent = self._clone() - new_agent._interruption = config - return new_agent - - def with_instructions(self, instructions: str) -> "Agent": - new_agent = self._clone() - new_agent._instructions = instructions - return new_agent - - def with_greeting(self, greeting: str) -> "Agent": - new_agent = self._clone() - new_agent._greeting = greeting - return new_agent - - def with_greeting_configs(self, configs: LlmGreetingConfigs) -> "Agent": - """Returns a new Agent with greeting playback configuration.""" - new_agent = self._clone() - new_agent._greeting_configs = configs - return new_agent - - def with_name(self, name: str) -> "Agent": - new_agent = self._clone() - new_agent._name = name - return new_agent - - def with_sal(self, config: SalConfig) -> "Agent": - """Returns a new Agent with the specified SAL (Selective Attention Locking) configuration.""" - new_agent = self._clone() - new_agent._sal = config 
- return new_agent - - def with_advanced_features(self, features: AdvancedFeatures) -> "Agent": - """Returns a new Agent with the specified advanced features configuration. - - Use this to enable RTM and other advanced features. - """ - new_agent = self._clone() - new_agent._advanced_features = features - return new_agent - - def with_tools(self, enabled: bool = True) -> "Agent": - """Returns a new Agent with MCP tool invocation enabled or disabled.""" - new_agent = self._clone() - if new_agent._advanced_features is None: - new_agent._advanced_features = StartAgentsRequestPropertiesAdvancedFeatures(enable_tools=enabled) - elif isinstance(new_agent._advanced_features, dict): - new_agent._advanced_features = typing.cast( - AdvancedFeatures, - {**new_agent._advanced_features, "enable_tools": enabled}, - ) - else: - new_agent._advanced_features = self._copy_model_update( - new_agent._advanced_features, - {"enable_tools": enabled}, - ) - return new_agent - - def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent": - """Returns a new Agent with the specified session parameters. - - Use this to configure silence behaviour, graceful hang-up, data channel, and more. 
- """ - new_agent = self._clone() - new_agent._parameters = parameters - return new_agent - - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - - def with_failure_message(self, message: str) -> "Agent": - """Returns a new Agent with the specified failure message. - - The failure message is played via TTS when the LLM call fails. - """ - new_agent = self._clone() - new_agent._failure_message = message - return new_agent - - def with_max_history(self, max_history: int) -> "Agent": - """Returns a new Agent with the specified maximum conversation history length.""" - new_agent = self._clone() - new_agent._max_history = max_history - return new_agent - - def with_geofence(self, geofence: GeofenceConfig) -> "Agent": - """Returns a new Agent with the specified geofence configuration. - - Restricts which geographic regions the agent's backend servers may run in. - """ - new_agent = self._clone() - new_agent._geofence = geofence - return new_agent - - def with_labels(self, labels: typing.Dict[str, str]) -> "Agent": - """Returns a new Agent with the specified custom labels. - - Labels are key-value pairs attached to the agent and returned in notification callbacks. 
- """ - new_agent = self._clone() - new_agent._labels = dict(labels) - return new_agent - - def with_rtc(self, rtc: RtcConfig) -> "Agent": - """Returns a new Agent with the specified RTC configuration.""" - new_agent = self._clone() - new_agent._rtc = rtc - return new_agent - - def with_filler_words(self, filler_words: FillerWordsConfig) -> "Agent": - """Returns a new Agent with the specified filler words configuration. - - Filler words are played while the agent waits for the LLM to respond. - """ - new_agent = self._clone() - new_agent._filler_words = filler_words - return new_agent - - @staticmethod - def _field_value(value: typing.Any, field: str) -> typing.Any: - if value is None: - return None - if isinstance(value, dict): - return value.get(field) - return getattr(value, field, None) - - @staticmethod - def _copy_model_update(value: typing.Any, update: typing.Dict[str, typing.Any]) -> typing.Any: - if hasattr(value, "model_copy"): - return value.model_copy(update=update) - if hasattr(value, "copy"): - return value.copy(update=update) - raise TypeError(f"Object of type {type(value).__name__} does not support model copying") - - def _resolved_parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - enable_rtm = self._field_value(self._advanced_features, "enable_rtm") is True - data_channel = self._field_value(self._parameters, "data_channel") - if not enable_rtm or data_channel is not None: - return self._parameters - if self._parameters is None: - return StartAgentsRequestPropertiesParameters(data_channel="rtm") - if isinstance(self._parameters, dict): - return typing.cast(SessionParamsInput, {**self._parameters, "data_channel": "rtm"}) - return self._copy_model_update(self._parameters, {"data_channel": "rtm"}) - - @property - def name(self) -> typing.Optional[str]: - return self._name - - @property - def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._llm - - @property - def tts(self) -> 
typing.Optional[typing.Dict[str, typing.Any]]: - return self._tts - - @property - def tts_sample_rate(self) -> typing.Optional[int]: - return self._tts_sample_rate - - @property - def stt(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._stt - - @property - def mllm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._mllm - - @property - def turn_detection(self) -> typing.Optional[TurnDetectionConfig]: - return self._turn_detection - - @property - def interruption(self) -> typing.Optional[InterruptionConfig]: - return self._interruption - - @property - def instructions(self) -> typing.Optional[str]: - return self._instructions - - @property - def greeting(self) -> typing.Optional[str]: - return self._greeting - - @property - def greeting_configs(self) -> typing.Optional[LlmGreetingConfigs]: - return self._greeting_configs - - @property - def failure_message(self) -> typing.Optional[str]: - return self._failure_message - - @property - def max_history(self) -> typing.Optional[int]: - return self._max_history - - @property - def avatar(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._avatar - - @property - def sal(self) -> typing.Optional[SalConfig]: - return self._sal - - @property - def advanced_features(self) -> typing.Optional[AdvancedFeatures]: - return self._advanced_features - - @property - def parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - return self._parameters - - @property - def geofence(self) -> typing.Optional[GeofenceConfig]: - return self._geofence - - @property - def labels(self) -> typing.Optional[typing.Dict[str, str]]: - return self._labels - - @property - def rtc(self) -> typing.Optional[RtcConfig]: - return self._rtc - - @property - def filler_words(self) -> typing.Optional[FillerWordsConfig]: - return self._filler_words - - @property - def config(self) -> typing.Dict[str, typing.Any]: - return { - "name": self._name, - "instructions": 
self._instructions, - "greeting": self._greeting, - "failure_message": self._failure_message, - "max_history": self._max_history, - "llm": self._llm, - "tts": self._tts, - "stt": self._stt, - "mllm": self._mllm, - "turn_detection": self._turn_detection, - "interruption": self._interruption, - "sal": self._sal, - "avatar": self._avatar, - "advanced_features": self._advanced_features, - "parameters": self._parameters, - "geofence": self._geofence, - "labels": self._labels, - "rtc": self._rtc, - "filler_words": self._filler_words, - "greeting_configs": self._greeting_configs, - } - - def create_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AgentSession": - from .agent_session import AgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def create_async_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, 
- enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AsyncAgentSession": - """Create an async session for use with :class:`~agora_agent.AsyncAgora`. - - Equivalent to :meth:`create_session` but returns an - :class:`~agora_agent.agentkit.AsyncAgentSession`. - """ - from .agent_session import AsyncAgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AsyncAgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def to_properties( - self, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - token: typing.Optional[str] = None, - app_id: typing.Optional[str] = None, - app_certificate: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - skip_vendor_validation: bool = False, - ) -> StartAgentsRequestProperties: - # Validate the MLLM + enabled-avatar combination BEFORE generating the - # RTC token so callers get a clear, actionable error first (matches the - # TypeScript and Go SDKs' fail-fast contract). 
- mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - is_mllm_mode = bool(mllm_flag or self._mllm is not None) - avatar_enabled = ( - isinstance(self._avatar, dict) and self._avatar.get("enable") is not False - ) - if is_mllm_mode and avatar_enabled: - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if token is None: - if app_id is None or app_certificate is None: - raise ValueError("Either token or app_id+app_certificate must be provided") - validated_expires_in = _validate_expires_in(expires_in) if expires_in is not None else None - # Use generate_convo_ai_token (RTC + RTM) so the token works whether or - # not the caller enables advanced_features.enable_rtm. - token_kwargs: typing.Dict[str, typing.Any] = {} - if validated_expires_in is not None: - token_kwargs["token_expire"] = validated_expires_in - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=channel, - uid=_parse_numeric_uid(agent_uid, "agent_uid"), - **token_kwargs, - ) - - base_kwargs: typing.Dict[str, typing.Any] = { - "channel": channel, - "token": token, - "agent_rtc_uid": agent_uid, - "remote_rtc_uids": remote_uids, - } - - if idle_timeout is not None: - base_kwargs["idle_timeout"] = idle_timeout - if enable_string_uid is not None: - base_kwargs["enable_string_uid"] = enable_string_uid - if self._mllm is not None: - base_kwargs["mllm"] = self._mllm - if self._turn_detection is not None: - base_kwargs["turn_detection"] = self._turn_detection - if self._interruption is not None: - base_kwargs["interruption"] = self._interruption - if self._sal is not None: - base_kwargs["sal"] = self._sal - if self._avatar is not None: - base_kwargs["avatar"] = self._avatar - if self._advanced_features is not None: - base_kwargs["advanced_features"] = self._advanced_features - parameters = 
self._resolved_parameters() - if parameters is not None: - if isinstance(parameters, dict): - base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**parameters) - else: - base_kwargs["parameters"] = parameters - if self._geofence is not None: - base_kwargs["geofence"] = self._geofence - if self._labels is not None: - base_kwargs["labels"] = self._labels - if self._rtc is not None: - base_kwargs["rtc"] = self._rtc - if self._filler_words is not None: - base_kwargs["filler_words"] = self._filler_words - - if is_mllm_mode: - if self._mllm is not None: - mllm_config = dict(self._mllm) - if self._greeting is not None: - mllm_config.setdefault("greeting_message", self._greeting) - if self._failure_message is not None: - mllm_config.setdefault("failure_message", self._failure_message) - base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) - - if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) - - if self._tts is None: - raise ValueError("TTS configuration is required. Use with_tts() to set it.") - - if self._llm is None: - raise ValueError("LLM configuration is required. Use with_llm() to set it.") - - llm_config = dict(self._llm) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. 
- if self._instructions is not None: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: - llm_config["greeting_message"] = self._greeting - if self._greeting_configs is not None: - llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) - if self._failure_message is not None: - llm_config["failure_message"] = self._failure_message - if self._max_history is not None: - llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - if self._stt is not None: - base_kwargs["asr"] = self._stt - - return StartAgentsRequestProperties(**base_kwargs) - - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - new_agent._mllm = self._mllm - new_agent._tts_sample_rate = self._tts_sample_rate - new_agent._avatar = self._avatar - new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate - new_agent._turn_detection = self._turn_detection - new_agent._interruption = self._interruption - new_agent._sal = self._sal - new_agent._advanced_features = self._advanced_features - new_agent._parameters = self._parameters - new_agent._instructions = self._instructions - new_agent._greeting = self._greeting - new_agent._failure_message = self._failure_message - new_agent._max_history = self._max_history - new_agent._geofence = self._geofence - new_agent._labels = self._labels - new_agent._rtc = self._rtc - new_agent._filler_words = self._filler_words - new_agent._greeting_configs = self._greeting_configs - return new_agent - src/agora_agent/agentkit/agent_session.py: | - import typing - import warnings - - from ..core.api_error import ApiError - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction as 
AgentThinkRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - is_live_avatar_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .presets import resolve_session_presets - from .token import generate_convo_ai_token, _parse_numeric_uid - - - class _AgentSessionRequiredOptions(typing.TypedDict, total=True): - """Required fields shared by both sync and async session constructors.""" - - client: typing.Any - agent: Agent - app_id: str - name: str - channel: str - agent_uid: str - remote_uids: typing.List[str] - - - class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - """Configuration options for creating an agent session. 
- - Required fields - --------------- - client, agent, app_id, name, channel, agent_uid, remote_uids - - Optional fields - --------------- - app_certificate, token, idle_timeout, enable_string_uid, expires_in - """ - - app_certificate: str - token: str - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - class _AgentSessionBase: - """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. - - Not intended for direct use — instantiate one of the concrete subclasses or - call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. - """ - - def __init__( - self, - client: typing.Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - app_certificate: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ): - self._client = client - self._agent = agent - self._app_id = app_id - self._app_certificate = app_certificate - self._name = name - self._channel = channel - self._token = token - self._agent_uid = agent_uid - self._remote_uids = remote_uids - self._idle_timeout = idle_timeout - self._enable_string_uid = enable_string_uid - self._preset = preset - self._pipeline_id = pipeline_id - self._expires_in = expires_in - self._debug = debug - self._warn = warn or warnings.warn - self._agent_id: typing.Optional[str] = None - self._status: str = "idle" - self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - - # 
------------------------------------------------------------------ - # Public read-only properties - # ------------------------------------------------------------------ - - @property - def id(self) -> typing.Optional[str]: - return self._agent_id - - @property - def status(self) -> str: - return self._status - - @property - def agent(self) -> Agent: - return self._agent - - @property - def app_id(self) -> str: - return self._app_id - - @property - def raw(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentsClient. - - Use this to access any new endpoints that Fern generates without - waiting for agentkit method updates. - """ - return self._client.agents - - @property - def raw_agent_management(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentManagement client.""" - return self._client.agent_management - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: - """Return per-request auth headers when client is in app-credentials mode. - - In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated - for every request and returned as ``Authorization: agora token=``. - In basic-auth mode this returns ``None`` (the client-level header is used). 
- """ - if getattr(self._client, "auth_mode", None) != "app-credentials": - return None - app_id: str = getattr(self._client, "app_id", self._app_id) - app_certificate: typing.Optional[str] = getattr( - self._client, "app_certificate", self._app_certificate - ) - if not app_certificate: - raise RuntimeError("app_certificate is required for app-credentials auth mode") - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(self._agent_uid, "agent_uid"), - ) - return {"Authorization": f"agora token={token}"} - - def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - """Build request_options dict with per-request auth headers if needed.""" - headers = self._convo_ai_headers() - if headers is None: - return None - return {"additional_headers": headers} - - def _validate_avatar_config(self) -> None: - avatar = self._agent.avatar - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - if self._is_mllm_mode(): - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) - - tts_params = tts.get("params") if isinstance(tts, dict) else None - sample_rate = self._agent.tts_sample_rate - if sample_rate is None and isinstance(tts_params, dict): - sample_rate = ( - tts_params.get("sample_rate") - or tts_params.get("sample_rate_hertz") - or tts_params.get("samplingRate") - ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - self._warn( - "Warning: HeyGen avatar detected but TTS sample_rate is not explicitly set. " - "HeyGen requires 24,000 Hz. 
Please ensure your TTS provider is configured for 24kHz." - ) - elif is_live_avatar_avatar(avatar): - self._warn( - "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " - "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_akool_avatar(avatar): - self._warn( - "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." - ) - - def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - avatar = properties.get("avatar") - if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - return - - params = avatar.get("params") - if not isinstance(params, dict): - params = {} - avatar["params"] = params - - if is_generic_avatar(avatar): - if not params.get("agora_appid"): - params["agora_appid"] = self._app_id - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - if not is_avatar_token_managed(avatar): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_uid"): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_token"): - if not self._app_certificate: - raise ValueError( - "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." 
- ) - token_kwargs: typing.Dict[str, typing.Any] = {} - if self._expires_in is not None: - token_kwargs["token_expire"] = self._expires_in - params["agora_token"] = generate_convo_ai_token( - app_id=self._app_id, - app_certificate=self._app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(str(params["agora_uid"]), "avatar agora_uid"), - **token_kwargs, - ) - - if str(params.get("agora_uid")) == self._agent_uid: - self._warn( - "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." - ) - - validate_avatar_config(avatar, require_session_fields=True) - - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if isinstance(value, dict): - return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} - if isinstance(value, list): - return [_AgentSessionBase._dump_model(item) for item in value] - return value - - def _is_mllm_mode(self) -> bool: - mllm = self._agent.mllm - if isinstance(mllm, dict) and mllm.get("enable") is True: - return True - return mllm is not None - - def _build_start_properties(self, token_opts: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - skip_vendor_validation=True, - **token_opts, - ) - properties = self._dump_model(base_properties) - self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - mllm = self._dump_model(self._agent.mllm) - if not isinstance(mllm, dict): - mllm = {} - if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - if self._agent.failure_message is not None: - mllm.setdefault("failure_message", self._agent.failure_message) - 
properties["mllm"] = mllm - return properties - - if self._agent.tts is not None: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - if self._agent.instructions is not None: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: - llm["greeting_message"] = self._agent.greeting - if self._agent.greeting_configs is not None: - llm["greeting_configs"] = self._dump_model(self._agent.greeting_configs) - if self._agent.failure_message is not None: - llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: - llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - @staticmethod - def _page_value(pagination: typing.Any, field: str) -> typing.Any: - if pagination is None: - return None - if isinstance(pagination, dict): - return pagination.get(field) - return getattr(pagination, field, None) - - @staticmethod - def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - return list(turns or []) - - @staticmethod - def _response_pagination(response: typing.Any) -> typing.Any: - if isinstance(response, dict): - return response.get("pagination") - return getattr(response, "pagination", None) - - @classmethod - def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - data = cls._dump_model(first_response) - if not isinstance(data, dict): - data = {} - data["turns"] = turns - return GetTurnsAgentsResponse(**data) - - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - - def on(self, event: str, handler: 
typing.Callable[..., None]) -> None: - """Register an event handler. - - Parameters - ---------- - event : str - The event type (``started``, ``stopped``, ``error``). - handler : callable - The event handler to invoke when the event fires. - """ - if event not in self._event_handlers: - self._event_handlers[event] = [] - self._event_handlers[event].append(handler) - - def off(self, event: str, handler: typing.Callable[..., None]) -> None: - """Unregister a previously registered event handler.""" - handlers = self._event_handlers.get(event) - if handlers and handler in handlers: - handlers.remove(handler) - - def _emit(self, event: str, data: typing.Any) -> None: - handlers = self._event_handlers.get(event) - if handlers: - for handler in handlers: - try: - handler(data) - except Exception as exc: - # Prevent a misbehaving handler from blocking other handlers or - # the session lifecycle. Warn so the error is not silently lost. - warnings.warn( - f"Event handler for '{event}' raised an exception: {exc}", - stacklevel=2, - ) - - - class AgentSession(_AgentSessionBase): - """Manages the lifecycle of an agent session (synchronous). - - This class provides a high-level interface for managing agent sessions, - including starting, stopping, and interacting with the agent. - - Use :meth:`Agent.create_session` to create a session — this is the - recommended entry point. 
- - Examples - -------- - >>> from agora_agent import Agora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - >>> session.say("Hello!") - >>> session.stop() - """ - - def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=self._pipeline_id, - 
request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. - """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not None: - kwargs["page_size"] = page_size - - return self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. 
- """ - response = self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - current_page = next_page - return self._with_all_turns(response, all_turns) - - - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. - - Use :meth:`Agent.create_async_session` to create a session — this is the - recommended entry point. 
- - Examples - -------- - >>> from agora_agent import AsyncAgora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - >>> agent = agent.with_llm(OpenAI(api_key="...", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - >>> await session.say("Hello!") - >>> await session.stop() - """ - - async def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. - - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": self._pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties - - response = await self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - 
pipeline_id=self._pipeline_id, - request_options=self._request_options(), - ) - - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def stop(self) -> None: - """Stop the agent session. - - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. - """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - self._status = "stopping" - - try: - await self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise - - async def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. - - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable - - await self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) - - async def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. - - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata - - return await self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - - await self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - - async def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - return await self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - - async def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not None: - kwargs["page_size"] = page_size - - return await self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - - async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. - - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. 
- """ - response = await self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = await self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) - tests/custom/test_root_exports.py: | - import pytest - - import agora_agent - import agora_agent.agentkit as agentkit - - - def test_root_exports_match_agentkit_for_common_symbols() -> None: - for name in ("Agent", "DeepgramSTT", "OpenAI", "AgentPresets", "generate_rtc_token", "DataChannel"): - assert getattr(agora_agent, name) is getattr(agentkit, name) - - - def test_root_exports_fern_client_symbols() -> None: - assert agora_agent.Agora is not None - assert agora_agent.Area is not None - assert agora_agent.AsyncAgora is not None - - - def test_unknown_root_export_raises_attribute_error() -> None: - with pytest.raises(AttributeError): - _ = agora_agent.NotARealExportName - - - def test_dir_includes_agentkit_vendor_exports() -> None: - assert "DeepgramSTT" in dir(agora_agent) - - - def test_all_includes_agentkit_vendor_exports() -> None: - assert "DeepgramSTT" in agora_agent.__all__ - assert "OpenAI" in agora_agent.__all__ - status: unresolved - - id: patch-299e4bd9 - content_hash: sha256:ee71350debd51653f1cb1472477a577436d74cbb847b3536a9cdbff0211abf2d - original_commit: 299e4bd9cb59bd6144084332a7c3fa7bf260769f - original_message: "fix(agentkit): resolve provider config type checks" - original_author: digitallysavvy - base_generation: 1d61baad436285e3b6a37555edb5ca67c158681c - files: - - src/agora_agent/agentkit/agent.py - - src/agora_agent/agentkit/vendors/llm.py - - src/agora_agent/agentkit/vendors/mllm.py - - src/agora_agent/agentkit/vendors/stt.py - patch_content: | - diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index 1daba82..95cfe34 100644 - --- a/src/agora_agent/agentkit/agent.py - +++ b/src/agora_agent/agentkit/agent.py - @@ -57,6 +57,8 @@ from ..agents.types.start_agents_request_properties_filler_words_content import - from ..agents.types.start_agents_request_properties_filler_words_content_static_config import 
StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - +from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - +from ..types.tts import Tts - from ..types.asr import Asr - from ..types.llm import Llm - from ..types.llm_style import LlmStyle as GeneratedLlmStyle - @@ -544,6 +546,23 @@ class Agent: - ) - return new_agent - - + def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - + """Returns a new Agent with the specified RTC audio scenario.""" - + new_agent = self._clone() - + if new_agent._parameters is None: - + new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - + elif isinstance(new_agent._parameters, dict): - + new_agent._parameters = typing.cast( - + SessionParamsInput, - + {**new_agent._parameters, "audio_scenario": audio_scenario}, - + ) - + else: - + new_agent._parameters = self._copy_model_update( - + new_agent._parameters, - + {"audio_scenario": audio_scenario}, - + ) - + return new_agent - + - def with_failure_message(self, message: str) -> "Agent": - """Deprecated. 
Configure the failure message on the LLM or MLLM vendor instead.""" - new_agent = self._clone() - diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py - index 5a9f39e..1f1b354 100644 - --- a/src/agora_agent/agentkit/vendors/llm.py - +++ b/src/agora_agent/agentkit/vendors/llm.py - @@ -2,6 +2,9 @@ from typing import Any, Dict, List, Optional, Union - - from pydantic import BaseModel, ConfigDict, Field, model_validator - - +from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( - + StartAgentsRequestPropertiesLlmGreetingConfigs, - +) - from .base import BaseLLM - - LlmGreetingConfigs = Dict[str, Any] - theirs_snapshot: - src/agora_agent/agentkit/agent.py: | - from __future__ import annotations - - import time - import typing - import typing_extensions - import warnings - - if typing.TYPE_CHECKING: - from .agent_session import AgentSession, AsyncAgentSession - - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar - from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties - from ..agents.types.get_agents_response import GetAgentsResponse - from ..agents.types.list_agents_response import ListAgentsResponse - from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem - from ..agents.types.list_agents_response_data_list_item_status import ListAgentsResponseDataListItemStatus - from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse - from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem - from ..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole - from 
..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem - from ..agents.types.speak_agents_request_priority import SpeakAgentsRequestPriority - from ..agents.types.start_agents_request_properties_turn_detection import StartAgentsRequestPropertiesTurnDetection - from ..agents.types.start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_keywords_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config_strategy import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - from 
..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - from ..agents.types.start_agents_request_properties_turn_detection_type import StartAgentsRequestPropertiesTurnDetectionType - from ..agents.types.start_agents_request_properties_turn_detection_interrupt_mode import StartAgentsRequestPropertiesTurnDetectionInterruptMode - from ..agents.types.start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness - from ..agents.types.start_agents_request_properties_sal import StartAgentsRequestPropertiesSal - from ..agents.types.start_agents_request_properties_sal_sal_mode import StartAgentsRequestPropertiesSalSalMode - from ..agents.types.start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters - from ..agents.types.start_agents_request_properties_parameters_silence_config import StartAgentsRequestPropertiesParametersSilenceConfig - from ..agents.types.start_agents_request_properties_parameters_silence_config_action import StartAgentsRequestPropertiesParametersSilenceConfigAction - from ..agents.types.start_agents_request_properties_parameters_farewell_config import StartAgentsRequestPropertiesParametersFarewellConfig - from ..agents.types.start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel - from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario - from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption - from ..agents.types.start_agents_request_properties_interruption_mode import 
StartAgentsRequestPropertiesInterruptionMode - from ..agents.types.start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence - from ..agents.types.start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc - from ..agents.types.start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures - from ..agents.types.start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords - from ..agents.types.start_agents_request_properties_filler_words_trigger import StartAgentsRequestPropertiesFillerWordsTrigger - from ..agents.types.start_agents_request_properties_filler_words_trigger_fixed_time_config import StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - from ..agents.types.start_agents_request_properties_filler_words_content import StartAgentsRequestPropertiesFillerWordsContent - from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - from ..types.asr import Asr - from ..types.llm import Llm - from ..types.llm_style import LlmStyle as GeneratedLlmStyle - from ..types.mllm import Mllm - from ..types.mllm_turn_detection import MllmTurnDetection - from ..types.mllm_turn_detection_mode import MllmTurnDetectionMode as GeneratedMllmTurnDetectionMode - from ..types.mllm_vendor import MllmVendor as GeneratedMllmVendor - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - 
AgentThinkAgentManagementRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse, - ) - from ..core.pydantic_utilities import parse_obj_as - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - LlmConfig = Llm - LlmStyle = GeneratedLlmStyle - SttConfig = Asr - AsrConfig = SttConfig - SttVendor = typing.Any - TtsConfig = Tts - MllmConfig = Mllm - MllmVendor = GeneratedMllmVendor - AvatarConfig = StartAgentsRequestPropertiesAvatar - AvatarVendor = StartAgentsRequestPropertiesAvatarVendor - TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection - SalConfig = StartAgentsRequestPropertiesSal - SalMode = StartAgentsRequestPropertiesSalSalMode - AdvancedFeatures = StartAgentsRequestPropertiesAdvancedFeatures - SessionParams = StartAgentsRequestPropertiesParameters - - # SOS/EOS turn detection aliases (preferred) - TurnDetectionNestedConfig = StartAgentsRequestPropertiesTurnDetectionConfig - StartOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - StartOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - StartOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - StartOfSpeechKeywordsConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - StartOfSpeechDisabledConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - StartOfSpeechDisabledConfigStrategy = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - EndOfSpeechConfig = 
StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - EndOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - EndOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - EndOfSpeechSemanticConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - - # Deprecated turn detection aliases - # Deprecated: Use TurnDetectionConfig with TurnDetectionNestedConfig.start_of_speech - # and .end_of_speech instead. The `type` field and agora_vad/server_vad/semantic_vad - # values will be removed in a future release. - TurnDetectionType = StartAgentsRequestPropertiesTurnDetectionType - - # Deprecated: Use StartOfSpeechConfig with mode="vad"|"keywords"|"disabled" and the - # corresponding vad_config, keywords_config, or disabled_config instead. - InterruptMode = StartAgentsRequestPropertiesTurnDetectionInterruptMode - - # Deprecated: Only applies to server_vad/semantic_vad modes with OpenAI Realtime - # (MLLM). Has no equivalent in the ASR + LLM + TTS pipeline. 
- Eagerness = StartAgentsRequestPropertiesTurnDetectionEagerness - - # Parameters (SessionParams) sub-type aliases - SilenceConfig = StartAgentsRequestPropertiesParametersSilenceConfig - SilenceAction = StartAgentsRequestPropertiesParametersSilenceConfigAction - FarewellConfig = StartAgentsRequestPropertiesParametersFarewellConfig - ParametersDataChannel = StartAgentsRequestPropertiesParametersDataChannel - ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario - InterruptionConfig = StartAgentsRequestPropertiesInterruption - InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = MllmTurnDetection - MllmTurnDetectionMode = GeneratedMllmTurnDetectionMode - AgentConfig = StartAgentsRequestProperties - AgentConfigUpdate = UpdateAgentsRequestProperties - SessionInfo = GetAgentsResponse - SessionListResponse = ListAgentsResponse - SessionSummary = ListAgentsResponseDataListItem - SessionStatus = ListAgentsResponseDataListItemStatus - ConversationHistory = GetHistoryAgentsResponse - ConversationTurn = GetHistoryAgentsResponseContentsItem - ConversationRole = GetHistoryAgentsResponseContentsItemRole - ConversationTurns = GetTurnsAgentsResponse - ConversationSessionTurn = GetTurnsAgentsResponseTurnsItem - SpeakPriority = SpeakAgentsRequestPriority - Labels = typing.Dict[str, str] - - - class SessionParamsInput(typing_extensions.TypedDict, total=False): - silence_config: StartAgentsRequestPropertiesParametersSilenceConfig - farewell_config: StartAgentsRequestPropertiesParametersFarewellConfig - data_channel: StartAgentsRequestPropertiesParametersDataChannel - enable_metrics: bool - enable_error_message: bool - audio_scenario: ParametersAudioScenario - - - class ThinkOptions(typing_extensions.TypedDict, total=False): - on_listening_action: AgentThinkAgentManagementRequestOnListeningAction - on_thinking_action: AgentThinkAgentManagementRequestOnThinkingAction - on_speaking_action: 
AgentThinkAgentManagementRequestOnSpeakingAction - interruptable: bool - metadata: typing.Dict[str, str] - - - class GetTurnsOptions(typing_extensions.TypedDict, total=False): - page_index: int - page_size: int - - - class SayOptions(typing_extensions.TypedDict, total=False): - priority: SpeakAgentsRequestPriority - interruptable: bool - - - class SessionOptions(typing_extensions.TypedDict, total=False): - name: str - channel: str - token: str - agent_uid: str - remote_uids: typing.List[str] - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - def _start_properties_from_mapping( - properties: typing.Mapping[str, typing.Any], - ) -> StartAgentsRequestProperties: - return parse_obj_as(StartAgentsRequestProperties, dict(properties)) - - - # LLM sub-type aliases - LlmGreetingConfigs = typing.Dict[str, typing.Any] - LlmGreetingConfigsMode = typing.Any - McpServersItem = typing.Dict[str, typing.Any] - - # Additional top-level config aliases - GeofenceConfig = StartAgentsRequestPropertiesGeofence - RtcConfig = StartAgentsRequestPropertiesRtc - FillerWordsConfig = StartAgentsRequestPropertiesFillerWords - FillerWordsTrigger = StartAgentsRequestPropertiesFillerWordsTrigger - FillerWordsTriggerFixedTimeConfig = StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - # Think type aliases and response - ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction - ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction - ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction - ThinkResponse = 
AgentThinkAgentManagementResponse - - from .token import generate_convo_ai_token, _parse_numeric_uid, _validate_expires_in - - TurnDetectionLanguage = typing_extensions.Literal[ - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ] - - DEFAULT_TURN_DETECTION_LANGUAGE: TurnDetectionLanguage = "en-US" - TURN_DETECTION_LANGUAGE_VALUES: typing.Tuple[TurnDetectionLanguage, ...] = ( - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ) - _TURN_DETECTION_LANGUAGES = set(TURN_DETECTION_LANGUAGE_VALUES) - - - def _dump_optional_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value - - - def _is_turn_detection_language(value: typing.Any) -> bool: - return isinstance(value, str) and value in _TURN_DETECTION_LANGUAGES - - - def _validate_turn_detection_language(value: typing.Any) -> TurnDetectionLanguage: - if not _is_turn_detection_language(value): - raise ValueError(f"Invalid turn_detection.language: {value}") - return value # type: ignore[return-value] - - - class Agent: - """A reusable agent definition. - - Use the fluent builder methods (.with_llm(), .with_tts(), .with_stt(), .with_mllm()) - to configure vendor settings after construction. 
- - Deprecated: - The Agent-level ``instructions``, ``greeting``, ``failure_message``, - ``max_history``, and ``greeting_configs`` convenience fields are kept - for compatibility. Configure those values on the LLM or MLLM vendor - instead. - - Examples - -------- - >>> from agora_agent import Agent, OpenAI, ElevenLabsTTS, DeepgramSTT - >>> - >>> agent = Agent(instructions="You are a helpful voice assistant.") - >>> agent = ( - ... agent - ... .with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")) - ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1", sample_rate=24000)) - ... .with_stt(DeepgramSTT(api_key="...", model="nova-2")) - ... ) - """ - - def __init__( - self, - name: typing.Optional[str] = None, - instructions: typing.Optional[str] = None, - turn_detection: typing.Optional[TurnDetectionConfig] = None, - interruption: typing.Optional[InterruptionConfig] = None, - sal: typing.Optional[SalConfig] = None, - advanced_features: typing.Optional[AdvancedFeatures] = None, - parameters: typing.Optional[typing.Union[SessionParams, SessionParamsInput]] = None, - greeting: typing.Optional[str] = None, - failure_message: typing.Optional[str] = None, - max_history: typing.Optional[int] = None, - geofence: typing.Optional[GeofenceConfig] = None, - labels: typing.Optional[typing.Dict[str, str]] = None, - rtc: typing.Optional[RtcConfig] = None, - filler_words: typing.Optional[FillerWordsConfig] = None, - greeting_configs: typing.Optional[LlmGreetingConfigs] = None, - pipeline_id: typing.Optional[str] = None, - ): - self._name = name - self._pipeline_id = pipeline_id - self._instructions = instructions - self._greeting = greeting - self._failure_message = failure_message - self._max_history = max_history - self._llm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts: typing.Optional[typing.Dict[str, typing.Any]] = None - self._stt: 
typing.Optional[typing.Dict[str, typing.Any]] = None - self._mllm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts_sample_rate: typing.Optional[int] = None - self._avatar: typing.Optional[typing.Dict[str, typing.Any]] = None - self._avatar_required_sample_rate: typing.Optional[int] = None - self._turn_detection = turn_detection - self._interruption = interruption - self._sal = sal - self._advanced_features = advanced_features - self._parameters = parameters - self._geofence = geofence - self._labels = labels - self._rtc = rtc - self._filler_words = filler_words - self._greeting_configs = greeting_configs - - def with_llm(self, vendor: BaseLLM) -> "Agent": - new_agent = self._clone() - new_agent._llm = vendor.to_config() - return new_agent - - def with_tts(self, vendor: BaseTTS) -> "Agent": - sample_rate = vendor.sample_rate - if ( - self._avatar_required_sample_rate not in (None, 0) - and sample_rate is not None - and sample_rate != self._avatar_required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {self._avatar_required_sample_rate} Hz, " - f"but TTS is configured with {sample_rate} Hz. " - f"Please update your TTS sample_rate to {self._avatar_required_sample_rate}." - ) - new_agent = self._clone() - new_agent._tts = vendor.to_config() - new_agent._tts_sample_rate = sample_rate - return new_agent - - def with_stt(self, vendor: BaseSTT) -> "Agent": - new_agent = self._clone() - new_agent._stt = vendor.to_config() - return new_agent - - def with_mllm(self, vendor: BaseMLLM) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` so callers can still - # configure both for tests, debugging, or disabled-avatar use cases. 
- new_agent = self._clone() - new_agent._mllm = vendor.to_config() - if isinstance(new_agent._mllm, dict): - new_agent._mllm["enable"] = True - if isinstance(new_agent._advanced_features, dict): - advanced_features = {key: value for key, value in new_agent._advanced_features.items() if key != "enable_mllm"} - new_agent._advanced_features = typing.cast(AdvancedFeatures, advanced_features) if advanced_features else None - elif isinstance(new_agent._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures): - advanced_features_model = self._copy_model_update( - new_agent._advanced_features, - {"enable_mllm": None}, - ) - if ( - advanced_features_model.enable_rtm is None - and advanced_features_model.enable_sal is None - and advanced_features_model.enable_tools is None - ): - new_agent._advanced_features = None - else: - new_agent._advanced_features = advanced_features_model - return new_agent - - def with_avatar(self, vendor: BaseAvatar) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` (only when the avatar is - # enabled) so callers may still combine the two for testing or for the - # disabled-avatar pattern. - required_sample_rate = vendor.required_sample_rate - if ( - required_sample_rate not in (None, 0) - and self._tts_sample_rate is not None - and self._tts_sample_rate != required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {required_sample_rate} Hz, " - f"but TTS is configured with {self._tts_sample_rate} Hz. " - f"Please update your TTS sample_rate to {required_sample_rate}." 
- ) - new_agent = self._clone() - new_agent._avatar = vendor.to_config() - new_agent._avatar_required_sample_rate = required_sample_rate - return new_agent - - def with_turn_detection(self, config: TurnDetectionConfig) -> "Agent": - new_agent = self._clone() - new_agent._turn_detection = config - return new_agent - - def with_interruption(self, config: InterruptionConfig) -> "Agent": - """Returns a new Agent with unified interruption control configured.""" - new_agent = self._clone() - new_agent._interruption = config - return new_agent - - def with_instructions(self, instructions: str) -> "Agent": - """Deprecated. Configure system messages on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._instructions = instructions - return new_agent - - def with_greeting(self, greeting: str) -> "Agent": - """Deprecated. Configure the greeting on the LLM or MLLM vendor instead.""" - new_agent = self._clone() - new_agent._greeting = greeting - return new_agent - - def with_greeting_configs(self, configs: LlmGreetingConfigs) -> "Agent": - """Deprecated. Configure greeting playback on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._greeting_configs = configs - return new_agent - - def with_name(self, name: str) -> "Agent": - new_agent = self._clone() - new_agent._name = name - return new_agent - - def with_sal(self, config: SalConfig) -> "Agent": - """Returns a new Agent with the specified SAL (Selective Attention Locking) configuration.""" - new_agent = self._clone() - new_agent._sal = config - return new_agent - - def with_advanced_features(self, features: AdvancedFeatures) -> "Agent": - """Returns a new Agent with the specified advanced features configuration. - - Use this to enable RTM and other advanced features. 
- """ - new_agent = self._clone() - new_agent._advanced_features = features - return new_agent - - def with_tools(self, enabled: bool = True) -> "Agent": - """Returns a new Agent with MCP tool invocation enabled or disabled.""" - new_agent = self._clone() - if new_agent._advanced_features is None: - new_agent._advanced_features = StartAgentsRequestPropertiesAdvancedFeatures(enable_tools=enabled) - elif isinstance(new_agent._advanced_features, dict): - new_agent._advanced_features = typing.cast( - AdvancedFeatures, - {**new_agent._advanced_features, "enable_tools": enabled}, - ) - else: - new_agent._advanced_features = self._copy_model_update( - new_agent._advanced_features, - {"enable_tools": enabled}, - ) - return new_agent - - def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent": - """Returns a new Agent with the specified session parameters. - - Use this to configure silence behaviour, graceful hang-up, data channel, and more. - """ - new_agent = self._clone() - new_agent._parameters = parameters - return new_agent - - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = 
StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - - def with_failure_message(self, message: str) -> "Agent": - """Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" - new_agent = self._clone() - new_agent._failure_message = message - return new_agent - - def with_max_history(self, max_history: int) -> "Agent": - """Deprecated. Configure max history on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._max_history = max_history - return new_agent - - def with_geofence(self, geofence: GeofenceConfig) -> "Agent": - """Returns a new Agent with the specified geofence configuration. - - Restricts which geographic regions the agent's backend servers may run in. - """ - new_agent = self._clone() - new_agent._geofence = geofence - return new_agent - - def with_labels(self, labels: typing.Dict[str, str]) -> "Agent": - """Returns a new Agent with the specified custom labels. - - Labels are key-value pairs attached to the agent and returned in notification callbacks. - """ - new_agent = self._clone() - new_agent._labels = dict(labels) - return new_agent - - def with_rtc(self, rtc: RtcConfig) -> "Agent": - """Returns a new Agent with the specified RTC configuration.""" - new_agent = self._clone() - new_agent._rtc = rtc - return new_agent - - def with_filler_words(self, filler_words: FillerWordsConfig) -> "Agent": - """Returns a new Agent with the specified filler words configuration. - - Filler words are played while the agent waits for the LLM to respond. 
- """ - new_agent = self._clone() - new_agent._filler_words = filler_words - return new_agent - - @staticmethod - def _field_value(value: typing.Any, field: str) -> typing.Any: - if value is None: - return None - if isinstance(value, dict): - return value.get(field) - return getattr(value, field, None) - - @staticmethod - def _copy_model_update(value: typing.Any, update: typing.Dict[str, typing.Any]) -> typing.Any: - if hasattr(value, "model_copy"): - return value.model_copy(update=update) - if hasattr(value, "copy"): - return value.copy(update=update) - raise TypeError(f"Object of type {type(value).__name__} does not support model copying") - - def _resolved_parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - enable_rtm = self._field_value(self._advanced_features, "enable_rtm") is True - data_channel = self._field_value(self._parameters, "data_channel") - if not enable_rtm or data_channel is not None: - return self._parameters - if self._parameters is None: - return StartAgentsRequestPropertiesParameters(data_channel="rtm") - if isinstance(self._parameters, dict): - return typing.cast(SessionParamsInput, {**self._parameters, "data_channel": "rtm"}) - return self._copy_model_update(self._parameters, {"data_channel": "rtm"}) - - @property - def name(self) -> typing.Optional[str]: - return self._name - - @property - def pipeline_id(self) -> typing.Optional[str]: - """Published AI Studio pipeline ID used as this agent's base configuration.""" - return self._pipeline_id - - @property - def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._llm - - @property - def tts(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._tts - - @property - def tts_sample_rate(self) -> typing.Optional[int]: - return self._tts_sample_rate - - @property - def stt(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._stt - - @property - def mllm(self) -> typing.Optional[typing.Dict[str, 
typing.Any]]: - return self._mllm - - @property - def turn_detection(self) -> typing.Optional[TurnDetectionConfig]: - return self._turn_detection - - @property - def interruption(self) -> typing.Optional[InterruptionConfig]: - return self._interruption - - @property - def instructions(self) -> typing.Optional[str]: - return self._instructions - - @property - def greeting(self) -> typing.Optional[str]: - return self._greeting - - @property - def greeting_configs(self) -> typing.Optional[LlmGreetingConfigs]: - return self._greeting_configs - - @property - def failure_message(self) -> typing.Optional[str]: - return self._failure_message - - @property - def max_history(self) -> typing.Optional[int]: - return self._max_history - - @property - def avatar(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._avatar - - @property - def sal(self) -> typing.Optional[SalConfig]: - return self._sal - - @property - def advanced_features(self) -> typing.Optional[AdvancedFeatures]: - return self._advanced_features - - @property - def parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - return self._parameters - - @property - def geofence(self) -> typing.Optional[GeofenceConfig]: - return self._geofence - - @property - def labels(self) -> typing.Optional[typing.Dict[str, str]]: - return self._labels - - @property - def rtc(self) -> typing.Optional[RtcConfig]: - return self._rtc - - @property - def filler_words(self) -> typing.Optional[FillerWordsConfig]: - return self._filler_words - - @property - def config(self) -> typing.Dict[str, typing.Any]: - return { - "name": self._name, - "pipeline_id": self._pipeline_id, - "instructions": self._instructions, - "greeting": self._greeting, - "failure_message": self._failure_message, - "max_history": self._max_history, - "llm": self._llm, - "tts": self._tts, - "stt": self._stt, - "mllm": self._mllm, - "turn_detection": self._turn_detection, - "interruption": self._interruption, - "sal": 
self._sal, - "avatar": self._avatar, - "advanced_features": self._advanced_features, - "parameters": self._parameters, - "geofence": self._geofence, - "labels": self._labels, - "rtc": self._rtc, - "filler_words": self._filler_words, - "greeting_configs": self._greeting_configs, - } - - def create_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AgentSession": - from .agent_session import AgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def create_async_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], 
None]] = None, - ) -> "AsyncAgentSession": - """Create an async session for use with :class:`~agora_agent.AsyncAgora`. - - Equivalent to :meth:`create_session` but returns an - :class:`~agora_agent.agentkit.AsyncAgentSession`. - """ - from .agent_session import AsyncAgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AsyncAgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def to_properties( - self, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - token: typing.Optional[str] = None, - app_id: typing.Optional[str] = None, - app_certificate: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - skip_vendor_validation: bool = False, - skip_vendor_validation_categories: typing.Optional[typing.AbstractSet[str]] = None, - allow_missing_vendor_categories: typing.Optional[typing.AbstractSet[str]] = None, - ) -> StartAgentsRequestProperties: - # Validate the MLLM + enabled-avatar combination BEFORE generating the - # RTC token so callers get a clear, actionable error first (matches the - # TypeScript and Go SDKs' fail-fast contract). 
- mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - is_mllm_mode = bool(mllm_flag or self._mllm is not None) - avatar_enabled = ( - isinstance(self._avatar, dict) and self._avatar.get("enable") is not False - ) - if is_mllm_mode and avatar_enabled: - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." - ) - - if token is None: - if app_id is None or app_certificate is None: - raise ValueError("Either token or app_id+app_certificate must be provided") - validated_expires_in = _validate_expires_in(expires_in) if expires_in is not None else None - # Use generate_convo_ai_token (RTC + RTM) so the token works whether or - # not the caller enables advanced_features.enable_rtm. - token_kwargs: typing.Dict[str, typing.Any] = {} - if validated_expires_in is not None: - token_kwargs["token_expire"] = validated_expires_in - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=channel, - uid=_parse_numeric_uid(agent_uid, "agent_uid"), - **token_kwargs, - ) - - base_kwargs: typing.Dict[str, typing.Any] = { - "channel": channel, - "token": token, - "agent_rtc_uid": agent_uid, - "remote_rtc_uids": remote_uids, - } - - if idle_timeout is not None: - base_kwargs["idle_timeout"] = idle_timeout - if enable_string_uid is not None: - base_kwargs["enable_string_uid"] = enable_string_uid - if self._mllm is not None: - base_kwargs["mllm"] = self._mllm - if self._turn_detection is not None: - base_kwargs["turn_detection"] = self._turn_detection - if self._interruption is not None: - base_kwargs["interruption"] = self._interruption - if self._sal is not None: - base_kwargs["sal"] = self._sal - if self._avatar is not None: - base_kwargs["avatar"] = self._avatar - if self._advanced_features is not None: - base_kwargs["advanced_features"] = self._advanced_features - parameters = 
self._resolved_parameters() - if parameters is not None: - if isinstance(parameters, dict): - base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**parameters) - else: - base_kwargs["parameters"] = parameters - if self._geofence is not None: - base_kwargs["geofence"] = self._geofence - if self._labels is not None: - base_kwargs["labels"] = self._labels - if self._rtc is not None: - base_kwargs["rtc"] = self._rtc - if self._filler_words is not None: - base_kwargs["filler_words"] = self._filler_words - - if is_mllm_mode: - if self._mllm is not None: - mllm_config = dict(self._mllm) - if self._greeting is not None: - mllm_config.setdefault("greeting_message", self._greeting) - if self._failure_message is not None: - mllm_config.setdefault("failure_message", self._failure_message) - base_kwargs["mllm"] = mllm_config - return _start_properties_from_mapping(base_kwargs) - - if skip_vendor_validation: - warnings.warn( - "skip_vendor_validation is deprecated and will be removed in a future release. 
" - "Use skip_vendor_validation_categories and allow_missing_vendor_categories instead.", - DeprecationWarning, - stacklevel=2, - ) - - skip_categories = set(skip_vendor_validation_categories or ()) - allow_missing_categories = set(allow_missing_vendor_categories or ()) - if skip_vendor_validation: - skip_categories.update({"asr", "llm", "tts"}) - allow_missing_categories.update({"asr", "llm", "tts"}) - - skip_asr_validation = skip_vendor_validation or "asr" in skip_categories - skip_llm_validation = skip_vendor_validation or "llm" in skip_categories - skip_tts_validation = skip_vendor_validation or "tts" in skip_categories - allow_missing_asr = "asr" in allow_missing_categories - allow_missing_llm = "llm" in allow_missing_categories - allow_missing_tts = "tts" in allow_missing_categories - - turn_detection_config = self._resolve_turn_detection_config() - if not skip_asr_validation and (self._stt is not None or not allow_missing_asr): - base_kwargs["asr"] = self._resolve_asr_config(turn_detection_config) - base_kwargs["turn_detection"] = turn_detection_config - - if skip_vendor_validation: - return _start_properties_from_mapping(base_kwargs) - - if self._tts is None and not (skip_tts_validation or allow_missing_tts): - raise ValueError("TTS configuration is required. Use with_tts() to set it.") - - if self._llm is None and not (skip_llm_validation or allow_missing_llm): - raise ValueError("LLM configuration is required. 
Use with_llm() to set it.") - - if self._llm is not None and not skip_llm_validation: - base_kwargs["llm"] = self._resolve_llm_config() - if self._tts is not None and not skip_tts_validation: - base_kwargs["tts"] = self._tts - - return _start_properties_from_mapping(base_kwargs) - - def _resolve_llm_config(self) -> typing.Dict[str, typing.Any]: - llm_config = dict(self._llm or {}) - if self._instructions is not None and "system_messages" not in llm_config: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None and "greeting_message" not in llm_config: - llm_config["greeting_message"] = self._greeting - if self._greeting_configs is not None and "greeting_configs" not in llm_config: - llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) - if self._failure_message is not None and "failure_message" not in llm_config: - llm_config["failure_message"] = self._failure_message - if self._max_history is not None and "max_history" not in llm_config: - llm_config["max_history"] = self._max_history - return llm_config - - def _resolve_asr_config(self, turn_detection_config: TurnDetectionConfig) -> typing.Dict[str, typing.Any]: - asr_config = dict(self._stt or {}) - if not asr_config: - asr_config["vendor"] = "ares" - asr_config["language"] = self._field_value(turn_detection_config, "language") - return asr_config - - def _resolve_turn_detection_config(self) -> TurnDetectionConfig: - existing_turn_detection_language = self._field_value(self._turn_detection, "language") - language = ( - existing_turn_detection_language - if existing_turn_detection_language is not None - else DEFAULT_TURN_DETECTION_LANGUAGE - ) - language = _validate_turn_detection_language(language) - if self._turn_detection is None: - return StartAgentsRequestPropertiesTurnDetection(language=language) - if isinstance(self._turn_detection, dict): - return typing.cast(TurnDetectionConfig, {**self._turn_detection, "language": 
language}) - return self._copy_model_update(self._turn_detection, {"language": language}) - - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - new_agent._pipeline_id = self._pipeline_id - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - new_agent._mllm = self._mllm - new_agent._tts_sample_rate = self._tts_sample_rate - new_agent._avatar = self._avatar - new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate - new_agent._turn_detection = self._turn_detection - new_agent._interruption = self._interruption - new_agent._sal = self._sal - new_agent._advanced_features = self._advanced_features - new_agent._parameters = self._parameters - new_agent._instructions = self._instructions - new_agent._greeting = self._greeting - new_agent._failure_message = self._failure_message - new_agent._max_history = self._max_history - new_agent._geofence = self._geofence - new_agent._labels = self._labels - new_agent._rtc = self._rtc - new_agent._filler_words = self._filler_words - new_agent._greeting_configs = self._greeting_configs - return new_agent - src/agora_agent/agentkit/vendors/llm.py: | - from typing import Any, Dict, List, Optional, Union - - from pydantic import BaseModel, ConfigDict, Field, model_validator - - from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( - StartAgentsRequestPropertiesLlmGreetingConfigs, - ) - from .base import BaseLLM - - LlmGreetingConfigs = Dict[str, Any] - _OPENAI_MANAGED_MODELS = {"gpt-4o-mini", "gpt-4.1-mini", "gpt-5-nano", "gpt-5-mini"} - - - def _ensure_mcp_transport(servers: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Ensure each MCP server has transport set (API requires it). 
Default to streamable_http.""" - result = [] - for s in servers: - item = dict(s) - if item.get("transport") is None: - item["transport"] = "streamable_http" - result.append(item) - return result - - - def _dump_optional_model(value: Any) -> Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value - - class OpenAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: Optional[str] = Field(default=None, description="OpenAI API key") - model: str = Field(..., description="Model name") - base_url: Optional[str] = Field(default=None, description="Custom base URL") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - max_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - - @model_validator(mode="after") - def _validate_byok_params(self) -> "OpenAIOptions": - if not self.model: - raise ValueError("OpenAI requires model") - if self.api_key is not None and self.base_url is None: - raise ValueError("OpenAI requires base_url when api_key is set") - if self.api_key is None 
and self.base_url is not None: - raise ValueError("OpenAI base_url is only valid when api_key is set") - if self.api_key is None and self.model.strip().lower() not in _OPENAI_MANAGED_MODELS: - raise ValueError("OpenAI requires api_key unless using a supported Agora-managed model") - if self.api_key is None and self.vendor is not None: - raise ValueError("OpenAI Agora-managed mode does not allow vendor") - return self - - class OpenAI(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = OpenAIOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - # model is the default; explicit params entries extend/override it. - # This matches the TS SDK behaviour: { model, ...params }. - params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - - # Named fields take precedence over anything in the generic params dict. - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - - config: Dict[str, Any] = { - "url": self.options.base_url or "https://api.openai.com/v1/chat/completions", - "params": params, - "style": "openai", - "input_modalities": self.options.input_modalities or ["text"], - } - if self.options.api_key is not None: - config["api_key"] = self.options.api_key - if self.options.headers is not None: - config["headers"] = self.options.headers - - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - 
config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history - - return config - - - class AzureOpenAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Azure OpenAI API key") - model: str = Field(..., description="Azure deployment model name") - endpoint: str = Field(..., description="Azure endpoint URL") - deployment_name: str = Field(..., description="Azure deployment name") - api_version: str = Field(default="2024-08-01-preview", description="Azure API version") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - max_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - - class AzureOpenAI(BaseLLM): - def __init__(self, 
**kwargs: Any): - self.options = AzureOpenAIOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - url = ( - f"{self.options.endpoint}/openai/deployments/" - f"{self.options.deployment_name}/chat/completions" - f"?api-version={self.options.api_version}" - ) - config: Dict[str, Any] = { - "url": url, - "api_key": self.options.api_key, - "vendor": self.options.vendor or "azure", - "style": "openai", - "input_modalities": self.options.input_modalities or ["text"], - } - - # Named fields take precedence over anything in the generic params dict. - params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if params: - config["params"] = params - if self.options.headers is not None: - config["headers"] = self.options.headers - - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history - - return config - - - class AnthropicOptions(BaseModel): - model_config = 
ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Anthropic API key") - model: str = Field(..., description="Model name") - url: str = Field(..., description="Anthropic messages endpoint URL") - max_tokens: int = Field(..., gt=0) - temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Dict[str, str] = Field(..., description="Anthropic request headers") - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - - class Anthropic(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = AnthropicOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - # Named fields take precedence over anything in the generic params dict. 
- params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - - config: Dict[str, Any] = { - "url": self.options.url, - "api_key": self.options.api_key, - "params": params, - "headers": self.options.headers, - "style": "anthropic", - "input_modalities": self.options.input_modalities or ["text"], - } - - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history - - return config - - - class GeminiOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Google AI API key") - model: str = Field(..., description="Model name") - url: Optional[str] = Field(default=None, description="Custom API endpoint URL") - temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_k: 
Optional[int] = Field(default=None, gt=0) - max_output_tokens: Optional[int] = Field(default=None, gt=0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - - class Gemini(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = GeminiOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - # Named fields take precedence over anything in the generic params dict. 
- params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - if self.options.top_k is not None: - params["top_k"] = self.options.top_k - if self.options.max_output_tokens is not None: - params["max_output_tokens"] = self.options.max_output_tokens - - config: Dict[str, Any] = { - "url": self.options.url or "https://generativelanguage.googleapis.com/v1beta/models", - "api_key": self.options.api_key, - "params": params, - "style": "gemini", - "input_modalities": self.options.input_modalities or ["text"], - } - - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.headers is not None: - config["headers"] = self.options.headers - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history - - return config - - - class GroqOptions(OpenAIOptions): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Groq API key") - model: str = Field(..., description="Model name") - base_url: str = 
Field(..., description="Groq-compatible endpoint") - - - class Groq(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = GroqOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - config = OpenAI(**_dump_optional_model(self.options)).to_config() - config["url"] = self.options.base_url - return config - - - class CustomLLMOptions(OpenAIOptions): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Custom LLM API key") - base_url: str = Field(..., description="OpenAI-compatible chat completions endpoint") - - - class CustomLLM(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = CustomLLMOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - config = OpenAI(**_dump_optional_model(self.options)).to_config() - config["vendor"] = self.options.vendor or "custom" - return config - - - class VertexAILLMOptions(GeminiOptions): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Vertex AI access token or API key") - project_id: str = Field(..., description="Google Cloud project ID") - location: str = Field(..., description="Google Cloud location") - - - class VertexAILLM(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = VertexAILLMOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - options = _dump_optional_model(self.options) - options.pop("project_id", None) - options.pop("location", None) - if not options.get("url"): - options["url"] = ( - f"https://{self.options.location}-aiplatform.googleapis.com/v1/projects/" - f"{self.options.project_id}/locations/{self.options.location}/" - f"publishers/google/models/{self.options.model}:streamGenerateContent?alt=sse" - ) - return Gemini(**options).to_config() - - - class AmazonBedrockOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - access_key: str = Field(..., description="AWS access key ID") - secret_key: str = Field(..., description="AWS secret access key") - region: str = Field(..., 
description="AWS region") - model: str = Field(..., description="Amazon Bedrock model identifier") - max_tokens: Optional[int] = Field(default=None, gt=0) - url: Optional[str] = Field(default=None, description="Amazon Bedrock converse stream endpoint URL") - temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) - top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - - - class AmazonBedrock(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = AmazonBedrockOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.params or {}) - if self.options.max_tokens is not None: - params["max_tokens"] = self.options.max_tokens - if self.options.temperature is not None: - params["temperature"] = self.options.temperature - if self.options.top_p is not None: - params["top_p"] = self.options.top_p - - config: Dict[str, Any] = { - "url": self.options.url or f"https://bedrock-runtime.{self.options.region}.amazonaws.com/model/{self.options.model}/converse-stream", - "access_key": self.options.access_key, - "secret_key": self.options.secret_key, - "region": self.options.region, - "model": self.options.model, - 
"params": params, - "style": "bedrock", - "input_modalities": self.options.input_modalities or ["text"], - } - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.headers is not None: - config["headers"] = self.options.headers - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = self.options.max_history - return config - - - class DifyOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Dify API key") - url: str = Field(..., description="Dify workflow or chat endpoint") - model: str = Field(..., description="Dify model identifier") - user: Optional[str] = Field(default=None, description="Dify user identifier") - conversation_id: Optional[str] = Field(default=None, description="Dify conversation ID") - system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) - greeting_message: Optional[str] = Field(default=None) - failure_message: Optional[str] = Field(default=None) - input_modalities: Optional[List[str]] = Field(default=None) - params: Optional[Dict[str, Any]] = Field(default=None) - headers: Optional[Dict[str, str]] = Field(default=None) - output_modalities: 
Optional[List[str]] = Field(default=None) - greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) - template_variables: Optional[Dict[str, str]] = Field(default=None) - vendor: Optional[str] = Field(default=None) - mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) - max_history: Optional[int] = Field(default=None, gt=0) - - - class Dify(BaseLLM): - def __init__(self, **kwargs: Any): - self.options = DifyOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - if self.options.user is not None: - params["user"] = self.options.user - if self.options.conversation_id is not None: - params["conversation_id"] = self.options.conversation_id - - config: Dict[str, Any] = { - "url": self.options.url, - "api_key": self.options.api_key, - "params": params, - "style": "dify", - "input_modalities": self.options.input_modalities or ["text"], - } - if self.options.headers is not None: - config["headers"] = self.options.headers - if self.options.system_messages is not None: - config["system_messages"] = self.options.system_messages - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.greeting_configs is not None: - config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) - if self.options.template_variables is not None: - config["template_variables"] = self.options.template_variables - if self.options.vendor is not None: - config["vendor"] = self.options.vendor - if self.options.mcp_servers is not None: - config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) - if self.options.max_history is not None: - config["max_history"] = 
self.options.max_history - return config - src/agora_agent/agentkit/vendors/mllm.py: | - import warnings - from typing import Any, Dict, List, Optional - - from pydantic import BaseModel, ConfigDict, Field - - from ...types.mllm_turn_detection import MllmTurnDetection - from .base import BaseMLLM - - MllmTurnDetectionConfig = MllmTurnDetection - - - class OpenAIRealtimeOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="OpenAI API key") - model: Optional[str] = Field(default=None, description="Model name (e.g., gpt-4o-realtime-preview)") - voice: Optional[str] = Field(default=None, description="Voice identifier") - instructions: Optional[str] = Field(default=None, description="System instructions") - input_audio_transcription: Optional[Dict[str, Any]] = Field(default=None, description="Audio transcription settings") - url: Optional[str] = Field(default=None, description="WebSocket URL") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - class OpenAIRealtime(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = OpenAIRealtimeOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - config: Dict[str, Any] = { - "vendor": "openai", - "api_key": self.options.api_key, - } - - if self.options.url is not None: - config["url"] = self.options.url - if ( - 
self.options.model is not None - or self.options.params is not None - or self.options.voice is not None - or self.options.instructions is not None - or self.options.input_audio_transcription is not None - ): - params: Dict[str, Any] = {} - if self.options.model is not None: - params["model"] = self.options.model - if self.options.params is not None: - params.update(self.options.params) - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.input_audio_transcription is not None: - params["input_audio_transcription"] = self.options.input_audio_transcription - config["params"] = params - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - # xAI MLLM: use XaiGrok (product name, mllm.vendor "xai"). Do not use XaiRealtime—that name - # is deprecated and reserved naming for future XaiSTT / XaiTTS cascading vendors. 
- - - class XaiGrokOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="xAI API key") - url: str = Field(default="wss://api.x.ai/v1/realtime", description="xAI Realtime WebSocket URL") - voice: Optional[str] = Field(default=None, description="Voice identifier (e.g., eve or rex)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en)") - sample_rate: Optional[int] = Field(default=None, description="Audio sample rate in Hz") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - params: Optional[Dict[str, Any]] = Field(default=None, description="Additional xAI parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - - class XaiGrok(BaseMLLM): - """xAI Grok MLLM vendor (`mllm.vendor`: ``xai``).""" - - def __init__(self, **kwargs: Any): - self.options = XaiGrokOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.params or {}) - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.language is not None: - params["language"] = self.options.language - if self.options.sample_rate is not None: - params["sample_rate"] = self.options.sample_rate - - config: Dict[str, Any] = { - "vendor": "xai", - "api_key": self.options.api_key, - "url": self.options.url, - "params": params, - } - - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if 
self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - class VertexAIOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - model: str = Field(..., description="Model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - project_id: str = Field(..., description="Google Cloud project ID") - location: str = Field(..., description="Google Cloud location/region") - adc_credentials_string: str = Field(..., description="Application Default Credentials JSON string") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name (e.g., Aoede, Charon)") - affective_dialog: Optional[bool] = Field(default=None, description="Enable affective dialog") - proactive_audio: Optional[bool] = Field(default=None, description="Enable proactive audio") - transcribe_agent: Optional[bool] = Field(default=None, description="Transcribe agent speech") - transcribe_user: Optional[bool] = Field(default=None, description="Transcribe user speech") - http_options: Optional[Dict[str, Any]] = Field(default=None, description="HTTP options") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, 
description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - class VertexAI(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = VertexAIOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - # additional_params spread first so that explicit fields always win, - # matching the TypeScript SDK. - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params["model"] = self.options.model - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.affective_dialog is not None: - params["affective_dialog"] = self.options.affective_dialog - if self.options.proactive_audio is not None: - params["proactive_audio"] = self.options.proactive_audio - if self.options.transcribe_agent is not None: - params["transcribe_agent"] = self.options.transcribe_agent - if self.options.transcribe_user is not None: - params["transcribe_user"] = self.options.transcribe_user - if self.options.http_options is not None: - params["http_options"] = self.options.http_options - - config: Dict[str, Any] = { - "vendor": "vertexai", - "project_id": self.options.project_id, - "location": self.options.location, - "adc_credentials_string": self.options.adc_credentials_string, - "params": params, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - 
config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - - - class GeminiLiveOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Google API key") - model: str = Field(..., description="Gemini Live model name") - url: Optional[str] = Field(default=None, description="WebSocket URL") - instructions: Optional[str] = Field(default=None, description="System instructions") - voice: Optional[str] = Field(default=None, description="Voice name") - affective_dialog: Optional[bool] = Field(default=None, description="Enable affective dialog") - proactive_audio: Optional[bool] = Field(default=None, description="Enable proactive audio") - transcribe_agent: Optional[bool] = Field(default=None, description="Transcribe agent speech") - transcribe_user: Optional[bool] = Field(default=None, description="Transcribe user speech") - http_options: Optional[Dict[str, Any]] = Field(default=None, description="HTTP options") - greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") - input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") - output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") - messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") - turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") - failure_message: Optional[str] = Field(default=None, description="Message played on failure") - - 
class GeminiLive(BaseMLLM): - def __init__(self, **kwargs: Any): - self.options = GeminiLiveOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = {} - if self.options.additional_params is not None: - params.update(self.options.additional_params) - params["model"] = self.options.model - if self.options.instructions is not None: - params["instructions"] = self.options.instructions - if self.options.voice is not None: - params["voice"] = self.options.voice - if self.options.affective_dialog is not None: - params["affective_dialog"] = self.options.affective_dialog - if self.options.proactive_audio is not None: - params["proactive_audio"] = self.options.proactive_audio - if self.options.transcribe_agent is not None: - params["transcribe_agent"] = self.options.transcribe_agent - if self.options.transcribe_user is not None: - params["transcribe_user"] = self.options.transcribe_user - if self.options.http_options is not None: - params["http_options"] = self.options.http_options - - config: Dict[str, Any] = { - "vendor": "gemini", - "api_key": self.options.api_key, - "params": params, - } - - if self.options.url is not None: - config["url"] = self.options.url - if self.options.greeting_message is not None: - config["greeting_message"] = self.options.greeting_message - if self.options.input_modalities is not None: - config["input_modalities"] = self.options.input_modalities - if self.options.output_modalities is not None: - config["output_modalities"] = self.options.output_modalities - if self.options.messages is not None: - config["messages"] = self.options.messages - if self.options.failure_message is not None: - config["failure_message"] = self.options.failure_message - if self.options.turn_detection is not None: - config["turn_detection"] = self.options.turn_detection - - return config - src/agora_agent/agentkit/vendors/stt.py: | - from typing import Any, Dict, Optional - - from pydantic import BaseModel, ConfigDict, Field, 
model_validator - - from .base import BaseSTT - - _DEEPGRAM_MANAGED_MODELS = {"nova-2", "nova-3"} - - - class SpeechmaticsSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Speechmatics API key") - language: str = Field(..., description="Language code (e.g., en, es, fr)") - model: Optional[str] = Field(default=None, description="Model name") - uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class SpeechmaticsSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = SpeechmaticsSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params.update({ - "api_key": self.options.api_key, - "language": self.options.language, - }) - if self.options.model is not None: - params["model"] = self.options.model - if self.options.uri is not None: - params["uri"] = self.options.uri - - config: Dict[str, Any] = { - "vendor": "speechmatics", - "params": params, - } - return config - - - class DeepgramSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: Optional[str] = Field(default=None, description="Deepgram API key") - model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") - language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") - keyterm: Optional[str] = Field(default=None, description="Boost specialized terms and brands for Deepgram") - smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") - punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - @model_validator(mode="after") - def _validate_managed_model(self) -> "DeepgramSTTOptions": - if self.api_key is None and (self.model is None 
or self.model.strip().lower() not in _DEEPGRAM_MANAGED_MODELS): - raise ValueError("DeepgramSTT requires api_key unless using a supported Agora-managed model") - return self - - class DeepgramSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = DeepgramSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - - if self.options.api_key is not None: - params["key"] = self.options.api_key - if self.options.model is not None: - params["model"] = self.options.model - if self.options.language is not None: - params["language"] = self.options.language - if self.options.smart_format is not None: - params["smart_format"] = self.options.smart_format - if self.options.punctuation is not None: - params["punctuation"] = self.options.punctuation - if self.options.keyterm is not None: - params["keyterm"] = self.options.keyterm - config: Dict[str, Any] = { - "vendor": "deepgram", - "params": params, - } - return config - - - class MicrosoftSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - key: str = Field(..., description="Azure subscription key") - region: str = Field(..., description="Azure region (e.g., eastus)") - language: str = Field(..., description="Language code (e.g., en-US)") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class MicrosoftSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = MicrosoftSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params.update({ - "key": self.options.key, - "region": self.options.region, - }) - if self.options.language is not None: - params["language"] = self.options.language - - config: Dict[str, Any] = { - "vendor": "microsoft", - "params": params, - } - return config - - - class OpenAISTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="OpenAI 
API key") - model: Optional[str] = Field(default=None, description="Model (default: whisper-1)") - language: Optional[str] = Field(default=None, description="Language code") - prompt: Optional[str] = Field(default=None, description="Prompt that guides OpenAI transcription") - input_audio_transcription: Optional[Dict[str, Any]] = Field(default=None, description="OpenAI transcription settings") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class OpenAISTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = OpenAISTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params["api_key"] = self.options.api_key - - transcription: Dict[str, Any] = {"model": "gpt-4o-mini-transcribe"} - transcription.update(self.options.input_audio_transcription or {}) - if self.options.model is not None: - transcription["model"] = self.options.model - if self.options.prompt is not None: - transcription["prompt"] = self.options.prompt - if self.options.language is not None: - transcription["language"] = self.options.language - if not transcription.get("model"): - raise ValueError("OpenAISTT: input_audio_transcription.model is required") - if not transcription.get("prompt"): - raise ValueError("OpenAISTT: input_audio_transcription.prompt is required") - if not transcription.get("language"): - raise ValueError("OpenAISTT: input_audio_transcription.language is required") - params["input_audio_transcription"] = transcription - - config: Dict[str, Any] = { - "vendor": "openai", - "params": params, - } - return config - - - class GoogleSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - project_id: str = Field(..., description="Google Cloud project ID") - location: str = Field(..., description="Google Cloud region") - adc_credentials_string: str = Field(..., description="Google service account credentials JSON string") - language: str = Field(..., 
description="Language code (e.g., en-US)") - model: Optional[str] = Field(default=None, description="Recognition model") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class GoogleSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = GoogleSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params.update({ - "project_id": self.options.project_id, - "location": self.options.location, - "adc_credentials_string": self.options.adc_credentials_string, - }) - - if self.options.language is not None: - params["language"] = self.options.language - if self.options.model is not None: - params["model"] = self.options.model - - config: Dict[str, Any] = { - "vendor": "google", - "params": params, - } - return config - - - class AmazonSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - access_key: str = Field(..., description="AWS Access Key ID") - secret_key: str = Field(..., description="AWS Secret Access Key") - region: str = Field(..., description="AWS region (e.g., us-east-1)") - language: str = Field(..., description="Language code") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class AmazonSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = AmazonSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params.update({ - "access_key_id": self.options.access_key, - "secret_access_key": self.options.secret_key, - "region": self.options.region, - }) - if self.options.language is not None: - params["language_code"] = self.options.language - - config: Dict[str, Any] = { - "vendor": "amazon", - "params": params, - } - return config - - - class AssemblyAISTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="AssemblyAI API key") - language: str = Field(..., 
description="Language code") - uri: Optional[str] = Field(default=None, description="AssemblyAI streaming WebSocket URL") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class AssemblyAISTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = AssemblyAISTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params["api_key"] = self.options.api_key - if self.options.language is not None: - params["language"] = self.options.language - if self.options.uri is not None: - params["uri"] = self.options.uri - - config: Dict[str, Any] = { - "vendor": "assemblyai", - "params": params, - } - return config - - - class AresSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class AresSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = AresSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - config: Dict[str, Any] = {"vendor": "ares"} - if self.options.additional_params: - config["params"] = self.options.additional_params - return config - - - class SarvamSTTOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Sarvam API key") - language: str = Field(..., description="Language code (e.g., en, hi, ta)") - model: Optional[str] = Field(default=None, description="Model name") - additional_params: Optional[Dict[str, Any]] = Field(default=None) - - class SarvamSTT(BaseSTT): - def __init__(self, **kwargs: Any): - self.options = SarvamSTTOptions(**kwargs) - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = dict(self.options.additional_params or {}) - params.update({ - "api_key": self.options.api_key, - "language": self.options.language, - }) - if self.options.model is not None: - params["model"] = self.options.model - - config: Dict[str, Any] = { - "vendor": "sarvam", - "params": params, 
- } - return config - - id: patch-617ee134 - content_hash: sha256:ea2d27ba8019bf09ce5766d322eb7218fcee0a90124e823ba16c4e45dc1af5a9 - original_commit: 617ee134d9dafbf4f4f83d5e98b80ad110c6e1bf - original_message: "feat(agentkit): support agent-level pipeline_id" - original_author: Hermes (agora) - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/reference/agent.md - - src/agora_agent/agentkit/agent.py - - src/agora_agent/agentkit/agent_session.py - - tests/custom/test_pipeline_id.py - patch_content: | - diff --git a/docs/reference/agent.md b/docs/reference/agent.md - index 187229f..86d4fbd 100644 - --- a/docs/reference/agent.md - +++ b/docs/reference/agent.md - @@ -27,12 +27,14 @@ Agent( - labels: Optional[Dict[str, str]] = None, - rtc: Optional[RtcConfig] = None, - filler_words: Optional[FillerWordsConfig] = None, - + pipeline_id: Optional[str] = None, - ) - ``` - - | Parameter | Type | Default | Description | - |---|---|---|---| - | `name` | `Optional[str]` | `None` | Agent name, used as default session name | - +| `pipeline_id` | `Optional[str]` | `None` | Published AI Studio pipeline ID used as this agent's base configuration | - | `instructions` | `Optional[str]` | `None` | Deprecated. Use LLM vendor `system_messages` instead. | - | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Interaction language and turn detection configuration | - | `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration | - @@ -47,6 +49,8 @@ Agent( - | `rtc` | `Optional[RtcConfig]` | `None` | RTC media encryption | - | `filler_words` | `Optional[FillerWordsConfig]` | `None` | Filler words while waiting for LLM | - - +`pipeline_id` is an AI Studio base configuration. Explicit Agent config such as `with_llm()`, `with_tts()`, `with_stt()`, `with_mllm()`, `advanced_features`, and other builder options may send fields in `properties` that override the saved pipeline settings. 
Session-level `pipeline_id` overrides the agent-level value. - + - The Agent-level `instructions`, `greeting`, `failure_message`, `max_history`, and `greeting_configs` fields are compatibility shims. New code should configure those values on the LLM or MLLM vendor because that matches the core request schema. - - ## Builder Methods - @@ -202,6 +206,8 @@ create_session( - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - + preset: Optional[Union[str, Sequence[str]]] = None, - + pipeline_id: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> AgentSession - ``` - @@ -219,6 +225,10 @@ Creates an `AgentSession` bound to the given client and channel. - | `expires_in` | `Optional[int]` | No | Token lifetime in seconds (default: `86400` = 24 h, Agora max). Only applies when the token is auto-generated. Use `expires_in_hours()` or `expires_in_minutes()` for clarity. Valid range: 1–86400. | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - +| `preset` | `Optional[Union[str, Sequence[str]]]` | No | Advanced preset value for project-specific routing | - +| `pipeline_id` | `Optional[str]` | No | Published AI Studio pipeline ID for this session. Overrides `agent.pipeline_id`. | - + - +`pipeline_id` is sent as the top-level `/join` field `pipeline_id`, not inside `properties`. 
- - **Returns:** `AgentSession` - - diff --git a/docs/reference/session.md b/docs/reference/session.md - index 63402f6..76e1367 100644 - --- a/docs/reference/session.md - +++ b/docs/reference/session.md - @@ -33,6 +33,11 @@ AgentSession( - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - + preset: Optional[Union[str, Sequence[str]]] = None, - + pipeline_id: Optional[str] = None, - + expires_in: Optional[int] = None, - + debug: Optional[bool] = None, - + warn: Optional[Callable[[str], None]] = None, - ) - ``` - - @@ -51,6 +56,13 @@ AgentSession( - | `token` | `Optional[str]` | No | Pre-built RTC token | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - +| `preset` | `Optional[Union[str, Sequence[str]]]` | No | Advanced preset value for project-specific routing | - +| `pipeline_id` | `Optional[str]` | No | Published AI Studio pipeline ID for this session. Overrides `agent.pipeline_id`. | - +| `expires_in` | `Optional[int]` | No | Auto-generated token lifetime in seconds | - +| `debug` | `Optional[bool]` | No | Enable debug logging of the start request | - +| `warn` | `Optional[Callable[[str], None]]` | No | Custom warning sink | - + - +`pipeline_id` is sent as the top-level `/join` field `pipeline_id`, not inside `properties`. If unset, `AgentSession.start()` uses the agent-level value from `Agent(..., pipeline_id=...)`. 
- - ## Methods - - diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py - index fea1f0d..0a652db 100644 - --- a/src/agora_agent/agentkit/agent.py - +++ b/src/agora_agent/agentkit/agent.py - @@ -343,8 +343,10 @@ class Agent: - rtc: typing.Optional[RtcConfig] = None, - filler_words: typing.Optional[FillerWordsConfig] = None, - greeting_configs: typing.Optional[LlmGreetingConfigs] = None, - + pipeline_id: typing.Optional[str] = None, - ): - self._name = name - + self._pipeline_id = pipeline_id - self._instructions = instructions - self._greeting = greeting - self._failure_message = failure_message - @@ -609,6 +611,11 @@ class Agent: - def name(self) -> typing.Optional[str]: - return self._name - - + @property - + def pipeline_id(self) -> typing.Optional[str]: - + """Published AI Studio pipeline ID used as this agent's base configuration.""" - + return self._pipeline_id - + - @property - def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._llm - @@ -693,6 +700,7 @@ class Agent: - def config(self) -> typing.Dict[str, typing.Any]: - return { - "name": self._name, - + "pipeline_id": self._pipeline_id, - "instructions": self._instructions, - "greeting": self._greeting, - "failure_message": self._failure_message, - @@ -945,6 +953,7 @@ class Agent: - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - + new_agent._pipeline_id = self._pipeline_id - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py - index e113dc1..5c866ac 100644 - --- a/src/agora_agent/agentkit/agent_session.py - +++ b/src/agora_agent/agentkit/agent_session.py - @@ -52,7 +52,8 @@ class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - - Optional fields - --------------- - - app_certificate, token, idle_timeout, enable_string_uid, expires_in - + app_certificate, 
token, idle_timeout, enable_string_uid, preset, - + pipeline_id, expires_in, debug, warn - """ - - app_certificate: str - @@ -290,14 +291,18 @@ class _AgentSessionBase: - return True - return mllm is not None - - - def _build_start_properties(self, token_opts: typing.Dict[str, typing.Any]) -> typing.Dict[str, typing.Any]: - + def _build_start_properties( - + self, - + token_opts: typing.Dict[str, typing.Any], - + skip_vendor_validation: bool, - + ) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - - skip_vendor_validation=True, - + skip_vendor_validation=skip_vendor_validation, - **token_opts, - ) - properties = self._dump_model(base_properties) - @@ -445,6 +450,7 @@ class AgentSession(_AgentSessionBase): - self._status = "starting" - - try: - + pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - @@ -454,7 +460,7 @@ class AgentSession(_AgentSessionBase): - "expires_in": self._expires_in, - } - - - properties = self._build_start_properties(token_opts) - + properties = self._build_start_properties(token_opts, skip_vendor_validation=bool(self._preset or pipeline_id)) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - @@ -466,7 +472,7 @@ class AgentSession(_AgentSessionBase): - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - - "pipeline_id": self._pipeline_id, - + "pipeline_id": pipeline_id, - "properties": resolved_properties, - }) - - @@ -480,7 +486,7 @@ class AgentSession(_AgentSessionBase): - name=self._name, - properties=request_properties, - preset=resolved_preset, - - pipeline_id=self._pipeline_id, - + pipeline_id=pipeline_id, - 
request_options=self._request_options(), - ) - - @@ -766,6 +772,7 @@ class AsyncAgentSession(_AgentSessionBase): - self._status = "starting" - - try: - + pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - @@ -775,7 +782,7 @@ class AsyncAgentSession(_AgentSessionBase): - "expires_in": self._expires_in, - } - - - properties = self._build_start_properties(token_opts) - + properties = self._build_start_properties(token_opts, skip_vendor_validation=bool(self._preset or pipeline_id)) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - @@ -787,7 +794,7 @@ class AsyncAgentSession(_AgentSessionBase): - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - - "pipeline_id": self._pipeline_id, - + "pipeline_id": pipeline_id, - "properties": resolved_properties, - }) - - @@ -801,7 +808,7 @@ class AsyncAgentSession(_AgentSessionBase): - name=self._name, - properties=request_properties, - preset=resolved_preset, - - pipeline_id=self._pipeline_id, - + pipeline_id=pipeline_id, - request_options=self._request_options(), - ) - - diff --git a/tests/custom/test_pipeline_id.py b/tests/custom/test_pipeline_id.py - new file mode 100644 - index 0000000..c6c8c8f - --- /dev/null - +++ b/tests/custom/test_pipeline_id.py - @@ -0,0 +1,123 @@ - +import pytest - + - +from agora_agent import Agent - + - + - +def dump(value): - + if hasattr(value, "model_dump"): - + return value.model_dump(exclude_none=True) - + if hasattr(value, "dict"): - + return value.dict(exclude_none=True) - + return value - + - + - +class StartResponse: - + agent_id = "agent-id" - + - + - +class FakeAgentsClient: - + def __init__(self): - + self.calls = [] - + - + def start(self, appid, **kwargs): - + self.calls.append({"appid": appid, **kwargs}) - + return StartResponse() - + - + - +class FakeAsyncAgentsClient: - + def 
__init__(self): - + self.calls = [] - + - + async def start(self, appid, **kwargs): - + self.calls.append({"appid": appid, **kwargs}) - + return StartResponse() - + - + - +class FakeClient: - + app_id = "appid" - + app_certificate = None - + - + def __init__(self, agents): - + self.agents = agents - + - + - +def start_agent(agent, **overrides): - + agents = FakeAgentsClient() - + client = FakeClient(agents) - + options = { - + "channel": "channel", - + "token": "token", - + "agent_uid": "1", - + "remote_uids": ["100"], - + **overrides, - + } - + - + agent_id = agent.create_session(client, **options).start() - + - + assert agent_id == "agent-id" - + assert len(agents.calls) == 1 - + return agents.calls[0] - + - + - +def test_agent_pipeline_id_sends_top_level_pipeline_id() -> None: - + call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) - + - + assert call["appid"] == "appid" - + assert call["name"] == "support" - + assert call["pipeline_id"] == "studio-pipeline-id" - + properties = dump(call["properties"]) - + assert properties["channel"] == "channel" - + assert properties["token"] == "token" - + assert properties["agent_rtc_uid"] == "1" - + assert properties["remote_rtc_uids"] == ["100"] - + - + - +def test_session_pipeline_id_overrides_agent_pipeline_id() -> None: - + call = start_agent( - + Agent(name="support", pipeline_id="agent-pipeline"), - + pipeline_id="session-pipeline", - + ) - + - + assert call["pipeline_id"] == "session-pipeline" - + - + - +def test_agent_pipeline_id_skips_missing_vendor_validation() -> None: - + call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) - + - + assert call["pipeline_id"] == "studio-pipeline-id" - + - + - +def test_pipeline_id_is_not_sent_inside_properties() -> None: - + call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) - + - + assert call["pipeline_id"] == "studio-pipeline-id" - + assert "pipeline_id" not in dump(call["properties"]) - + - + - +def 
test_pipeline_id_survives_builder_clone() -> None: - + agent = Agent(name="support", pipeline_id="studio-pipeline-id").with_tools(True) - + - + assert agent.pipeline_id == "studio-pipeline-id" - + call = start_agent(agent) - + - + assert call["pipeline_id"] == "studio-pipeline-id" - + assert dump(call["properties"])["advanced_features"] == {"enable_tools": True} - + - + - +@pytest.mark.asyncio - +async def test_async_session_uses_agent_pipeline_id() -> None: - + agents = FakeAsyncAgentsClient() - + client = FakeClient(agents) - + agent = Agent(name="support", pipeline_id="studio-pipeline-id") - + - + agent_id = await agent.create_async_session( - + client, - + channel="channel", - + token="token", - + agent_uid="1", - + remote_uids=["100"], - + ).start() - + - + assert agent_id == "agent-id" - + assert agents.calls[0]["pipeline_id"] == "studio-pipeline-id" - + assert "pipeline_id" not in dump(agents.calls[0]["properties"]) - theirs_snapshot: - docs/reference/agent.md: | - --- - sidebar_position: 2 - title: Agent - description: Full API reference for the Python Agent builder class. 
- --- - - # Agent Reference - - **Import:** `from agora_agent import Agent` - - ## Constructor - - - ```python - Agent( - name: Optional[str] = None, - instructions: Optional[str] = None, - turn_detection: Optional[TurnDetectionConfig] = None, - interruption: Optional[InterruptionConfig] = None, - sal: Optional[SalConfig] = None, - advanced_features: Optional[Dict[str, Any]] = None, - parameters: Optional[SessionParams] = None, - greeting: Optional[str] = None, - failure_message: Optional[str] = None, - max_history: Optional[int] = None, - geofence: Optional[GeofenceConfig] = None, - labels: Optional[Dict[str, str]] = None, - rtc: Optional[RtcConfig] = None, - filler_words: Optional[FillerWordsConfig] = None, - pipeline_id: Optional[str] = None, - ) - ``` - - | Parameter | Type | Default | Description | - |---|---|---|---| - | `name` | `Optional[str]` | `None` | Agent name, used as default session name | - | `pipeline_id` | `Optional[str]` | `None` | Published AI Studio pipeline ID used as this agent's base configuration | - | `instructions` | `Optional[str]` | `None` | Deprecated. Use LLM vendor `system_messages` instead. | - | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Interaction language and turn detection configuration | - | `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration | - | `sal` | `Optional[SalConfig]` | `None` | Speech Activity Level configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | `None` | Advanced features dict (e.g., `{'enable_rtm': True}`) | - | `parameters` | `Optional[SessionParams]` | `None` | Additional session parameters | - | `greeting` | `Optional[str]` | `None` | Deprecated. Use LLM/MLLM vendor `greeting_message` instead. | - | `failure_message` | `Optional[str]` | `None` | Deprecated. Use LLM/MLLM vendor `failure_message` instead. | - | `max_history` | `Optional[int]` | `None` | Deprecated. Use LLM vendor `max_history` instead. 
| - | `geofence` | `Optional[GeofenceConfig]` | `None` | Regional access restriction | - | `labels` | `Optional[Dict[str, str]]` | `None` | Custom key-value labels (returned in callbacks) | - | `rtc` | `Optional[RtcConfig]` | `None` | RTC media encryption | - | `filler_words` | `Optional[FillerWordsConfig]` | `None` | Filler words while waiting for LLM | - - `pipeline_id` is an AI Studio base configuration. Explicit Agent config such as `with_llm()`, `with_tts()`, `with_stt()`, `with_mllm()`, `advanced_features`, and other builder options may send fields in `properties` that override the saved pipeline settings. Session-level `pipeline_id` overrides the agent-level value. - - The Agent-level `instructions`, `greeting`, `failure_message`, `max_history`, and `greeting_configs` fields are compatibility shims. New code should configure those values on the LLM or MLLM vendor because that matches the core request schema. - - ## Builder Methods - - All builder methods return a new `Agent` instance (immutable pattern). - - ### `with_llm(vendor: BaseLLM) -> Agent` - - Set the LLM vendor for cascading flow. - - - ```python - from agora_agent import OpenAI - agent = Agent().with_llm(OpenAI(api_key='your-key', base_url='https://api.openai.com/v1/chat/completions', model='gpt-4o-mini')) - ``` - - ### `with_tts(vendor: BaseTTS) -> Agent` - - Set the TTS vendor. Records the vendor's `sample_rate` for avatar validation. - - - ```python - from agora_agent import ElevenLabsTTS - agent = Agent().with_tts(ElevenLabsTTS(key='your-key', model_id='eleven_flash_v2_5', voice_id='your-voice-id', base_url='wss://api.elevenlabs.io/v1')) - ``` - - ### `with_stt(vendor: BaseSTT) -> Agent` - - Set the STT (ASR) vendor. - - - ```python - from agora_agent import DeepgramSTT - agent = Agent().with_stt(DeepgramSTT(api_key='your-key', language='en-US')) - ``` - - ### `with_mllm(vendor: BaseMLLM) -> Agent` - - Set the MLLM vendor for multimodal flow. 
Calling `with_mllm()` automatically sets `mllm.enable = True`. MLLM sessions do not require TTS, STT, or LLM vendors. - - - ```python - from agora_agent import OpenAIRealtime - agent = Agent().with_mllm(OpenAIRealtime(api_key='your-key')) - ``` - - ### `with_avatar(vendor: BaseAvatar) -> Agent` - - Set the avatar vendor for the cascading ASR + LLM + TTS pipeline. Avatars are not supported when MLLM is enabled — combining `with_mllm()` and an enabled `with_avatar()` is rejected at `to_properties()` and `AgentSession.start()`. A disabled avatar (`enable=False`) is allowed alongside MLLM. - - Raises `ValueError` if the TTS sample rate does not match the avatar's `required_sample_rate`. - - - ```python - from agora_agent import HeyGenAvatar - agent = agent.with_avatar(HeyGenAvatar(api_key='your-key', quality='medium', agora_uid='2')) - ``` - - **Raises:** `ValueError` — `"Avatar requires TTS sample rate of {required} Hz, but TTS is configured with {actual} Hz. Please update your TTS sample_rate to {required}."` - - ### `with_turn_detection(config: TurnDetectionConfig) -> Agent` - - Override cascading-flow turn detection settings. Use `language` for the Agora interaction language, `config.start_of_speech` and `config.end_of_speech` for SOS/EOS detection, `with_interruption()` for interruption behavior, and MLLM vendor `turn_detection` for MLLM turn detection. - - Pause-state detection is configured under semantic end-of-speech: - - ```python - agent = agent.with_turn_detection({ - "mode": "default", - "config": { - "end_of_speech": { - "mode": "semantic", - "semantic_config": { - "pause_state_enabled": True, - }, - }, - }, - }) - ``` - - ### `with_interruption(config: InterruptionConfig) -> Agent` - - Configure unified interruption behavior using the top-level `interruption` object. Use this for `start_of_speech` and `keywords` interruption modes. - - ### `with_instructions(instructions: str) -> Agent` - - Deprecated. 
Configure `system_messages` on the LLM vendor instead. - - ### `with_greeting(greeting: str) -> Agent` - - Deprecated. Configure `greeting_message` on the LLM or MLLM vendor instead. - - ### `with_name(name: str) -> Agent` - - Override the agent name. - - ### `with_sal(config: SalConfig) -> Agent` - - Set SAL (Selective Attention Locking) configuration. - - ### `with_advanced_features(features: AdvancedFeatures) -> Agent` - - Set advanced features (e.g. `{'enable_rtm': True}`). - - When `enable_rtm=True`, AgentKit defaults `parameters.data_channel` to `"rtm"` unless you explicitly set another data channel. - - ### `with_tools(enabled: bool = True) -> Agent` - - Enable or disable MCP tool invocation by setting `advanced_features.enable_tools`. - - ### `with_parameters(parameters: SessionParams) -> Agent` - - Set session parameters (silence config, farewell config, data channel, audio scenario, etc.). - - ### `with_audio_scenario(audio_scenario: ParametersAudioScenario) -> Agent` - - Set `parameters.audio_scenario` without replacing existing session parameters. - - ### `with_failure_message(message: str) -> Agent` - - Deprecated. Configure `failure_message` on the LLM or MLLM vendor instead. - - ### `with_max_history(max_history: int) -> Agent` - - Deprecated. Configure `max_history` on the LLM vendor instead. - - ### `with_geofence(geofence: GeofenceConfig) -> Agent` - - Set geofence configuration (restricts backend server regions). - - ### `with_labels(labels: Dict[str, str]) -> Agent` - - Set custom labels (key-value pairs returned in notification callbacks). - - ### `with_rtc(rtc: RtcConfig) -> Agent` - - Set RTC configuration. - - ### `with_filler_words(filler_words: FillerWordsConfig) -> Agent` - - Set filler words configuration (played while waiting for LLM response). 
- - ## `create_session()` - - - ```python - create_session( - client: Any, - channel: str, - agent_uid: str, - remote_uids: List[str], - name: Optional[str] = None, - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - preset: Optional[Union[str, Sequence[str]]] = None, - pipeline_id: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> AgentSession - ``` - - Creates an `AgentSession` bound to the given client and channel. - - | Parameter | Type | Required | Description | - |---|---|---|---| - | `client` | `Agora` or `AsyncAgora` | Yes | Authenticated client | - | `channel` | `str` | Yes | Channel name | - | `agent_uid` | `str` | Yes | UID for the agent | - | `remote_uids` | `List[str]` | Yes | UIDs of remote participants | - | `name` | `Optional[str]` | No | Session name (defaults to agent name) | - | `token` | `Optional[str]` | No | Pre-built RTC+RTM token | - | `expires_in` | `Optional[int]` | No | Token lifetime in seconds (default: `86400` = 24 h, Agora max). Only applies when the token is auto-generated. Use `expires_in_hours()` or `expires_in_minutes()` for clarity. Valid range: 1–86400. | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - | `preset` | `Optional[Union[str, Sequence[str]]]` | No | Advanced preset value for project-specific routing | - | `pipeline_id` | `Optional[str]` | No | Published AI Studio pipeline ID for this session. Overrides `agent.pipeline_id`. | - - `pipeline_id` is sent as the top-level `/join` field `pipeline_id`, not inside `properties`. - - **Returns:** `AgentSession` - - ## `to_properties()` - - Converts the agent configuration into a `StartAgentsRequestProperties` object for the Agora API. Called internally by `AgentSession.start()`. 
- - - ```python - to_properties( - channel: str, - agent_uid: str, - remote_uids: List[str], - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - token: Optional[str] = None, - app_id: Optional[str] = None, - app_certificate: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> StartAgentsRequestProperties - ``` - - **Raises:** `ValueError` if neither `token` nor `app_id`+`app_certificate` is provided, or if required vendors (LLM, TTS) are missing in cascading mode. - - ## Properties - - | Property | Type | Description | - |---|---|---| - | `name` | `Optional[str]` | Agent name | - | `instructions` | `Optional[str]` | Deprecated. Agent-level system prompt; configure `system_messages` on the LLM vendor instead | - | `greeting` | `Optional[str]` | Deprecated. Agent-level greeting message; configure `greeting_message` on the LLM or MLLM vendor instead | - | `failure_message` | `Optional[str]` | Deprecated. Agent-level failure message; configure `failure_message` on the LLM or MLLM vendor instead | - | `max_history` | `Optional[int]` | Deprecated. Agent-level max history; configure `max_history` on the LLM vendor instead | - | `llm` | `Optional[Dict[str, Any]]` | LLM config dict (from `to_config()`) | - | `tts` | `Optional[Dict[str, Any]]` | TTS config dict | - | `stt` | `Optional[Dict[str, Any]]` | STT config dict | - | `mllm` | `Optional[Dict[str, Any]]` | MLLM config dict | - | `avatar` | `Optional[Dict[str, Any]]` | Avatar config dict | - | `turn_detection` | `Optional[TurnDetectionConfig]` | Interaction language and turn detection settings | - | `sal` | `Optional[SalConfig]` | SAL configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | Advanced features | - | `parameters` | `Optional[SessionParams]` | Session parameters | - | `geofence` | `Optional[GeofenceConfig]` | Geofence configuration | - | `labels` | `Optional[Dict[str, str]]` | Custom labels | - | `rtc` | `Optional[RtcConfig]` | RTC configuration | - | `filler_words` | `Optional[FillerWordsConfig]` | Filler words configuration | - | `config` | `Dict[str, Any]` | Full configuration dict | - - ## Type aliases - - Public aliases over Fern-generated types: `LlmConfig`, `SttConfig`, 
`AsrConfig` (= `SttConfig`), `TtsConfig`, `MllmConfig`, `AvatarConfig`, session/conversation types, and think types (`ThinkOnListeningAction`, etc.). - - Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. - docs/reference/session.md: | - --- - sidebar_position: 3 - title: AgentSession - description: Full API reference for the Python AgentSession class. - --- - - # AgentSession / AsyncAgentSession Reference - - **Import:** - - ```python - from agora_agent import AgentSession - from agora_agent import AsyncAgentSession - # or both in a single statement: - from agora_agent import AgentSession, AsyncAgentSession - ``` - - ## Constructor - - Sessions are normally created via `Agent.create_session()`. Direct construction is available for advanced use: - - - ```python - AgentSession( - client: Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: List[str], - app_certificate: Optional[str] = None, - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - preset: Optional[Union[str, Sequence[str]]] = None, - pipeline_id: Optional[str] = None, - expires_in: Optional[int] = None, - debug: Optional[bool] = None, - warn: Optional[Callable[[str], None]] = None, - ) - ``` - - `AsyncAgentSession` has the same constructor signature. 
- - | Parameter | Type | Required | Description | - |---|---|---|---| - | `client` | `Agora` or `AsyncAgora` | Yes | Authenticated client | - | `agent` | `Agent` | Yes | Agent configuration | - | `app_id` | `str` | Yes | Agora App ID | - | `name` | `str` | Yes | Session name | - | `channel` | `str` | Yes | Channel name | - | `agent_uid` | `str` | Yes | UID for the agent | - | `remote_uids` | `List[str]` | Yes | UIDs of remote participants | - | `app_certificate` | `Optional[str]` | No | App Certificate (for auto token generation) | - | `token` | `Optional[str]` | No | Pre-built RTC token | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - | `preset` | `Optional[Union[str, Sequence[str]]]` | No | Advanced preset value for project-specific routing | - | `pipeline_id` | `Optional[str]` | No | Published AI Studio pipeline ID for this session. Overrides `agent.pipeline_id`. | - | `expires_in` | `Optional[int]` | No | Auto-generated token lifetime in seconds | - | `debug` | `Optional[bool]` | No | Enable debug logging of the start request | - | `warn` | `Optional[Callable[[str], None]]` | No | Custom warning sink | - - `pipeline_id` is sent as the top-level `/join` field `pipeline_id`, not inside `properties`. If unset, `AgentSession.start()` uses the agent-level value from `Agent(..., pipeline_id=...)`. - - ## Methods - - ### `start()` - - Start the agent session. Generates an RTC token if not provided, validates avatar/TTS config for cascading sessions, and calls the Agora API. MLLM sessions do not require TTS; an enabled avatar is rejected when MLLM is configured (a disabled avatar is allowed). 
- - | | Sync (`AgentSession`) | Async (`AsyncAgentSession`) | - |---|---|---| - | **Signature** | `start() -> str` | `async start() -> str` | - | **Returns** | Agent ID | Agent ID | - | **Raises** | `RuntimeError` if not in `idle`, `stopped`, or `error` state | Same | - | **Raises** | `ValueError` if avatar/TTS sample rate mismatch or an enabled avatar is used with MLLM | Same | - - - ```python - # Sync - agent_id = session.start() - - # Async - agent_id = await session.start() - ``` - - ### `stop()` - - Stop the agent session. If the agent has already stopped (404 from API), transitions to `stopped` without raising. - - | | Sync | Async | - |---|---|---| - | **Signature** | `stop() -> None` | `async stop() -> None` | - | **Raises** | `RuntimeError` if not in `running` state | Same | - - - ```python - # Sync - session.stop() - - # Async - await session.stop() - ``` - - ### `say(text, priority=None, interruptable=None)` - - Send text to be spoken by the agent's TTS. - - | | Sync | Async | - |---|---|---| - | **Signature** | `say(text: str, priority: Optional[str] = None, interruptable: Optional[bool] = None) -> None` | Same with `async` | - | **Raises** | `RuntimeError` if not in `running` state | Same | - - | Parameter | Type | Required | Description | - |---|---|---|---| - | `text` | `str` | Yes | Text to speak | - | `priority` | `str` | No | `INTERRUPT`, `APPEND`, or `IGNORE` | - | `interruptable` | `bool` | No | Whether the message can be interrupted | - - - ```python - # Sync - session.say('Hello!', priority='INTERRUPT', interruptable=False) - - # Async - await session.say('Hello!', priority='INTERRUPT', interruptable=False) - ``` - - ### `interrupt()` - - Interrupt the agent while speaking or thinking. 
- - | | Sync | Async | - |---|---|---| - | **Signature** | `interrupt() -> None` | `async interrupt() -> None` | - | **Raises** | `RuntimeError` if not in `running` state | Same | - - - ```python - # Sync - session.interrupt() - - # Async - await session.interrupt() - ``` - - ### `update(properties)` - - Update the agent configuration at runtime. - - | | Sync | Async | - |---|---|---| - | **Signature** | `update(properties: Any) -> None` | `async update(properties: Any) -> None` | - | **Raises** | `RuntimeError` if not in `running` state | Same | - - - ```python - from agora_agent.agents.types import UpdateAgentsRequestProperties - - # Sync - session.update(properties) - - # Async - await session.update(properties) - ``` - - ### `think(text, ...)` - - Inject a custom text instruction into the running agent. - - In API v2.7, omitting `on_listening_action` uses the server default `interrupt`. Pass `on_listening_action='inject'` explicitly to preserve the pre-v2.7 behavior. - - ```python - session.think('Summarize the last answer', on_listening_action='inject') - ``` - - ### `get_history()` - - Retrieve the conversation history. - - | | Sync | Async | - |---|---|---| - | **Signature** | `get_history() -> Any` | `async get_history() -> Any` | - | **Raises** | `RuntimeError` if no agent ID | Same | - - - ```python - # Sync - history = session.get_history() - - # Async - history = await session.get_history() - ``` - - ### `get_info()` - - Retrieve the current session info. - - | | Sync | Async | - |---|---|---| - | **Signature** | `get_info() -> Any` | `async get_info() -> Any` | - | **Raises** | `RuntimeError` if no agent ID | Same | - - - ```python - # Sync - info = session.get_info() - - # Async - info = await session.get_info() - ``` - - ### `get_turns(page_index=None, page_size=None)` - - Retrieve paginated turn analytics for a completed or running session. In v2.7, the API defaults to page 1 and up to 50 turns per page. 
Responses include `agent_id`, `name`, `channel`, `total_turn_count`, `pagination`, and `turns`. - - ```python - page = session.get_turns(page_index=1, page_size=50) - ``` - - ### `get_all_turns(page_size=None)` - - Fetch all turn pages and return a single `GetTurnsAgentsResponse` with the combined `turns` list. - - ```python - all_turns = session.get_all_turns(page_size=50) - ``` - - ### `on(event, handler)` - - Register an event handler. This method is synchronous on both `AgentSession` and `AsyncAgentSession`. - - - ```python - session.on('started', lambda data: print(f'Started: {data}')) - ``` - - | Parameter | Type | Description | - |---|---|---| - | `event` | `str` | Event type: `started`, `stopped`, or `error` | - | `handler` | `Callable[..., None]` | Callback function | - - ### `off(event, handler)` - - Remove a previously registered event handler. - - - ```python - session.off('started', my_handler) - ``` - - ## Properties - - | Property | Type | Description | - |---|---|---| - | `id` | `Optional[str]` | Agent ID (set after `start()`) | - | `status` | `str` | Current state: `idle`, `starting`, `running`, `stopping`, `stopped`, `error` | - | `agent` | `Agent` | The agent configuration | - | `app_id` | `str` | Agora App ID | - | `raw` | `AgentsClient` / `AsyncAgentsClient` | Direct access to Fern-generated agents client | - - ## State Transitions - - | Current State | Allowed Actions | - |---|---| - | `idle` | `start()` | - | `starting` | (waiting for API) | - | `running` | `stop()`, `say()`, `interrupt()`, `update()`, `get_history()`, `get_info()` | - | `stopping` | (waiting for API) | - | `stopped` | `start()` (restart) | - | `error` | `start()` (retry) | - src/agora_agent/agentkit/agent.py: | - from __future__ import annotations - - import time - import typing - import typing_extensions - - if typing.TYPE_CHECKING: - from .agent_session import AgentSession, AsyncAgentSession - - from ..agents.types.start_agents_request_properties import 
StartAgentsRequestProperties - from ..agents.types.start_agents_request_properties_avatar import StartAgentsRequestPropertiesAvatar - from ..agents.types.start_agents_request_properties_avatar_vendor import StartAgentsRequestPropertiesAvatarVendor - from ..agents.types.update_agents_request_properties import UpdateAgentsRequestProperties - from ..agents.types.get_agents_response import GetAgentsResponse - from ..agents.types.list_agents_response import ListAgentsResponse - from ..agents.types.list_agents_response_data_list_item import ListAgentsResponseDataListItem - from ..agents.types.list_agents_response_data_list_item_status import ListAgentsResponseDataListItemStatus - from ..agents.types.get_history_agents_response import GetHistoryAgentsResponse - from ..agents.types.get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem - from ..agents.types.get_history_agents_response_contents_item_role import GetHistoryAgentsResponseContentsItemRole - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.get_turns_agents_response_turns_item import GetTurnsAgentsResponseTurnsItem - from ..agents.types.speak_agents_request_priority import SpeakAgentsRequestPriority - from ..agents.types.start_agents_request_properties_turn_detection import StartAgentsRequestPropertiesTurnDetection - from ..agents.types.start_agents_request_properties_turn_detection_config import StartAgentsRequestPropertiesTurnDetectionConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_vad_config import 
StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_keywords_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_start_of_speech_disabled_config_strategy import StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_mode import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_vad_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - from ..agents.types.start_agents_request_properties_turn_detection_config_end_of_speech_semantic_config import StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - from ..agents.types.start_agents_request_properties_turn_detection_type import StartAgentsRequestPropertiesTurnDetectionType - from ..agents.types.start_agents_request_properties_turn_detection_interrupt_mode import StartAgentsRequestPropertiesTurnDetectionInterruptMode - from ..agents.types.start_agents_request_properties_turn_detection_eagerness import StartAgentsRequestPropertiesTurnDetectionEagerness - from ..agents.types.start_agents_request_properties_sal import StartAgentsRequestPropertiesSal - from ..agents.types.start_agents_request_properties_sal_sal_mode import StartAgentsRequestPropertiesSalSalMode - from 
..agents.types.start_agents_request_properties_parameters import StartAgentsRequestPropertiesParameters - from ..agents.types.start_agents_request_properties_parameters_silence_config import StartAgentsRequestPropertiesParametersSilenceConfig - from ..agents.types.start_agents_request_properties_parameters_silence_config_action import StartAgentsRequestPropertiesParametersSilenceConfigAction - from ..agents.types.start_agents_request_properties_parameters_farewell_config import StartAgentsRequestPropertiesParametersFarewellConfig - from ..agents.types.start_agents_request_properties_parameters_data_channel import StartAgentsRequestPropertiesParametersDataChannel - from ..agents.types.start_agents_request_properties_parameters_audio_scenario import StartAgentsRequestPropertiesParametersAudioScenario - from ..agents.types.start_agents_request_properties_interruption import StartAgentsRequestPropertiesInterruption - from ..agents.types.start_agents_request_properties_interruption_mode import StartAgentsRequestPropertiesInterruptionMode - from ..agents.types.start_agents_request_properties_geofence import StartAgentsRequestPropertiesGeofence - from ..agents.types.start_agents_request_properties_rtc import StartAgentsRequestPropertiesRtc - from ..agents.types.start_agents_request_properties_advanced_features import StartAgentsRequestPropertiesAdvancedFeatures - from ..agents.types.start_agents_request_properties_filler_words import StartAgentsRequestPropertiesFillerWords - from ..agents.types.start_agents_request_properties_filler_words_trigger import StartAgentsRequestPropertiesFillerWordsTrigger - from ..agents.types.start_agents_request_properties_filler_words_trigger_fixed_time_config import StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - from ..agents.types.start_agents_request_properties_filler_words_content import StartAgentsRequestPropertiesFillerWordsContent - from ..agents.types.start_agents_request_properties_filler_words_content_static_config 
import StartAgentsRequestPropertiesFillerWordsContentStaticConfig - from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - from ..types.tts import Tts - from ..types.asr import Asr - from ..types.llm import Llm - from ..types.llm_style import LlmStyle as GeneratedLlmStyle - from ..types.mllm import Mllm - from ..types.mllm_turn_detection import MllmTurnDetection - from ..types.mllm_turn_detection_mode import MllmTurnDetectionMode as GeneratedMllmTurnDetectionMode - from ..types.mllm_vendor import MllmVendor as GeneratedMllmVendor - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse, - ) - from .vendors.base import BaseAvatar, BaseLLM, BaseMLLM, BaseSTT, BaseTTS - - # Top-level aliases - LlmConfig = Llm - LlmStyle = GeneratedLlmStyle - SttConfig = Asr - AsrConfig = SttConfig - SttVendor = typing.Any - TtsConfig = Tts - MllmConfig = Mllm - MllmVendor = GeneratedMllmVendor - AvatarConfig = StartAgentsRequestPropertiesAvatar - AvatarVendor = StartAgentsRequestPropertiesAvatarVendor - TurnDetectionConfig = StartAgentsRequestPropertiesTurnDetection - SalConfig = StartAgentsRequestPropertiesSal - SalMode = StartAgentsRequestPropertiesSalSalMode - AdvancedFeatures = StartAgentsRequestPropertiesAdvancedFeatures - SessionParams = StartAgentsRequestPropertiesParameters - - # SOS/EOS turn detection aliases (preferred) - 
TurnDetectionNestedConfig = StartAgentsRequestPropertiesTurnDetectionConfig - StartOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeech - StartOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechMode - StartOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechVadConfig - StartOfSpeechKeywordsConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechKeywordsConfig - StartOfSpeechDisabledConfig = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfig - StartOfSpeechDisabledConfigStrategy = StartAgentsRequestPropertiesTurnDetectionConfigStartOfSpeechDisabledConfigStrategy - EndOfSpeechConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeech - EndOfSpeechMode = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechMode - EndOfSpeechVadConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechVadConfig - EndOfSpeechSemanticConfig = StartAgentsRequestPropertiesTurnDetectionConfigEndOfSpeechSemanticConfig - - # Deprecated turn detection aliases - # Deprecated: Use TurnDetectionConfig with TurnDetectionNestedConfig.start_of_speech - # and .end_of_speech instead. The `type` field and agora_vad/server_vad/semantic_vad - # values will be removed in a future release. - TurnDetectionType = StartAgentsRequestPropertiesTurnDetectionType - - # Deprecated: Use StartOfSpeechConfig with mode="vad"|"keywords"|"disabled" and the - # corresponding vad_config, keywords_config, or disabled_config instead. - InterruptMode = StartAgentsRequestPropertiesTurnDetectionInterruptMode - - # Deprecated: Only applies to server_vad/semantic_vad modes with OpenAI Realtime - # (MLLM). Has no equivalent in the ASR + LLM + TTS pipeline. 
- Eagerness = StartAgentsRequestPropertiesTurnDetectionEagerness - - # Parameters (SessionParams) sub-type aliases - SilenceConfig = StartAgentsRequestPropertiesParametersSilenceConfig - SilenceAction = StartAgentsRequestPropertiesParametersSilenceConfigAction - FarewellConfig = StartAgentsRequestPropertiesParametersFarewellConfig - ParametersDataChannel = StartAgentsRequestPropertiesParametersDataChannel - ParametersAudioScenario = StartAgentsRequestPropertiesParametersAudioScenario - InterruptionConfig = StartAgentsRequestPropertiesInterruption - InterruptionMode = StartAgentsRequestPropertiesInterruptionMode - MllmTurnDetectionConfig = MllmTurnDetection - MllmTurnDetectionMode = GeneratedMllmTurnDetectionMode - AgentConfig = StartAgentsRequestProperties - AgentConfigUpdate = UpdateAgentsRequestProperties - SessionInfo = GetAgentsResponse - SessionListResponse = ListAgentsResponse - SessionSummary = ListAgentsResponseDataListItem - SessionStatus = ListAgentsResponseDataListItemStatus - ConversationHistory = GetHistoryAgentsResponse - ConversationTurn = GetHistoryAgentsResponseContentsItem - ConversationRole = GetHistoryAgentsResponseContentsItemRole - ConversationTurns = GetTurnsAgentsResponse - ConversationSessionTurn = GetTurnsAgentsResponseTurnsItem - SpeakPriority = SpeakAgentsRequestPriority - Labels = typing.Dict[str, str] - - - class SessionParamsInput(typing_extensions.TypedDict, total=False): - silence_config: StartAgentsRequestPropertiesParametersSilenceConfig - farewell_config: StartAgentsRequestPropertiesParametersFarewellConfig - data_channel: StartAgentsRequestPropertiesParametersDataChannel - enable_metrics: bool - enable_error_message: bool - audio_scenario: ParametersAudioScenario - - - class ThinkOptions(typing_extensions.TypedDict, total=False): - on_listening_action: AgentThinkAgentManagementRequestOnListeningAction - on_thinking_action: AgentThinkAgentManagementRequestOnThinkingAction - on_speaking_action: 
AgentThinkAgentManagementRequestOnSpeakingAction - interruptable: bool - metadata: typing.Dict[str, str] - - - class GetTurnsOptions(typing_extensions.TypedDict, total=False): - page_index: int - page_size: int - - - class SayOptions(typing_extensions.TypedDict, total=False): - priority: SpeakAgentsRequestPriority - interruptable: bool - - - class SessionOptions(typing_extensions.TypedDict, total=False): - name: str - channel: str - token: str - agent_uid: str - remote_uids: typing.List[str] - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - # LLM sub-type aliases - LlmGreetingConfigs = typing.Dict[str, typing.Any] - LlmGreetingConfigsMode = typing.Any - McpServersItem = typing.Dict[str, typing.Any] - - # Additional top-level config aliases - GeofenceConfig = StartAgentsRequestPropertiesGeofence - RtcConfig = StartAgentsRequestPropertiesRtc - FillerWordsConfig = StartAgentsRequestPropertiesFillerWords - FillerWordsTrigger = StartAgentsRequestPropertiesFillerWordsTrigger - FillerWordsTriggerFixedTimeConfig = StartAgentsRequestPropertiesFillerWordsTriggerFixedTimeConfig - FillerWordsContent = StartAgentsRequestPropertiesFillerWordsContent - FillerWordsContentStaticConfig = StartAgentsRequestPropertiesFillerWordsContentStaticConfig - FillerWordsContentSelectionRule = StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule - - # Think type aliases and response - ThinkOnListeningAction = AgentThinkAgentManagementRequestOnListeningAction - ThinkOnThinkingAction = AgentThinkAgentManagementRequestOnThinkingAction - ThinkOnSpeakingAction = AgentThinkAgentManagementRequestOnSpeakingAction - ThinkResponse = AgentThinkAgentManagementResponse - - from .token import generate_convo_ai_token, _parse_numeric_uid, _validate_expires_in - - TurnDetectionLanguage = typing_extensions.Literal[ - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", 
- "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ] - - DEFAULT_TURN_DETECTION_LANGUAGE: TurnDetectionLanguage = "en-US" - TURN_DETECTION_LANGUAGE_VALUES: typing.Tuple[TurnDetectionLanguage, ...] = ( - "ar-EG", - "ar-JO", - "ar-SA", - "ar-AE", - "bn-IN", - "zh-CN", - "zh-HK", - "zh-TW", - "nl-NL", - "en-IN", - "en-US", - "fil-PH", - "fr-FR", - "de-DE", - "gu-IN", - "he-IL", - "hi-IN", - "id-ID", - "it-IT", - "ja-JP", - "kn-IN", - "ko-KR", - "ms-MY", - "fa-IR", - "pt-PT", - "ru-RU", - "es-ES", - "ta-IN", - "te-IN", - "th-TH", - "tr-TR", - "vi-VN", - ) - _TURN_DETECTION_LANGUAGES = set(TURN_DETECTION_LANGUAGE_VALUES) - - - def _dump_optional_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value - - - def _is_turn_detection_language(value: typing.Any) -> bool: - return isinstance(value, str) and value in _TURN_DETECTION_LANGUAGES - - - def _validate_turn_detection_language(value: typing.Any) -> TurnDetectionLanguage: - if not _is_turn_detection_language(value): - raise ValueError(f"Invalid interaction language: {value}") - return value # type: ignore[return-value] - - - class Agent: - """A reusable agent definition. - - Use the fluent builder methods (.with_llm(), .with_tts(), .with_stt(), .with_mllm()) - to configure vendor settings after construction. - - Deprecated: - The Agent-level ``instructions``, ``greeting``, ``failure_message``, - ``max_history``, and ``greeting_configs`` convenience fields are kept - for compatibility. Configure those values on the LLM or MLLM vendor - instead. 
- - Examples - -------- - >>> from agora_agent import Agent, OpenAI, ElevenLabsTTS, DeepgramSTT - >>> - >>> agent = Agent(instructions="You are a helpful voice assistant.") - >>> agent = ( - ... agent - ... .with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")) - ... .with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1", sample_rate=24000)) - ... .with_stt(DeepgramSTT(api_key="...", model="nova-2")) - ... ) - """ - - def __init__( - self, - name: typing.Optional[str] = None, - instructions: typing.Optional[str] = None, - turn_detection: typing.Optional[TurnDetectionConfig] = None, - interruption: typing.Optional[InterruptionConfig] = None, - sal: typing.Optional[SalConfig] = None, - advanced_features: typing.Optional[AdvancedFeatures] = None, - parameters: typing.Optional[typing.Union[SessionParams, SessionParamsInput]] = None, - greeting: typing.Optional[str] = None, - failure_message: typing.Optional[str] = None, - max_history: typing.Optional[int] = None, - geofence: typing.Optional[GeofenceConfig] = None, - labels: typing.Optional[typing.Dict[str, str]] = None, - rtc: typing.Optional[RtcConfig] = None, - filler_words: typing.Optional[FillerWordsConfig] = None, - greeting_configs: typing.Optional[LlmGreetingConfigs] = None, - pipeline_id: typing.Optional[str] = None, - ): - self._name = name - self._pipeline_id = pipeline_id - self._instructions = instructions - self._greeting = greeting - self._failure_message = failure_message - self._max_history = max_history - self._llm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts: typing.Optional[typing.Dict[str, typing.Any]] = None - self._stt: typing.Optional[typing.Dict[str, typing.Any]] = None - self._mllm: typing.Optional[typing.Dict[str, typing.Any]] = None - self._tts_sample_rate: typing.Optional[int] = None - self._avatar: typing.Optional[typing.Dict[str, typing.Any]] = None - 
self._avatar_required_sample_rate: typing.Optional[int] = None - self._turn_detection = turn_detection - self._interruption = interruption - self._sal = sal - self._advanced_features = advanced_features - self._parameters = parameters - self._geofence = geofence - self._labels = labels - self._rtc = rtc - self._filler_words = filler_words - self._greeting_configs = greeting_configs - - def with_llm(self, vendor: BaseLLM) -> "Agent": - new_agent = self._clone() - new_agent._llm = vendor.to_config() - return new_agent - - def with_tts(self, vendor: BaseTTS) -> "Agent": - sample_rate = vendor.sample_rate - if ( - self._avatar_required_sample_rate not in (None, 0) - and sample_rate is not None - and sample_rate != self._avatar_required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {self._avatar_required_sample_rate} Hz, " - f"but TTS is configured with {sample_rate} Hz. " - f"Please update your TTS sample_rate to {self._avatar_required_sample_rate}." - ) - new_agent = self._clone() - new_agent._tts = vendor.to_config() - new_agent._tts_sample_rate = sample_rate - return new_agent - - def with_stt(self, vendor: BaseSTT) -> "Agent": - new_agent = self._clone() - new_agent._stt = vendor.to_config() - return new_agent - - def with_mllm(self, vendor: BaseMLLM) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` so callers can still - # configure both for tests, debugging, or disabled-avatar use cases. 
- new_agent = self._clone() - new_agent._mllm = vendor.to_config() - if isinstance(new_agent._mllm, dict): - new_agent._mllm["enable"] = True - if isinstance(new_agent._advanced_features, dict): - advanced_features = {key: value for key, value in new_agent._advanced_features.items() if key != "enable_mllm"} - new_agent._advanced_features = typing.cast(AdvancedFeatures, advanced_features) if advanced_features else None - elif isinstance(new_agent._advanced_features, StartAgentsRequestPropertiesAdvancedFeatures): - advanced_features_model = self._copy_model_update( - new_agent._advanced_features, - {"enable_mllm": None}, - ) - if ( - advanced_features_model.enable_rtm is None - and advanced_features_model.enable_sal is None - and advanced_features_model.enable_tools is None - ): - new_agent._advanced_features = None - else: - new_agent._advanced_features = advanced_features_model - return new_agent - - def with_avatar(self, vendor: BaseAvatar) -> "Agent": - # Note: avatars are not supported with MLLM. The combination is rejected - # at ``to_properties`` / ``AgentSession.start`` (only when the avatar is - # enabled) so callers may still combine the two for testing or for the - # disabled-avatar pattern. - required_sample_rate = vendor.required_sample_rate - if ( - required_sample_rate not in (None, 0) - and self._tts_sample_rate is not None - and self._tts_sample_rate != required_sample_rate - ): - raise ValueError( - f"Avatar requires TTS sample rate of {required_sample_rate} Hz, " - f"but TTS is configured with {self._tts_sample_rate} Hz. " - f"Please update your TTS sample_rate to {required_sample_rate}." 
- ) - new_agent = self._clone() - new_agent._avatar = vendor.to_config() - new_agent._avatar_required_sample_rate = required_sample_rate - return new_agent - - def with_turn_detection(self, config: TurnDetectionConfig) -> "Agent": - new_agent = self._clone() - new_agent._turn_detection = config - return new_agent - - def with_interruption(self, config: InterruptionConfig) -> "Agent": - """Returns a new Agent with unified interruption control configured.""" - new_agent = self._clone() - new_agent._interruption = config - return new_agent - - def with_instructions(self, instructions: str) -> "Agent": - """Deprecated. Configure system messages on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._instructions = instructions - return new_agent - - def with_greeting(self, greeting: str) -> "Agent": - """Deprecated. Configure the greeting on the LLM or MLLM vendor instead.""" - new_agent = self._clone() - new_agent._greeting = greeting - return new_agent - - def with_greeting_configs(self, configs: LlmGreetingConfigs) -> "Agent": - """Deprecated. Configure greeting playback on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._greeting_configs = configs - return new_agent - - def with_name(self, name: str) -> "Agent": - new_agent = self._clone() - new_agent._name = name - return new_agent - - def with_sal(self, config: SalConfig) -> "Agent": - """Returns a new Agent with the specified SAL (Selective Attention Locking) configuration.""" - new_agent = self._clone() - new_agent._sal = config - return new_agent - - def with_advanced_features(self, features: AdvancedFeatures) -> "Agent": - """Returns a new Agent with the specified advanced features configuration. - - Use this to enable RTM and other advanced features. 
- """ - new_agent = self._clone() - new_agent._advanced_features = features - return new_agent - - def with_tools(self, enabled: bool = True) -> "Agent": - """Returns a new Agent with MCP tool invocation enabled or disabled.""" - new_agent = self._clone() - if new_agent._advanced_features is None: - new_agent._advanced_features = StartAgentsRequestPropertiesAdvancedFeatures(enable_tools=enabled) - elif isinstance(new_agent._advanced_features, dict): - new_agent._advanced_features = typing.cast( - AdvancedFeatures, - {**new_agent._advanced_features, "enable_tools": enabled}, - ) - else: - new_agent._advanced_features = self._copy_model_update( - new_agent._advanced_features, - {"enable_tools": enabled}, - ) - return new_agent - - def with_parameters(self, parameters: typing.Union[SessionParams, SessionParamsInput]) -> "Agent": - """Returns a new Agent with the specified session parameters. - - Use this to configure silence behaviour, graceful hang-up, data channel, and more. - """ - new_agent = self._clone() - new_agent._parameters = parameters - return new_agent - - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - - def with_failure_message(self, message: str) -> "Agent": - """Deprecated. 
Configure the failure message on the LLM or MLLM vendor instead.""" - new_agent = self._clone() - new_agent._failure_message = message - return new_agent - - def with_max_history(self, max_history: int) -> "Agent": - """Deprecated. Configure max history on the LLM vendor instead.""" - new_agent = self._clone() - new_agent._max_history = max_history - return new_agent - - def with_geofence(self, geofence: GeofenceConfig) -> "Agent": - """Returns a new Agent with the specified geofence configuration. - - Restricts which geographic regions the agent's backend servers may run in. - """ - new_agent = self._clone() - new_agent._geofence = geofence - return new_agent - - def with_labels(self, labels: typing.Dict[str, str]) -> "Agent": - """Returns a new Agent with the specified custom labels. - - Labels are key-value pairs attached to the agent and returned in notification callbacks. - """ - new_agent = self._clone() - new_agent._labels = dict(labels) - return new_agent - - def with_rtc(self, rtc: RtcConfig) -> "Agent": - """Returns a new Agent with the specified RTC configuration.""" - new_agent = self._clone() - new_agent._rtc = rtc - return new_agent - - def with_filler_words(self, filler_words: FillerWordsConfig) -> "Agent": - """Returns a new Agent with the specified filler words configuration. - - Filler words are played while the agent waits for the LLM to respond. 
- """ - new_agent = self._clone() - new_agent._filler_words = filler_words - return new_agent - - @staticmethod - def _field_value(value: typing.Any, field: str) -> typing.Any: - if value is None: - return None - if isinstance(value, dict): - return value.get(field) - return getattr(value, field, None) - - @staticmethod - def _copy_model_update(value: typing.Any, update: typing.Dict[str, typing.Any]) -> typing.Any: - if hasattr(value, "model_copy"): - return value.model_copy(update=update) - if hasattr(value, "copy"): - return value.copy(update=update) - raise TypeError(f"Object of type {type(value).__name__} does not support model copying") - - def _resolved_parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - enable_rtm = self._field_value(self._advanced_features, "enable_rtm") is True - data_channel = self._field_value(self._parameters, "data_channel") - if not enable_rtm or data_channel is not None: - return self._parameters - if self._parameters is None: - return StartAgentsRequestPropertiesParameters(data_channel="rtm") - if isinstance(self._parameters, dict): - return typing.cast(SessionParamsInput, {**self._parameters, "data_channel": "rtm"}) - return self._copy_model_update(self._parameters, {"data_channel": "rtm"}) - - @property - def name(self) -> typing.Optional[str]: - return self._name - - @property - def pipeline_id(self) -> typing.Optional[str]: - """Published AI Studio pipeline ID used as this agent's base configuration.""" - return self._pipeline_id - - @property - def llm(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._llm - - @property - def tts(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._tts - - @property - def tts_sample_rate(self) -> typing.Optional[int]: - return self._tts_sample_rate - - @property - def stt(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._stt - - @property - def mllm(self) -> typing.Optional[typing.Dict[str, 
typing.Any]]: - return self._mllm - - @property - def turn_detection(self) -> typing.Optional[TurnDetectionConfig]: - return self._turn_detection - - @property - def interruption(self) -> typing.Optional[InterruptionConfig]: - return self._interruption - - @property - def instructions(self) -> typing.Optional[str]: - return self._instructions - - @property - def greeting(self) -> typing.Optional[str]: - return self._greeting - - @property - def greeting_configs(self) -> typing.Optional[LlmGreetingConfigs]: - return self._greeting_configs - - @property - def failure_message(self) -> typing.Optional[str]: - return self._failure_message - - @property - def max_history(self) -> typing.Optional[int]: - return self._max_history - - @property - def avatar(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - return self._avatar - - @property - def sal(self) -> typing.Optional[SalConfig]: - return self._sal - - @property - def advanced_features(self) -> typing.Optional[AdvancedFeatures]: - return self._advanced_features - - @property - def parameters(self) -> typing.Optional[typing.Union[SessionParams, SessionParamsInput]]: - return self._parameters - - @property - def geofence(self) -> typing.Optional[GeofenceConfig]: - return self._geofence - - @property - def labels(self) -> typing.Optional[typing.Dict[str, str]]: - return self._labels - - @property - def rtc(self) -> typing.Optional[RtcConfig]: - return self._rtc - - @property - def filler_words(self) -> typing.Optional[FillerWordsConfig]: - return self._filler_words - - @property - def config(self) -> typing.Dict[str, typing.Any]: - return { - "name": self._name, - "pipeline_id": self._pipeline_id, - "instructions": self._instructions, - "greeting": self._greeting, - "failure_message": self._failure_message, - "max_history": self._max_history, - "llm": self._llm, - "tts": self._tts, - "stt": self._stt, - "mllm": self._mllm, - "turn_detection": self._turn_detection, - "interruption": self._interruption, - "sal": 
self._sal, - "avatar": self._avatar, - "advanced_features": self._advanced_features, - "parameters": self._parameters, - "geofence": self._geofence, - "labels": self._labels, - "rtc": self._rtc, - "filler_words": self._filler_words, - "greeting_configs": self._greeting_configs, - } - - def create_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ) -> "AgentSession": - from .agent_session import AgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def create_async_session( - self, - client: typing.Any, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - name: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], 
None]] = None, - ) -> "AsyncAgentSession": - """Create an async session for use with :class:`~agora_agent.AsyncAgora`. - - Equivalent to :meth:`create_session` but returns an - :class:`~agora_agent.agentkit.AsyncAgentSession`. - """ - from .agent_session import AsyncAgentSession - - session_name = name or self._name or f"agent-{int(time.time())}" - return AsyncAgentSession( - client=client, - agent=self, - app_id=client.app_id if hasattr(client, "app_id") else "", - app_certificate=client.app_certificate if hasattr(client, "app_certificate") else None, - name=session_name, - channel=channel, - token=token, - agent_uid=agent_uid, - remote_uids=remote_uids, - idle_timeout=idle_timeout, - enable_string_uid=enable_string_uid, - preset=preset, - pipeline_id=pipeline_id, - expires_in=expires_in, - debug=debug, - warn=warn, - ) - - def to_properties( - self, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - token: typing.Optional[str] = None, - app_id: typing.Optional[str] = None, - app_certificate: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - skip_vendor_validation: bool = False, - ) -> StartAgentsRequestProperties: - # Validate the MLLM + enabled-avatar combination BEFORE generating the - # RTC token so callers get a clear, actionable error first (matches the - # TypeScript and Go SDKs' fail-fast contract). - mllm_flag = isinstance(self._mllm, dict) and self._mllm.get("enable") is True - is_mllm_mode = bool(mllm_flag or self._mllm is not None) - avatar_enabled = ( - isinstance(self._avatar, dict) and self._avatar.get("enable") is not False - ) - if is_mllm_mode and avatar_enabled: - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." 
- ) - - if token is None: - if app_id is None or app_certificate is None: - raise ValueError("Either token or app_id+app_certificate must be provided") - validated_expires_in = _validate_expires_in(expires_in) if expires_in is not None else None - # Use generate_convo_ai_token (RTC + RTM) so the token works whether or - # not the caller enables advanced_features.enable_rtm. - token_kwargs: typing.Dict[str, typing.Any] = {} - if validated_expires_in is not None: - token_kwargs["token_expire"] = validated_expires_in - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=channel, - uid=_parse_numeric_uid(agent_uid, "agent_uid"), - **token_kwargs, - ) - - base_kwargs: typing.Dict[str, typing.Any] = { - "channel": channel, - "token": token, - "agent_rtc_uid": agent_uid, - "remote_rtc_uids": remote_uids, - } - - if idle_timeout is not None: - base_kwargs["idle_timeout"] = idle_timeout - if enable_string_uid is not None: - base_kwargs["enable_string_uid"] = enable_string_uid - if self._mllm is not None: - base_kwargs["mllm"] = self._mllm - if self._turn_detection is not None: - base_kwargs["turn_detection"] = self._turn_detection - if self._interruption is not None: - base_kwargs["interruption"] = self._interruption - if self._sal is not None: - base_kwargs["sal"] = self._sal - if self._avatar is not None: - base_kwargs["avatar"] = self._avatar - if self._advanced_features is not None: - base_kwargs["advanced_features"] = self._advanced_features - parameters = self._resolved_parameters() - if parameters is not None: - if isinstance(parameters, dict): - base_kwargs["parameters"] = StartAgentsRequestPropertiesParameters(**parameters) - else: - base_kwargs["parameters"] = parameters - if self._geofence is not None: - base_kwargs["geofence"] = self._geofence - if self._labels is not None: - base_kwargs["labels"] = self._labels - if self._rtc is not None: - base_kwargs["rtc"] = self._rtc - if self._filler_words is not None: - 
base_kwargs["filler_words"] = self._filler_words - - if is_mllm_mode: - if self._mllm is not None: - mllm_config = dict(self._mllm) - if self._greeting is not None: - mllm_config.setdefault("greeting_message", self._greeting) - if self._failure_message is not None: - mllm_config.setdefault("failure_message", self._failure_message) - base_kwargs["mllm"] = mllm_config - return StartAgentsRequestProperties(**base_kwargs) - - base_kwargs["asr"] = self._resolve_asr_config() - base_kwargs["turn_detection"] = self._resolve_turn_detection_config() - - if skip_vendor_validation: - return StartAgentsRequestProperties(**base_kwargs) - - if self._tts is None: - raise ValueError("TTS configuration is required. Use with_tts() to set it.") - - if self._llm is None: - raise ValueError("LLM configuration is required. Use with_llm() to set it.") - - llm_config = dict(self._llm) - # Agent-level fields take priority over the vendor's defaults. - # This matches the TS SDK where agent-level values override vendor config. 
- if self._instructions is not None: - llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] - if self._greeting is not None: - llm_config["greeting_message"] = self._greeting - if self._greeting_configs is not None: - llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) - if self._failure_message is not None: - llm_config["failure_message"] = self._failure_message - if self._max_history is not None: - llm_config["max_history"] = self._max_history - - base_kwargs["llm"] = llm_config - base_kwargs["tts"] = self._tts - - return StartAgentsRequestProperties(**base_kwargs) - - def _resolve_asr_config(self) -> typing.Dict[str, typing.Any]: - asr_config = dict(self._stt or {}) - asr_config.pop("language", None) - if not asr_config: - asr_config["vendor"] = "ares" - return asr_config - - def _resolve_turn_detection_config(self) -> TurnDetectionConfig: - existing_stt_language = self._stt.get("language") if self._stt is not None else None - existing_turn_detection_language = self._field_value(self._turn_detection, "language") - language = ( - existing_turn_detection_language - if existing_turn_detection_language is not None - else existing_stt_language - if _is_turn_detection_language(existing_stt_language) - else DEFAULT_TURN_DETECTION_LANGUAGE - ) - language = _validate_turn_detection_language(language) - if self._turn_detection is None: - return StartAgentsRequestPropertiesTurnDetection(language=language) - if isinstance(self._turn_detection, dict): - return typing.cast(TurnDetectionConfig, {**self._turn_detection, "language": language}) - return self._copy_model_update(self._turn_detection, {"language": language}) - - def _clone(self) -> "Agent": - new_agent = Agent.__new__(Agent) - new_agent._name = self._name - new_agent._pipeline_id = self._pipeline_id - new_agent._llm = self._llm - new_agent._tts = self._tts - new_agent._stt = self._stt - new_agent._mllm = self._mllm - new_agent._tts_sample_rate = 
self._tts_sample_rate - new_agent._avatar = self._avatar - new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate - new_agent._turn_detection = self._turn_detection - new_agent._interruption = self._interruption - new_agent._sal = self._sal - new_agent._advanced_features = self._advanced_features - new_agent._parameters = self._parameters - new_agent._instructions = self._instructions - new_agent._greeting = self._greeting - new_agent._failure_message = self._failure_message - new_agent._max_history = self._max_history - new_agent._geofence = self._geofence - new_agent._labels = self._labels - new_agent._rtc = self._rtc - new_agent._filler_words = self._filler_words - new_agent._greeting_configs = self._greeting_configs - return new_agent - src/agora_agent/agentkit/agent_session.py: | - import typing - import warnings - - from ..core.api_error import ApiError - from ..agent_management.types.agent_think_agent_management_request_on_listening_action import ( - AgentThinkAgentManagementRequestOnListeningAction as AgentThinkRequestOnListeningAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_speaking_action import ( - AgentThinkAgentManagementRequestOnSpeakingAction as AgentThinkRequestOnSpeakingAction, - ) - from ..agent_management.types.agent_think_agent_management_request_on_thinking_action import ( - AgentThinkAgentManagementRequestOnThinkingAction as AgentThinkRequestOnThinkingAction, - ) - from ..agent_management.types.agent_think_agent_management_response import ( - AgentThinkAgentManagementResponse as AgentThinkResponse, - ) - from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse - from ..agents.types.start_agents_request_properties import StartAgentsRequestProperties - from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions - from .avatar_types import ( - is_akool_avatar, - is_anam_avatar, - is_avatar_token_managed, - is_generic_avatar, - is_heygen_avatar, - 
is_live_avatar_avatar, - validate_avatar_config, - validate_tts_sample_rate, - ) - from .presets import resolve_session_presets - from .token import generate_convo_ai_token, _parse_numeric_uid - - - class _AgentSessionRequiredOptions(typing.TypedDict, total=True): - """Required fields shared by both sync and async session constructors.""" - - client: typing.Any - agent: Agent - app_id: str - name: str - channel: str - agent_uid: str - remote_uids: typing.List[str] - - - class AgentSessionOptions(_AgentSessionRequiredOptions, total=False): - """Configuration options for creating an agent session. - - Required fields - --------------- - client, agent, app_id, name, channel, agent_uid, remote_uids - - Optional fields - --------------- - app_certificate, token, idle_timeout, enable_string_uid, preset, - pipeline_id, expires_in, debug, warn - """ - - app_certificate: str - token: str - idle_timeout: int - enable_string_uid: bool - preset: typing.Union[str, typing.Sequence[str]] - pipeline_id: str - expires_in: int - debug: bool - warn: typing.Callable[[str], None] - - - class _AgentSessionBase: - """Shared state and helpers for :class:`AgentSession` and :class:`AsyncAgentSession`. - - Not intended for direct use — instantiate one of the concrete subclasses or - call :meth:`Agent.create_session` / :meth:`Agent.create_async_session`. 
- """ - - def __init__( - self, - client: typing.Any, - agent: Agent, - app_id: str, - name: str, - channel: str, - agent_uid: str, - remote_uids: typing.List[str], - app_certificate: typing.Optional[str] = None, - token: typing.Optional[str] = None, - idle_timeout: typing.Optional[int] = None, - enable_string_uid: typing.Optional[bool] = None, - preset: typing.Optional[typing.Union[str, typing.Sequence[str]]] = None, - pipeline_id: typing.Optional[str] = None, - expires_in: typing.Optional[int] = None, - debug: typing.Optional[bool] = None, - warn: typing.Optional[typing.Callable[[str], None]] = None, - ): - self._client = client - self._agent = agent - self._app_id = app_id - self._app_certificate = app_certificate - self._name = name - self._channel = channel - self._token = token - self._agent_uid = agent_uid - self._remote_uids = remote_uids - self._idle_timeout = idle_timeout - self._enable_string_uid = enable_string_uid - self._preset = preset - self._pipeline_id = pipeline_id - self._expires_in = expires_in - self._debug = debug - self._warn = warn or warnings.warn - self._agent_id: typing.Optional[str] = None - self._status: str = "idle" - self._event_handlers: typing.Dict[str, typing.List[typing.Callable[..., None]]] = {} - - # ------------------------------------------------------------------ - # Public read-only properties - # ------------------------------------------------------------------ - - @property - def id(self) -> typing.Optional[str]: - return self._agent_id - - @property - def status(self) -> str: - return self._status - - @property - def agent(self) -> Agent: - return self._agent - - @property - def app_id(self) -> str: - return self._app_id - - @property - def raw(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentsClient. - - Use this to access any new endpoints that Fern generates without - waiting for agentkit method updates. 
- """ - return self._client.agents - - @property - def raw_agent_management(self) -> typing.Any: - """Direct access to the underlying Fern-generated AgentManagement client.""" - return self._client.agent_management - - # ------------------------------------------------------------------ - # Internal helpers - # ------------------------------------------------------------------ - - def _convo_ai_headers(self) -> typing.Optional[typing.Dict[str, str]]: - """Return per-request auth headers when client is in app-credentials mode. - - In app-credentials mode a fresh ConvoAI token (RTC + RTM) is generated - for every request and returned as ``Authorization: agora token=``. - In basic-auth mode this returns ``None`` (the client-level header is used). - """ - if getattr(self._client, "auth_mode", None) != "app-credentials": - return None - app_id: str = getattr(self._client, "app_id", self._app_id) - app_certificate: typing.Optional[str] = getattr( - self._client, "app_certificate", self._app_certificate - ) - if not app_certificate: - raise RuntimeError("app_certificate is required for app-credentials auth mode") - token = generate_convo_ai_token( - app_id=app_id, - app_certificate=app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(self._agent_uid, "agent_uid"), - ) - return {"Authorization": f"agora token={token}"} - - def _request_options(self) -> typing.Optional[typing.Dict[str, typing.Any]]: - """Build request_options dict with per-request auth headers if needed.""" - headers = self._convo_ai_headers() - if headers is None: - return None - return {"additional_headers": headers} - - def _validate_avatar_config(self) -> None: - avatar = self._agent.avatar - tts = self._agent.tts - if not avatar or avatar.get("enable", True) is False: - return - if self._is_mllm_mode(): - raise ValueError( - "Avatars are only supported with the cascading ASR + LLM + TTS pipeline. " - "Remove the avatar configuration when using MLLM, or switch to a cascading session." 
- ) - - if ( - is_heygen_avatar(avatar) - or is_live_avatar_avatar(avatar) - or is_akool_avatar(avatar) - or is_anam_avatar(avatar) - or is_generic_avatar(avatar) - ): - validate_avatar_config(avatar) - - tts_params = tts.get("params") if isinstance(tts, dict) else None - sample_rate = self._agent.tts_sample_rate - if sample_rate is None and isinstance(tts_params, dict): - sample_rate = ( - tts_params.get("sample_rate") - or tts_params.get("sample_rate_hertz") - or tts_params.get("samplingRate") - ) - if isinstance(sample_rate, int): - validate_tts_sample_rate(avatar, sample_rate) - elif is_heygen_avatar(avatar): - self._warn( - "Warning: HeyGen avatar detected but TTS sample_rate is not explicitly set. " - "HeyGen requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_live_avatar_avatar(avatar): - self._warn( - "Warning: LiveAvatar avatar detected but TTS sample_rate is not explicitly set. " - "LiveAvatar requires 24,000 Hz. Please ensure your TTS provider is configured for 24kHz." - ) - elif is_akool_avatar(avatar): - self._warn( - "Warning: Akool avatar detected but TTS sample_rate is not explicitly set. " - "Akool requires 16,000 Hz. Please ensure your TTS provider is configured for 16kHz." 
- ) - - def _enrich_avatar_for_session(self, properties: typing.Dict[str, typing.Any]) -> None: - avatar = properties.get("avatar") - if not isinstance(avatar, dict) or avatar.get("enable", True) is False: - return - - params = avatar.get("params") - if not isinstance(params, dict): - params = {} - avatar["params"] = params - - if is_generic_avatar(avatar): - if not params.get("agora_appid"): - params["agora_appid"] = self._app_id - if not params.get("agora_channel"): - params["agora_channel"] = self._channel - - if not is_avatar_token_managed(avatar): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_uid"): - validate_avatar_config(avatar, require_session_fields=is_generic_avatar(avatar)) - return - - if not params.get("agora_token"): - if not self._app_certificate: - raise ValueError( - "Cannot auto-generate avatar RTC token: app_certificate is required when agora_token is omitted. " - "Pass app_certificate on the Agora client or supply agora_token explicitly on the avatar vendor." - ) - token_kwargs: typing.Dict[str, typing.Any] = {} - if self._expires_in is not None: - token_kwargs["token_expire"] = self._expires_in - params["agora_token"] = generate_convo_ai_token( - app_id=self._app_id, - app_certificate=self._app_certificate, - channel_name=self._channel, - uid=_parse_numeric_uid(str(params["agora_uid"]), "avatar agora_uid"), - **token_kwargs, - ) - - if str(params.get("agora_uid")) == self._agent_uid: - self._warn( - "Warning: avatar agora_uid matches agent_rtc_uid. Use a unique UID for the avatar video publisher." 
- ) - - validate_avatar_config(avatar, require_session_fields=True) - - @staticmethod - def _dump_model(value: typing.Any) -> typing.Any: - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if isinstance(value, dict): - return {k: _AgentSessionBase._dump_model(v) for k, v in value.items() if v is not None} - if isinstance(value, list): - return [_AgentSessionBase._dump_model(item) for item in value] - return value - - def _is_mllm_mode(self) -> bool: - mllm = self._agent.mllm - if isinstance(mllm, dict) and mllm.get("enable") is True: - return True - return mllm is not None - - def _build_start_properties( - self, - token_opts: typing.Dict[str, typing.Any], - skip_vendor_validation: bool, - ) -> typing.Dict[str, typing.Any]: - base_properties = self._agent.to_properties( - channel=self._channel, - agent_uid=self._agent_uid, - remote_uids=self._remote_uids, - idle_timeout=self._idle_timeout, - enable_string_uid=self._enable_string_uid, - skip_vendor_validation=skip_vendor_validation, - **token_opts, - ) - properties = self._dump_model(base_properties) - self._enrich_avatar_for_session(properties) - - if self._is_mllm_mode(): - if self._agent.mllm is not None: - mllm = self._dump_model(self._agent.mllm) - if not isinstance(mllm, dict): - mllm = {} - if self._agent.greeting is not None: - mllm.setdefault("greeting_message", self._agent.greeting) - if self._agent.failure_message is not None: - mllm.setdefault("failure_message", self._agent.failure_message) - properties["mllm"] = mllm - return properties - - if self._agent.tts is not None: - properties["tts"] = self._dump_model(self._agent.tts) - if self._agent.llm is not None: - llm = dict(self._agent.llm) - if self._agent.instructions is not None: - llm["system_messages"] = [{"role": "system", "content": self._agent.instructions}] - if self._agent.greeting is not None: - llm["greeting_message"] = self._agent.greeting - if self._agent.greeting_configs is not None: - llm["greeting_configs"] 
= self._dump_model(self._agent.greeting_configs) - if self._agent.failure_message is not None: - llm["failure_message"] = self._agent.failure_message - if self._agent.max_history is not None: - llm["max_history"] = self._agent.max_history - properties["llm"] = llm - if self._agent.stt is not None: - properties["asr"] = self._dump_model(self._agent.stt) - - return properties - - @staticmethod - def _page_value(pagination: typing.Any, field: str) -> typing.Any: - if pagination is None: - return None - if isinstance(pagination, dict): - return pagination.get(field) - return getattr(pagination, field, None) - - @staticmethod - def _response_turns(response: typing.Any) -> typing.List[typing.Any]: - turns = response.get("turns") if isinstance(response, dict) else getattr(response, "turns", None) - return list(turns or []) - - @staticmethod - def _response_pagination(response: typing.Any) -> typing.Any: - if isinstance(response, dict): - return response.get("pagination") - return getattr(response, "pagination", None) - - @classmethod - def _with_all_turns(cls, first_response: typing.Any, turns: typing.List[typing.Any]) -> GetTurnsAgentsResponse: - data = cls._dump_model(first_response) - if not isinstance(data, dict): - data = {} - data["turns"] = turns - return GetTurnsAgentsResponse(**data) - - # ------------------------------------------------------------------ - # Event handling - # ------------------------------------------------------------------ - - def on(self, event: str, handler: typing.Callable[..., None]) -> None: - """Register an event handler. - - Parameters - ---------- - event : str - The event type (``started``, ``stopped``, ``error``). - handler : callable - The event handler to invoke when the event fires. 
- """ - if event not in self._event_handlers: - self._event_handlers[event] = [] - self._event_handlers[event].append(handler) - - def off(self, event: str, handler: typing.Callable[..., None]) -> None: - """Unregister a previously registered event handler.""" - handlers = self._event_handlers.get(event) - if handlers and handler in handlers: - handlers.remove(handler) - - def _emit(self, event: str, data: typing.Any) -> None: - handlers = self._event_handlers.get(event) - if handlers: - for handler in handlers: - try: - handler(data) - except Exception as exc: - # Prevent a misbehaving handler from blocking other handlers or - # the session lifecycle. Warn so the error is not silently lost. - warnings.warn( - f"Event handler for '{event}' raised an exception: {exc}", - stacklevel=2, - ) - - - class AgentSession(_AgentSessionBase): - """Manages the lifecycle of an agent session (synchronous). - - This class provides a high-level interface for managing agent sessions, - including starting, stopping, and interacting with the agent. - - Use :meth:`Agent.create_session` to create a session — this is the - recommended entry point. - - Examples - -------- - >>> from agora_agent import Agora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = Agora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are a helpful voice assistant.") - >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) - >>> session = agent.create_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = session.start() - >>> session.say("Hello!") - >>> session.stop() - """ - - def start(self) -> str: - """Start the agent session. - - Returns - ------- - str - The agent ID. 
- - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. - """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") - - self._validate_avatar_config() - self._status = "starting" - - try: - pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } - - properties = self._build_start_properties(token_opts, skip_vendor_validation=bool(self._preset or pipeline_id)) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - properties, - ) - - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": pipeline_id, - "properties": resolved_properties, - }) - - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + return _start_properties_from_mapping(base_kwargs) - response = self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=pipeline_id, - request_options=self._request_options(), + if skip_vendor_validation: + warnings.warn( + "skip_vendor_validation is deprecated and will be removed in a future release. 
" + "Use skip_vendor_validation_categories and allow_missing_vendor_categories instead.", + DeprecationWarning, + stacklevel=2, ) - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise + skip_categories = set(skip_vendor_validation_categories or ()) + allow_missing_categories = set(allow_missing_vendor_categories or ()) + if skip_vendor_validation: + skip_categories.update({"asr", "llm", "tts"}) + allow_missing_categories.update({"asr", "llm", "tts"}) - def stop(self) -> None: - """Stop the agent session. + skip_asr_validation = skip_vendor_validation or "asr" in skip_categories + skip_llm_validation = skip_vendor_validation or "llm" in skip_categories + skip_tts_validation = skip_vendor_validation or "tts" in skip_categories + allow_missing_asr = "asr" in allow_missing_categories + allow_missing_llm = "llm" in allow_missing_categories + allow_missing_tts = "tts" in allow_missing_categories - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + turn_detection_config = self._resolve_turn_detection_config() + if not skip_asr_validation and (self._stt is not None or not allow_missing_asr): + base_kwargs["asr"] = self._resolve_asr_config(turn_detection_config) + base_kwargs["turn_detection"] = turn_detection_config - self._status = "stopping" + if skip_vendor_validation: + return _start_properties_from_mapping(base_kwargs) - try: - self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise + if self._tts is None and not (skip_tts_validation or allow_missing_tts): + raise ValueError("TTS configuration is required. Use with_tts() to set it.") - def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. + if self._llm is None and not (skip_llm_validation or allow_missing_llm): + raise ValueError("LLM configuration is required. Use with_llm() to set it.") - Parameters - ---------- - text : str - The text to speak. - priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + if self._llm is not None and not skip_llm_validation: + base_kwargs["llm"] = self._resolve_llm_config() + if self._tts is not None and not skip_tts_validation: + base_kwargs["tts"] = self._tts - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable + return _start_properties_from_mapping(base_kwargs) - self._client.agents.speak( - self._app_id, self._agent_id, request_options=self._request_options(), **kwargs - ) + def _resolve_llm_config(self) -> typing.Dict[str, typing.Any]: + llm_config = dict(self._llm or {}) + if self._instructions is not None and "system_messages" not in llm_config: + llm_config["system_messages"] = [{"role": "system", "content": self._instructions}] + if self._greeting is not None and "greeting_message" not in llm_config: + llm_config["greeting_message"] = self._greeting + if self._greeting_configs is not None and "greeting_configs" not in llm_config: + llm_config["greeting_configs"] = _dump_optional_model(self._greeting_configs) + if self._failure_message is not None and "failure_message" not in llm_config: + llm_config["failure_message"] = self._failure_message + if self._max_history is not None and "max_history" not in llm_config: + llm_config["max_history"] = self._max_history + return llm_config - def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + def _resolve_asr_config(self, turn_detection_config: TurnDetectionConfig) -> typing.Dict[str, typing.Any]: + asr_config = dict(self._stt 
or {}) + if not asr_config: + asr_config["vendor"] = "ares" + asr_config["language"] = self._field_value(turn_detection_config, "language") + return asr_config - self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() + def _resolve_turn_detection_config(self) -> TurnDetectionConfig: + existing_turn_detection_language = self._field_value(self._turn_detection, "language") + language = ( + existing_turn_detection_language + if existing_turn_detection_language is not None + else DEFAULT_TURN_DETECTION_LANGUAGE ) + language = _validate_turn_detection_language(language) + if self._turn_detection is None: + return StartAgentsRequestPropertiesTurnDetection(language=language) + if isinstance(self._turn_detection, dict): + return typing.cast(TurnDetectionConfig, {**self._turn_detection, "language": language}) + return self._copy_model_update(self._turn_detection, {"language": language}) - def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. 
+ def _clone(self) -> "Agent": + new_agent = Agent.__new__(Agent) + new_agent._name = self._name + new_agent._pipeline_id = self._pipeline_id + new_agent._llm = self._llm + new_agent._tts = self._tts + new_agent._stt = self._stt + new_agent._mllm = self._mllm + new_agent._tts_sample_rate = self._tts_sample_rate + new_agent._avatar = self._avatar + new_agent._avatar_required_sample_rate = self._avatar_required_sample_rate + new_agent._turn_detection = self._turn_detection + new_agent._interruption = self._interruption + new_agent._sal = self._sal + new_agent._advanced_features = self._advanced_features + new_agent._parameters = self._parameters + new_agent._instructions = self._instructions + new_agent._greeting = self._greeting + new_agent._failure_message = self._failure_message + new_agent._max_history = self._max_history + new_agent._geofence = self._geofence + new_agent._labels = self._labels + new_agent._rtc = self._rtc + new_agent._filler_words = self._filler_words + new_agent._greeting_configs = self._greeting_configs + return new_agent + src/agora_agent/agentkit/vendors/llm.py: | + from typing import Any, Dict, List, Optional, Union - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + from pydantic import BaseModel, ConfigDict, Field, model_validator - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata + from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( + StartAgentsRequestPropertiesLlmGreetingConfigs, + ) + from .base import BaseLLM - return self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) + LlmGreetingConfigs = Dict[str, Any] + _OPENAI_MANAGED_MODELS = {"gpt-4o-mini", "gpt-4.1-mini", "gpt-5-nano", "gpt-5-mini"} - def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. - """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + def _ensure_mcp_transport(servers: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Ensure each MCP server has transport set (API requires it). 
Default to streamable_http.""" + result = [] + for s in servers: + item = dict(s) + if item.get("transport") is None: + item["transport"] = "streamable_http" + result.append(item) + return result - self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) - def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") + def _dump_optional_model(value: Any) -> Any: + if hasattr(value, "model_dump"): + return value.model_dump(exclude_none=True) + if hasattr(value, "dict"): + return value.dict(exclude_none=True) + return value + + class OpenAIOptions(BaseModel): + model_config = ConfigDict(extra="forbid") + + api_key: Optional[str] = Field(default=None, description="OpenAI API key") + model: str = Field(..., description="Model name") + base_url: Optional[str] = Field(default=None, description="Custom base URL") + temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) + top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + max_tokens: Optional[int] = Field(default=None, gt=0) + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = Field(default=None) + headers: Optional[Dict[str, str]] = Field(default=None) + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - return 
self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) + @model_validator(mode="after") + def _validate_byok_params(self) -> "OpenAIOptions": + if not self.model: + raise ValueError("OpenAI requires model") + if self.api_key is not None and self.base_url is None: + raise ValueError("OpenAI requires base_url when api_key is set") + if self.api_key is None and self.base_url is not None: + raise ValueError("OpenAI base_url is only valid when api_key is set") + if self.api_key is None and self.model.strip().lower() not in _OPENAI_MANAGED_MODELS: + raise ValueError("OpenAI requires api_key unless using a supported Agora-managed model") + if self.api_key is None and self.vendor is not None: + raise ValueError("OpenAI Agora-managed mode does not allow vendor") + return self - def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") + class OpenAI(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = OpenAIOptions(**kwargs) - return self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) + def to_config(self) -> Dict[str, Any]: + # model is the default; explicit params entries extend/override it. + # This matches the TS SDK behaviour: { model, ...params }. + params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} - def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") + # Named fields take precedence over anything in the generic params dict. 
+ if self.options.max_tokens is not None: + params["max_tokens"] = self.options.max_tokens + if self.options.temperature is not None: + params["temperature"] = self.options.temperature + if self.options.top_p is not None: + params["top_p"] = self.options.top_p - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not None: - kwargs["page_size"] = page_size + config: Dict[str, Any] = { + "url": self.options.base_url or "https://api.openai.com/v1/chat/completions", + "params": params, + "style": "openai", + "input_modalities": self.options.input_modalities or ["text"], + } + if self.options.api_key is not None: + config["api_key"] = self.options.api_key + if self.options.headers is not None: + config["headers"] = self.options.headers - return self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) + if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + config["template_variables"] = self.options.template_variables + if self.options.vendor is not None: + config["vendor"] = self.options.vendor + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history - def get_all_turns(self, *, 
page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. + return config - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." - ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) + class AzureOpenAIOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - class AsyncAgentSession(_AgentSessionBase): - """Async version of :class:`AgentSession` for use with :class:`AsyncAgora`. + api_key: str = Field(..., description="Azure OpenAI API key") + model: str = Field(..., description="Azure deployment model name") + endpoint: str = Field(..., description="Azure endpoint URL") + deployment_name: str = Field(..., description="Azure deployment name") + api_version: str = Field(default="2024-08-01-preview", description="Azure API version") + temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) + top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + max_tokens: Optional[int] = Field(default=None, gt=0) + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = Field(default=None) + headers: Optional[Dict[str, str]] = Field(default=None) + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - Use :meth:`Agent.create_async_session` to create a session — this is the - recommended entry point. 
+ class AzureOpenAI(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = AzureOpenAIOptions(**kwargs) - Examples - -------- - >>> from agora_agent import AsyncAgora, Area, Agent, OpenAI, ElevenLabsTTS - >>> - >>> client = AsyncAgora(area=Area.US, app_id="...", app_certificate="...") - >>> agent = Agent(name="assistant", instructions="You are helpful.") - >>> agent = agent.with_llm(OpenAI(api_key="...", base_url="https://api.openai.com/v1/chat/completions", model="gpt-4")).with_tts(ElevenLabsTTS(key="...", model_id="...", voice_id="...", base_url="wss://api.elevenlabs.io/v1")) - >>> session = agent.create_async_session(client, channel="room-123", agent_uid="1", remote_uids=["100"]) - >>> agent_id = await session.start() - >>> await session.say("Hello!") - >>> await session.stop() - """ + def to_config(self) -> Dict[str, Any]: + url = ( + f"{self.options.endpoint}/openai/deployments/" + f"{self.options.deployment_name}/chat/completions" + f"?api-version={self.options.api_version}" + ) + config: Dict[str, Any] = { + "url": url, + "api_key": self.options.api_key, + "vendor": self.options.vendor or "azure", + "style": "openai", + "input_modalities": self.options.input_modalities or ["text"], + } - async def start(self) -> str: - """Start the agent session. + # Named fields take precedence over anything in the generic params dict. + params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} + if self.options.temperature is not None: + params["temperature"] = self.options.temperature + if self.options.top_p is not None: + params["top_p"] = self.options.top_p + if self.options.max_tokens is not None: + params["max_tokens"] = self.options.max_tokens + if params: + config["params"] = params + if self.options.headers is not None: + config["headers"] = self.options.headers - Returns - ------- - str - The agent ID. 
+ if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + config["template_variables"] = self.options.template_variables + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history - Raises - ------ - RuntimeError - If the session is not in a startable state. - ValueError - If avatar/TTS configuration is invalid. 
- """ - if self._status not in ("idle", "stopped", "error"): - raise RuntimeError(f"Cannot start session in {self._status} state") + return config - self._validate_avatar_config() - self._status = "starting" - try: - pipeline_id = self._pipeline_id if self._pipeline_id is not None else self._agent.pipeline_id - if self._token: - token_opts: typing.Dict[str, typing.Any] = {"token": self._token} - else: - token_opts = { - "app_id": self._app_id, - "app_certificate": self._app_certificate, - "expires_in": self._expires_in, - } + class AnthropicOptions(BaseModel): + model_config = ConfigDict(extra="forbid") + + api_key: str = Field(..., description="Anthropic API key") + model: str = Field(..., description="Model name") + url: str = Field(..., description="Anthropic messages endpoint URL") + max_tokens: int = Field(..., gt=0) + temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) + top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = Field(default=None) + headers: Dict[str, str] = Field(..., description="Anthropic request headers") + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - properties = self._build_start_properties(token_opts, skip_vendor_validation=bool(self._preset or pipeline_id)) - resolved_preset, resolved_properties = resolve_session_presets( - self._preset, - 
properties, - ) + class Anthropic(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = AnthropicOptions(**kwargs) - if self._debug: - print("[Agora Debug] Starting agent session...") - print("[Agora Debug] Request:", { - "appid": self._app_id, - "name": self._name, - "preset": resolved_preset, - "pipeline_id": pipeline_id, - "properties": resolved_properties, - }) + def to_config(self) -> Dict[str, Any]: + # Named fields take precedence over anything in the generic params dict. + params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} + if self.options.max_tokens is not None: + params["max_tokens"] = self.options.max_tokens + if self.options.temperature is not None: + params["temperature"] = self.options.temperature + if self.options.top_p is not None: + params["top_p"] = self.options.top_p - try: - request_properties: typing.Any = StartAgentsRequestProperties(**resolved_properties) - except Exception: - request_properties = resolved_properties + config: Dict[str, Any] = { + "url": self.options.url, + "api_key": self.options.api_key, + "params": params, + "headers": self.options.headers, + "style": "anthropic", + "input_modalities": self.options.input_modalities or ["text"], + } - response = await self._client.agents.start( - self._app_id, - name=self._name, - properties=request_properties, - preset=resolved_preset, - pipeline_id=pipeline_id, - request_options=self._request_options(), - ) + if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = 
_dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + config["template_variables"] = self.options.template_variables + if self.options.vendor is not None: + config["vendor"] = self.options.vendor + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history - self._agent_id = response.agent_id if hasattr(response, "agent_id") else None - self._status = "running" - self._emit("started", {"agent_id": self._agent_id}) - return self._agent_id or "" - except Exception as e: - self._status = "error" - self._emit("error", e) - raise + return config - async def stop(self) -> None: - """Stop the agent session. - If the agent has already stopped (e.g., crashed or timed out), the - server returns 404, which this method treats as a successful stop - rather than raising an error. - """ - if self._status != "running": - raise RuntimeError(f"Cannot stop session in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + class GeminiOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - self._status = "stopping" + api_key: str = Field(..., description="Google AI API key") + model: str = Field(..., description="Model name") + url: Optional[str] = Field(default=None, description="Custom API endpoint URL") + temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) + top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + top_k: Optional[int] = Field(default=None, gt=0) + max_output_tokens: Optional[int] = Field(default=None, gt=0) + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = 
Field(default=None) + headers: Optional[Dict[str, str]] = Field(default=None) + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - try: - await self._client.agents.stop( - self._app_id, self._agent_id, request_options=self._request_options() - ) - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - except ApiError as e: - if e.status_code == 404: - self._status = "stopped" - self._emit("stopped", {"agent_id": self._agent_id}) - return - self._status = "error" - self._emit("error", e) - raise - except Exception as e: - self._status = "error" - self._emit("error", e) - raise + class Gemini(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = GeminiOptions(**kwargs) - async def say( - self, - text: str, - priority: typing.Optional[str] = None, - interruptable: typing.Optional[bool] = None, - *, - options: typing.Optional["SayOptions"] = None, - ) -> None: - """Send a message to be spoken by the agent. + def to_config(self) -> Dict[str, Any]: + # Named fields take precedence over anything in the generic params dict. + params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} + if self.options.temperature is not None: + params["temperature"] = self.options.temperature + if self.options.top_p is not None: + params["top_p"] = self.options.top_p + if self.options.top_k is not None: + params["top_k"] = self.options.top_k + if self.options.max_output_tokens is not None: + params["max_output_tokens"] = self.options.max_output_tokens - Parameters - ---------- - text : str - The text to speak. 
- priority : str, optional - Priority of the message (``INTERRUPT``, ``APPEND``, ``IGNORE``). - interruptable : bool, optional - Whether the message can be interrupted by the user. - """ - if self._status != "running": - raise RuntimeError(f"Cannot say in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + config: Dict[str, Any] = { + "url": self.options.url or "https://generativelanguage.googleapis.com/v1beta/models", + "api_key": self.options.api_key, + "params": params, + "style": "gemini", + "input_modalities": self.options.input_modalities or ["text"], + } - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if priority is not None: - kwargs["priority"] = priority - if interruptable is not None: - kwargs["interruptable"] = interruptable + if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.headers is not None: + config["headers"] = self.options.headers + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + config["template_variables"] = self.options.template_variables + if self.options.vendor is not None: + config["vendor"] = self.options.vendor + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history - await self._client.agents.speak( - self._app_id, self._agent_id, 
request_options=self._request_options(), **kwargs - ) + return config - async def interrupt(self) -> None: - """Interrupt the agent while it is speaking or thinking.""" - if self._status != "running": - raise RuntimeError(f"Cannot interrupt in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - await self._client.agents.interrupt( - self._app_id, self._agent_id, request_options=self._request_options() - ) + class GroqOptions(OpenAIOptions): + model_config = ConfigDict(extra="forbid") - async def think( - self, - text: str, - *, - on_listening_action: typing.Optional[AgentThinkRequestOnListeningAction] = None, - on_thinking_action: typing.Optional[AgentThinkRequestOnThinkingAction] = None, - on_speaking_action: typing.Optional[AgentThinkRequestOnSpeakingAction] = None, - interruptable: typing.Optional[bool] = None, - metadata: typing.Optional[typing.Dict[str, str]] = None, - options: typing.Optional["ThinkOptions"] = None, - ) -> AgentThinkResponse: - """Inject a custom text instruction into the current session pipeline. + api_key: str = Field(..., description="Groq API key") + model: str = Field(..., description="Model name") + base_url: str = Field(..., description="Groq-compatible endpoint") - In API v2.7, omitting ``on_listening_action`` uses the server default - ``"interrupt"``. Pass ``on_listening_action="inject"`` explicitly to - preserve the pre-v2.7 behavior. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot think in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") - kwargs: typing.Dict[str, typing.Any] = {"text": text} - if options is not None: - kwargs.update(options) - if on_listening_action is not None: - kwargs["on_listening_action"] = on_listening_action - if on_thinking_action is not None: - kwargs["on_thinking_action"] = on_thinking_action - if on_speaking_action is not None: - kwargs["on_speaking_action"] = on_speaking_action - if interruptable is not None: - kwargs["interruptable"] = interruptable - if metadata is not None: - kwargs["metadata"] = metadata + class Groq(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = GroqOptions(**kwargs) - return await self._client.agent_management.agent_think( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) + def to_config(self) -> Dict[str, Any]: + config = OpenAI(**_dump_optional_model(self.options)).to_config() + config["url"] = self.options.base_url + return config - async def update(self, properties: typing.Any) -> None: - """Update the agent configuration at runtime. - Parameters - ---------- - properties : UpdateAgentsRequestProperties - Partial configuration to update. 
- """ - if self._status != "running": - raise RuntimeError(f"Cannot update in {self._status} state") - if not self._agent_id: - raise RuntimeError("No agent ID available") + class CustomLLMOptions(OpenAIOptions): + model_config = ConfigDict(extra="forbid") - await self._client.agents.update( - self._app_id, - self._agent_id, - properties=properties, - request_options=self._request_options(), - ) + api_key: str = Field(..., description="Custom LLM API key") + base_url: str = Field(..., description="OpenAI-compatible chat completions endpoint") - async def get_history(self) -> typing.Any: - """Get the conversation history.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") - return await self._client.agents.get_history( - self._app_id, self._agent_id, request_options=self._request_options() - ) + class CustomLLM(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = CustomLLMOptions(**kwargs) - async def get_info(self) -> typing.Any: - """Get the current session info.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") + def to_config(self) -> Dict[str, Any]: + config = OpenAI(**_dump_optional_model(self.options)).to_config() + config["vendor"] = self.options.vendor or "custom" + return config - return await self._client.agents.get( - self._app_id, self._agent_id, request_options=self._request_options() - ) - async def get_turns( - self, - *, - page_index: typing.Optional[int] = None, - page_size: typing.Optional[int] = None, - options: typing.Optional["GetTurnsOptions"] = None, - ) -> GetTurnsAgentsResponse: - """Get turn-by-turn analytics and timing details for this session.""" - if not self._agent_id: - raise RuntimeError("No agent ID available") + class VertexAILLMOptions(GeminiOptions): + model_config = ConfigDict(extra="forbid") - kwargs: typing.Dict[str, typing.Any] = {} - if options is not None: - kwargs.update(options) - if page_index is not None: - kwargs["page_index"] = page_index - if page_size is not 
None: - kwargs["page_size"] = page_size + api_key: str = Field(..., description="Vertex AI access token or API key") + project_id: str = Field(..., description="Google Cloud project ID") + location: str = Field(..., description="Google Cloud location") - return await self._client.agents.get_turns( - self._app_id, - self._agent_id, - request_options=self._request_options(), - **kwargs, - ) - async def get_all_turns(self, *, page_size: typing.Optional[int] = None) -> GetTurnsAgentsResponse: - """Get all turn analytics pages for this session. + class VertexAILLM(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = VertexAILLMOptions(**kwargs) - Raises ``RuntimeError`` if the server's pagination metadata is missing - the fields required to advance, or if requesting the next page returns - a page index that did not advance. - """ - response = await self.get_turns(page_index=1, page_size=page_size) - all_turns = self._response_turns(response) - pagination = self._response_pagination(response) - current_page = self._page_value(pagination, "page_index") or 1 - while pagination is not None and self._page_value(pagination, "is_last_page") is False: - total_pages = self._page_value(pagination, "total_pages") - returned_index = self._page_value(pagination, "page_index") - if returned_index is None and total_pages is None: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - if total_pages is not None and current_page >= total_pages: - break - next_page = current_page + 1 - response = await self.get_turns(page_index=next_page, page_size=page_size) - all_turns.extend(self._response_turns(response)) - pagination = self._response_pagination(response) - returned_index = self._page_value(pagination, "page_index") if pagination else None - if returned_index is not None: - if returned_index <= current_page and self._page_value(pagination, "is_last_page") is not True: - raise RuntimeError( - f"get_all_turns pagination did not advance: requested page {next_page}, " - f"received page {returned_index}." - ) - current_page = returned_index - else: - total_pages = self._page_value(pagination, "total_pages") if pagination else None - is_last_page = self._page_value(pagination, "is_last_page") if pagination else None - if total_pages is None and is_last_page is not True: - raise RuntimeError( - "get_all_turns pagination cannot continue: response must include " - "page_index, total_pages, or is_last_page=true." 
- ) - current_page = next_page - return self._with_all_turns(response, all_turns) - tests/custom/test_pipeline_id.py: | - import pytest + def to_config(self) -> Dict[str, Any]: + options = _dump_optional_model(self.options) + options.pop("project_id", None) + options.pop("location", None) + if not options.get("url"): + options["url"] = ( + f"https://{self.options.location}-aiplatform.googleapis.com/v1/projects/" + f"{self.options.project_id}/locations/{self.options.location}/" + f"publishers/google/models/{self.options.model}:streamGenerateContent?alt=sse" + ) + return Gemini(**options).to_config() - from agora_agent import Agent + class AmazonBedrockOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - def dump(value): - if hasattr(value, "model_dump"): - return value.model_dump(exclude_none=True) - if hasattr(value, "dict"): - return value.dict(exclude_none=True) - return value + access_key: str = Field(..., description="AWS access key ID") + secret_key: str = Field(..., description="AWS secret access key") + region: str = Field(..., description="AWS region") + model: str = Field(..., description="Amazon Bedrock model identifier") + max_tokens: Optional[int] = Field(default=None, gt=0) + url: Optional[str] = Field(default=None, description="Amazon Bedrock converse stream endpoint URL") + temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0) + top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = Field(default=None) + headers: Optional[Dict[str, str]] = Field(default=None) + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = 
Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0, description="Maximum number of conversation history messages to cache") - class StartResponse: - agent_id = "agent-id" + class AmazonBedrock(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = AmazonBedrockOptions(**kwargs) + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.params or {}) + if self.options.max_tokens is not None: + params["max_tokens"] = self.options.max_tokens + if self.options.temperature is not None: + params["temperature"] = self.options.temperature + if self.options.top_p is not None: + params["top_p"] = self.options.top_p - class FakeAgentsClient: - def __init__(self): - self.calls = [] + config: Dict[str, Any] = { + "url": self.options.url or f"https://bedrock-runtime.{self.options.region}.amazonaws.com/model/{self.options.model}/converse-stream", + "access_key": self.options.access_key, + "secret_key": self.options.secret_key, + "region": self.options.region, + "model": self.options.model, + "params": params, + "style": "bedrock", + "input_modalities": self.options.input_modalities or ["text"], + } + if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.headers is not None: + config["headers"] = self.options.headers + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + 
config["template_variables"] = self.options.template_variables + if self.options.vendor is not None: + config["vendor"] = self.options.vendor + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history + return config - def start(self, appid, **kwargs): - self.calls.append({"appid": appid, **kwargs}) - return StartResponse() + class DifyOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - class FakeAsyncAgentsClient: - def __init__(self): - self.calls = [] + api_key: str = Field(..., description="Dify API key") + url: str = Field(..., description="Dify workflow or chat endpoint") + model: str = Field(..., description="Dify model identifier") + user: Optional[str] = Field(default=None, description="Dify user identifier") + conversation_id: Optional[str] = Field(default=None, description="Dify conversation ID") + system_messages: Optional[List[Dict[str, Any]]] = Field(default=None) + greeting_message: Optional[str] = Field(default=None) + failure_message: Optional[str] = Field(default=None) + input_modalities: Optional[List[str]] = Field(default=None) + params: Optional[Dict[str, Any]] = Field(default=None) + headers: Optional[Dict[str, str]] = Field(default=None) + output_modalities: Optional[List[str]] = Field(default=None) + greeting_configs: Optional[LlmGreetingConfigs] = Field(default=None) + template_variables: Optional[Dict[str, str]] = Field(default=None) + vendor: Optional[str] = Field(default=None) + mcp_servers: Optional[List[Dict[str, Any]]] = Field(default=None) + max_history: Optional[int] = Field(default=None, gt=0) - async def start(self, appid, **kwargs): - self.calls.append({"appid": appid, **kwargs}) - return StartResponse() + class Dify(BaseLLM): + def __init__(self, **kwargs: Any): + self.options = DifyOptions(**kwargs) - class FakeClient: - app_id = "appid" - app_certificate = None + 
def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = {"model": self.options.model, **(self.options.params or {})} + if self.options.user is not None: + params["user"] = self.options.user + if self.options.conversation_id is not None: + params["conversation_id"] = self.options.conversation_id - def __init__(self, agents): - self.agents = agents + config: Dict[str, Any] = { + "url": self.options.url, + "api_key": self.options.api_key, + "params": params, + "style": "dify", + "input_modalities": self.options.input_modalities or ["text"], + } + if self.options.headers is not None: + config["headers"] = self.options.headers + if self.options.system_messages is not None: + config["system_messages"] = self.options.system_messages + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.greeting_configs is not None: + config["greeting_configs"] = _dump_optional_model(self.options.greeting_configs) + if self.options.template_variables is not None: + config["template_variables"] = self.options.template_variables + if self.options.vendor is not None: + config["vendor"] = self.options.vendor + if self.options.mcp_servers is not None: + config["mcp_servers"] = _ensure_mcp_transport(self.options.mcp_servers) + if self.options.max_history is not None: + config["max_history"] = self.options.max_history + return config + src/agora_agent/agentkit/vendors/mllm.py: | + import warnings + from typing import Any, Dict, List, Optional + from pydantic import BaseModel, ConfigDict, Field - def start_agent(agent, **overrides): - agents = FakeAgentsClient() - client = FakeClient(agents) - options = { - "channel": "channel", - "token": "token", - "agent_uid": "1", - "remote_uids": ["100"], - 
**overrides, - } + from ...types.mllm_turn_detection import MllmTurnDetection + from .base import BaseMLLM - agent_id = agent.create_session(client, **options).start() + MllmTurnDetectionConfig = MllmTurnDetection - assert agent_id == "agent-id" - assert len(agents.calls) == 1 - return agents.calls[0] + class OpenAIRealtimeOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - def test_agent_pipeline_id_sends_top_level_pipeline_id() -> None: - call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) + api_key: str = Field(..., description="OpenAI API key") + model: Optional[str] = Field(default=None, description="Model name (e.g., gpt-4o-realtime-preview)") + voice: Optional[str] = Field(default=None, description="Voice identifier") + instructions: Optional[str] = Field(default=None, description="System instructions") + input_audio_transcription: Optional[Dict[str, Any]] = Field(default=None, description="Audio transcription settings") + url: Optional[str] = Field(default=None, description="WebSocket URL") + greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") + input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") + output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") + messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") + params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") + turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") + failure_message: Optional[str] = Field(default=None, description="Message played on failure") - assert call["appid"] == "appid" - assert call["name"] == "support" - assert call["pipeline_id"] == "studio-pipeline-id" - properties = dump(call["properties"]) - assert properties["channel"] == "channel" - assert properties["token"] == "token" - 
assert properties["agent_rtc_uid"] == "1" - assert properties["remote_rtc_uids"] == ["100"] + class OpenAIRealtime(BaseMLLM): + def __init__(self, **kwargs: Any): + self.options = OpenAIRealtimeOptions(**kwargs) + def to_config(self) -> Dict[str, Any]: + config: Dict[str, Any] = { + "vendor": "openai", + "api_key": self.options.api_key, + } - def test_session_pipeline_id_overrides_agent_pipeline_id() -> None: - call = start_agent( - Agent(name="support", pipeline_id="agent-pipeline"), - pipeline_id="session-pipeline", - ) + if self.options.url is not None: + config["url"] = self.options.url + if ( + self.options.model is not None + or self.options.params is not None + or self.options.voice is not None + or self.options.instructions is not None + or self.options.input_audio_transcription is not None + ): + params: Dict[str, Any] = {} + if self.options.model is not None: + params["model"] = self.options.model + if self.options.params is not None: + params.update(self.options.params) + if self.options.voice is not None: + params["voice"] = self.options.voice + if self.options.instructions is not None: + params["instructions"] = self.options.instructions + if self.options.input_audio_transcription is not None: + params["input_audio_transcription"] = self.options.input_audio_transcription + config["params"] = params + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.input_modalities is not None: + config["input_modalities"] = self.options.input_modalities + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.messages is not None: + config["messages"] = self.options.messages + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.turn_detection is not None: + config["turn_detection"] = self.options.turn_detection - assert call["pipeline_id"] == 
"session-pipeline" + return config - def test_agent_pipeline_id_skips_missing_vendor_validation() -> None: - call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) + # xAI MLLM: use XaiGrok (product name, mllm.vendor "xai"). Do not use XaiRealtime—that name + # is deprecated and reserved naming for future XaiSTT / XaiTTS cascading vendors. - assert call["pipeline_id"] == "studio-pipeline-id" + class XaiGrokOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - def test_pipeline_id_is_not_sent_inside_properties() -> None: - call = start_agent(Agent(name="support", pipeline_id="studio-pipeline-id")) + api_key: str = Field(..., description="xAI API key") + url: str = Field(default="wss://api.x.ai/v1/realtime", description="xAI Realtime WebSocket URL") + voice: Optional[str] = Field(default=None, description="Voice identifier (e.g., eve or rex)") + language: Optional[str] = Field(default=None, description="Language code (e.g., en)") + sample_rate: Optional[int] = Field(default=None, description="Audio sample rate in Hz") + greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") + input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") + output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") + messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") + params: Optional[Dict[str, Any]] = Field(default=None, description="Additional xAI parameters") + turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") + failure_message: Optional[str] = Field(default=None, description="Message played on failure") - assert call["pipeline_id"] == "studio-pipeline-id" - assert "pipeline_id" not in dump(call["properties"]) + class XaiGrok(BaseMLLM): + """xAI Grok MLLM vendor (`mllm.vendor`: ``xai``).""" - def 
test_pipeline_id_survives_builder_clone() -> None: - agent = Agent(name="support", pipeline_id="studio-pipeline-id").with_tools(True) + def __init__(self, **kwargs: Any): + self.options = XaiGrokOptions(**kwargs) - assert agent.pipeline_id == "studio-pipeline-id" - call = start_agent(agent) + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.params or {}) + if self.options.voice is not None: + params["voice"] = self.options.voice + if self.options.language is not None: + params["language"] = self.options.language + if self.options.sample_rate is not None: + params["sample_rate"] = self.options.sample_rate - assert call["pipeline_id"] == "studio-pipeline-id" - assert dump(call["properties"])["advanced_features"] == {"enable_tools": True} + config: Dict[str, Any] = { + "vendor": "xai", + "api_key": self.options.api_key, + "url": self.options.url, + "params": params, + } + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.input_modalities is not None: + config["input_modalities"] = self.options.input_modalities + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.messages is not None: + config["messages"] = self.options.messages + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.turn_detection is not None: + config["turn_detection"] = self.options.turn_detection - @pytest.mark.asyncio - async def test_async_session_uses_agent_pipeline_id() -> None: - agents = FakeAsyncAgentsClient() - client = FakeClient(agents) - agent = Agent(name="support", pipeline_id="studio-pipeline-id") + return config - agent_id = await agent.create_async_session( - client, - channel="channel", - token="token", - agent_uid="1", - remote_uids=["100"], - ).start() - assert agent_id == "agent-id" - assert agents.calls[0]["pipeline_id"] == 
"studio-pipeline-id" - assert "pipeline_id" not in dump(agents.calls[0]["properties"]) - status: unresolved - - id: patch-8e22e6d0 - content_hash: sha256:4baa4d46c129dde02b82a8367fdc1f9217d52267f82eb18f190d230d39a90927 - original_commit: 8e22e6d069e77f4c652e15f2f37945538c88c7c4 - original_message: udpated agent docs - original_author: Hermes (agora) - base_generation: a217c8ecfd919345831eebaca8295e292d65ebcf - files: - - docs/reference/agent.md - patch_content: |+ - From 8e22e6d069e77f4c652e15f2f37945538c88c7c4 Mon Sep 17 00:00:00 2001 - From: "Hermes (agora)" - Date: Tue, 2 Jun 2026 15:36:16 -0400 - Subject: [PATCH] udpated agent docs + class VertexAIOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - --- - docs/reference/agent.md | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - - diff --git a/docs/reference/agent.md b/docs/reference/agent.md - index 86d4fbd..5693e0b 100644 - --- a/docs/reference/agent.md - +++ b/docs/reference/agent.md - @@ -34,7 +34,6 @@ Agent( - | Parameter | Type | Default | Description | - |---|---|---|---| - | `name` | `Optional[str]` | `None` | Agent name, used as default session name | - -| `pipeline_id` | `Optional[str]` | `None` | Published AI Studio pipeline ID used as this agent's base configuration | - | `instructions` | `Optional[str]` | `None` | Deprecated. Use LLM vendor `system_messages` instead. 
| - | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Interaction language and turn detection configuration | - | `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration | - @@ -48,6 +47,7 @@ Agent( - | `labels` | `Optional[Dict[str, str]]` | `None` | Custom key-value labels (returned in callbacks) | - | `rtc` | `Optional[RtcConfig]` | `None` | RTC media encryption | - | `filler_words` | `Optional[FillerWordsConfig]` | `None` | Filler words while waiting for LLM | - +| `pipeline_id` | `Optional[str]` | `None` | Published AI Studio pipeline ID used as this agent's base configuration | - - `pipeline_id` is an AI Studio base configuration. Explicit Agent config such as `with_llm()`, `with_tts()`, `with_stt()`, `with_mllm()`, `advanced_features`, and other builder options may send fields in `properties` that override the saved pipeline settings. Session-level `pipeline_id` overrides the agent-level value. - - -- - 2.52.0 + model: str = Field(..., description="Model name") + url: Optional[str] = Field(default=None, description="WebSocket URL") + project_id: str = Field(..., description="Google Cloud project ID") + location: str = Field(..., description="Google Cloud location/region") + adc_credentials_string: str = Field(..., description="Application Default Credentials JSON string") + instructions: Optional[str] = Field(default=None, description="System instructions") + voice: Optional[str] = Field(default=None, description="Voice name (e.g., Aoede, Charon)") + affective_dialog: Optional[bool] = Field(default=None, description="Enable affective dialog") + proactive_audio: Optional[bool] = Field(default=None, description="Enable proactive audio") + transcribe_agent: Optional[bool] = Field(default=None, description="Transcribe agent speech") + transcribe_user: Optional[bool] = Field(default=None, description="Transcribe user speech") + http_options: Optional[Dict[str, Any]] = Field(default=None, 
description="HTTP options") + greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") + input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") + output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") + messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") + turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") + failure_message: Optional[str] = Field(default=None, description="Message played on failure") - theirs_snapshot: - docs/reference/agent.md: | - --- - sidebar_position: 2 - title: Agent - description: Full API reference for the Python Agent builder class. - --- + class VertexAI(BaseMLLM): + def __init__(self, **kwargs: Any): + self.options = VertexAIOptions(**kwargs) - # Agent Reference + def to_config(self) -> Dict[str, Any]: + # additional_params spread first so that explicit fields always win, + # matching the TypeScript SDK. 
+ params: Dict[str, Any] = dict(self.options.additional_params or {}) + params["model"] = self.options.model + if self.options.instructions is not None: + params["instructions"] = self.options.instructions + if self.options.voice is not None: + params["voice"] = self.options.voice + if self.options.affective_dialog is not None: + params["affective_dialog"] = self.options.affective_dialog + if self.options.proactive_audio is not None: + params["proactive_audio"] = self.options.proactive_audio + if self.options.transcribe_agent is not None: + params["transcribe_agent"] = self.options.transcribe_agent + if self.options.transcribe_user is not None: + params["transcribe_user"] = self.options.transcribe_user + if self.options.http_options is not None: + params["http_options"] = self.options.http_options - **Import:** `from agora_agent import Agent` + config: Dict[str, Any] = { + "vendor": "vertexai", + "project_id": self.options.project_id, + "location": self.options.location, + "adc_credentials_string": self.options.adc_credentials_string, + "params": params, + } - ## Constructor + if self.options.url is not None: + config["url"] = self.options.url + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.input_modalities is not None: + config["input_modalities"] = self.options.input_modalities + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.messages is not None: + config["messages"] = self.options.messages + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.turn_detection is not None: + config["turn_detection"] = self.options.turn_detection - - ```python - Agent( - name: Optional[str] = None, - instructions: Optional[str] = None, - turn_detection: Optional[TurnDetectionConfig] = None, - interruption: Optional[InterruptionConfig] = None, - sal: 
Optional[SalConfig] = None, - advanced_features: Optional[Dict[str, Any]] = None, - parameters: Optional[SessionParams] = None, - greeting: Optional[str] = None, - failure_message: Optional[str] = None, - max_history: Optional[int] = None, - geofence: Optional[GeofenceConfig] = None, - labels: Optional[Dict[str, str]] = None, - rtc: Optional[RtcConfig] = None, - filler_words: Optional[FillerWordsConfig] = None, - pipeline_id: Optional[str] = None, - ) - ``` + return config - | Parameter | Type | Default | Description | - |---|---|---|---| - | `name` | `Optional[str]` | `None` | Agent name, used as default session name | - | `instructions` | `Optional[str]` | `None` | Deprecated. Use LLM vendor `system_messages` instead. | - | `turn_detection` | `Optional[TurnDetectionConfig]` | `None` | Interaction language and turn detection configuration | - | `interruption` | `Optional[InterruptionConfig]` | `None` | Unified interruption control configuration | - | `sal` | `Optional[SalConfig]` | `None` | Speech Activity Level configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | `None` | Advanced features dict (e.g., `{'enable_rtm': True}`) | - | `parameters` | `Optional[SessionParams]` | `None` | Additional session parameters | - | `greeting` | `Optional[str]` | `None` | Deprecated. Use LLM/MLLM vendor `greeting_message` instead. | - | `failure_message` | `Optional[str]` | `None` | Deprecated. Use LLM/MLLM vendor `failure_message` instead. | - | `max_history` | `Optional[int]` | `None` | Deprecated. Use LLM vendor `max_history` instead. 
| - | `geofence` | `Optional[GeofenceConfig]` | `None` | Regional access restriction | - | `labels` | `Optional[Dict[str, str]]` | `None` | Custom key-value labels (returned in callbacks) | - | `rtc` | `Optional[RtcConfig]` | `None` | RTC media encryption | - | `filler_words` | `Optional[FillerWordsConfig]` | `None` | Filler words while waiting for LLM | - | `pipeline_id` | `Optional[str]` | `None` | Published AI Studio pipeline ID used as this agent's base configuration | - `pipeline_id` is an AI Studio base configuration. Explicit Agent config such as `with_llm()`, `with_tts()`, `with_stt()`, `with_mllm()`, `advanced_features`, and other builder options may send fields in `properties` that override the saved pipeline settings. Session-level `pipeline_id` overrides the agent-level value. + class GeminiLiveOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - The Agent-level `instructions`, `greeting`, `failure_message`, `max_history`, and `greeting_configs` fields are compatibility shims. New code should configure those values on the LLM or MLLM vendor because that matches the core request schema. 
+ api_key: str = Field(..., description="Google API key") + model: str = Field(..., description="Gemini Live model name") + url: Optional[str] = Field(default=None, description="WebSocket URL") + instructions: Optional[str] = Field(default=None, description="System instructions") + voice: Optional[str] = Field(default=None, description="Voice name") + affective_dialog: Optional[bool] = Field(default=None, description="Enable affective dialog") + proactive_audio: Optional[bool] = Field(default=None, description="Enable proactive audio") + transcribe_agent: Optional[bool] = Field(default=None, description="Transcribe agent speech") + transcribe_user: Optional[bool] = Field(default=None, description="Transcribe user speech") + http_options: Optional[Dict[str, Any]] = Field(default=None, description="HTTP options") + greeting_message: Optional[str] = Field(default=None, description="Agent greeting message") + input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities") + output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities") + messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages") + additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters") + turn_detection: Optional[MllmTurnDetectionConfig] = Field(default=None, description="MLLM turn detection configuration") + failure_message: Optional[str] = Field(default=None, description="Message played on failure") - ## Builder Methods + class GeminiLive(BaseMLLM): + def __init__(self, **kwargs: Any): + self.options = GeminiLiveOptions(**kwargs) - All builder methods return a new `Agent` instance (immutable pattern). 
+ def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = {} + if self.options.additional_params is not None: + params.update(self.options.additional_params) + params["model"] = self.options.model + if self.options.instructions is not None: + params["instructions"] = self.options.instructions + if self.options.voice is not None: + params["voice"] = self.options.voice + if self.options.affective_dialog is not None: + params["affective_dialog"] = self.options.affective_dialog + if self.options.proactive_audio is not None: + params["proactive_audio"] = self.options.proactive_audio + if self.options.transcribe_agent is not None: + params["transcribe_agent"] = self.options.transcribe_agent + if self.options.transcribe_user is not None: + params["transcribe_user"] = self.options.transcribe_user + if self.options.http_options is not None: + params["http_options"] = self.options.http_options - ### `with_llm(vendor: BaseLLM) -> Agent` + config: Dict[str, Any] = { + "vendor": "gemini", + "api_key": self.options.api_key, + "params": params, + } - Set the LLM vendor for cascading flow. 
+ if self.options.url is not None: + config["url"] = self.options.url + if self.options.greeting_message is not None: + config["greeting_message"] = self.options.greeting_message + if self.options.input_modalities is not None: + config["input_modalities"] = self.options.input_modalities + if self.options.output_modalities is not None: + config["output_modalities"] = self.options.output_modalities + if self.options.messages is not None: + config["messages"] = self.options.messages + if self.options.failure_message is not None: + config["failure_message"] = self.options.failure_message + if self.options.turn_detection is not None: + config["turn_detection"] = self.options.turn_detection - - ```python - from agora_agent import OpenAI - agent = Agent().with_llm(OpenAI(api_key='your-key', base_url='https://api.openai.com/v1/chat/completions', model='gpt-4o-mini')) - ``` + return config + src/agora_agent/agentkit/vendors/stt.py: | + from typing import Any, Dict, Optional - ### `with_tts(vendor: BaseTTS) -> Agent` + from pydantic import BaseModel, ConfigDict, Field, model_validator - Set the TTS vendor. Records the vendor's `sample_rate` for avatar validation. + from .base import BaseSTT - - ```python - from agora_agent import ElevenLabsTTS - agent = Agent().with_tts(ElevenLabsTTS(key='your-key', model_id='eleven_flash_v2_5', voice_id='your-voice-id', base_url='wss://api.elevenlabs.io/v1')) - ``` + _DEEPGRAM_MANAGED_MODELS = {"nova-2", "nova-3"} - ### `with_stt(vendor: BaseSTT) -> Agent` - Set the STT (ASR) vendor. 
+ class SpeechmaticsSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - - ```python - from agora_agent import DeepgramSTT - agent = Agent().with_stt(DeepgramSTT(api_key='your-key', language='en-US')) - ``` + api_key: str = Field(..., description="Speechmatics API key") + language: str = Field(..., description="Language code (e.g., en, es, fr)") + model: Optional[str] = Field(default=None, description="Model name") + uri: Optional[str] = Field(default=None, description="Speechmatics streaming WebSocket URL") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - ### `with_mllm(vendor: BaseMLLM) -> Agent` + class SpeechmaticsSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = SpeechmaticsSTTOptions(**kwargs) - Set the MLLM vendor for multimodal flow. Calling `with_mllm()` automatically sets `mllm.enable = True`. MLLM sessions do not require TTS, STT, or LLM vendors. + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params.update({ + "api_key": self.options.api_key, + "language": self.options.language, + }) + if self.options.model is not None: + params["model"] = self.options.model + if self.options.uri is not None: + params["uri"] = self.options.uri - - ```python - from agora_agent import OpenAIRealtime - agent = Agent().with_mllm(OpenAIRealtime(api_key='your-key')) - ``` + config: Dict[str, Any] = { + "vendor": "speechmatics", + "params": params, + } + return config - ### `with_avatar(vendor: BaseAvatar) -> Agent` - Set the avatar vendor for the cascading ASR + LLM + TTS pipeline. Avatars are not supported when MLLM is enabled — combining `with_mllm()` and an enabled `with_avatar()` is rejected at `to_properties()` and `AgentSession.start()`. A disabled avatar (`enable=False`) is allowed alongside MLLM. 
+ class DeepgramSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - Raises `ValueError` if the TTS sample rate does not match the avatar's `required_sample_rate`. + api_key: Optional[str] = Field(default=None, description="Deepgram API key") + model: Optional[str] = Field(default=None, description="Model (e.g., nova-2, enhanced, base)") + language: Optional[str] = Field(default=None, description="Language code (e.g., en-US)") + keyterm: Optional[str] = Field(default=None, description="Boost specialized terms and brands for Deepgram") + smart_format: Optional[bool] = Field(default=None, description="Enable smart formatting") + punctuation: Optional[bool] = Field(default=None, description="Enable punctuation") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - - ```python - from agora_agent import HeyGenAvatar - agent = agent.with_avatar(HeyGenAvatar(api_key='your-key', quality='medium', agora_uid='2')) - ``` + @model_validator(mode="after") + def _validate_managed_model(self) -> "DeepgramSTTOptions": + if self.api_key is None and (self.model is None or self.model.strip().lower() not in _DEEPGRAM_MANAGED_MODELS): + raise ValueError("DeepgramSTT requires api_key unless using a supported Agora-managed model") + return self - **Raises:** `ValueError` — `"Avatar requires TTS sample rate of {required} Hz, but TTS is configured with {actual} Hz. Please update your TTS sample_rate to {required}."` + class DeepgramSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = DeepgramSTTOptions(**kwargs) - ### `with_turn_detection(config: TurnDetectionConfig) -> Agent` + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) - Override cascading-flow turn detection settings. 
Use `language` for the Agora interaction language, `config.start_of_speech` and `config.end_of_speech` for SOS/EOS detection, `with_interruption()` for interruption behavior, and MLLM vendor `turn_detection` for MLLM turn detection. + if self.options.api_key is not None: + params["key"] = self.options.api_key + if self.options.model is not None: + params["model"] = self.options.model + if self.options.language is not None: + params["language"] = self.options.language + if self.options.smart_format is not None: + params["smart_format"] = self.options.smart_format + if self.options.punctuation is not None: + params["punctuation"] = self.options.punctuation + if self.options.keyterm is not None: + params["keyterm"] = self.options.keyterm + config: Dict[str, Any] = { + "vendor": "deepgram", + "params": params, + } + return config - Pause-state detection is configured under semantic end-of-speech: - ```python - agent = agent.with_turn_detection({ - "mode": "default", - "config": { - "end_of_speech": { - "mode": "semantic", - "semantic_config": { - "pause_state_enabled": True, - }, - }, - }, - }) - ``` + class MicrosoftSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - ### `with_interruption(config: InterruptionConfig) -> Agent` + key: str = Field(..., description="Azure subscription key") + region: str = Field(..., description="Azure region (e.g., eastus)") + language: str = Field(..., description="Language code (e.g., en-US)") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - Configure unified interruption behavior using the top-level `interruption` object. Use this for `start_of_speech` and `keywords` interruption modes. 
+ class MicrosoftSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = MicrosoftSTTOptions(**kwargs) - ### `with_instructions(instructions: str) -> Agent` + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params.update({ + "key": self.options.key, + "region": self.options.region, + }) + if self.options.language is not None: + params["language"] = self.options.language - Deprecated. Configure `system_messages` on the LLM vendor instead. + config: Dict[str, Any] = { + "vendor": "microsoft", + "params": params, + } + return config - ### `with_greeting(greeting: str) -> Agent` - Deprecated. Configure `greeting_message` on the LLM or MLLM vendor instead. + class OpenAISTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - ### `with_name(name: str) -> Agent` + api_key: str = Field(..., description="OpenAI API key") + model: Optional[str] = Field(default=None, description="Model (default: whisper-1)") + language: Optional[str] = Field(default=None, description="Language code") + prompt: Optional[str] = Field(default=None, description="Prompt that guides OpenAI transcription") + input_audio_transcription: Optional[Dict[str, Any]] = Field(default=None, description="OpenAI transcription settings") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - Override the agent name. + class OpenAISTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = OpenAISTTOptions(**kwargs) - ### `with_sal(config: SalConfig) -> Agent` + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params["api_key"] = self.options.api_key - Set SAL (Selective Attention Locking) configuration. 
+ transcription: Dict[str, Any] = {"model": "gpt-4o-mini-transcribe"} + transcription.update(self.options.input_audio_transcription or {}) + if self.options.model is not None: + transcription["model"] = self.options.model + if self.options.prompt is not None: + transcription["prompt"] = self.options.prompt + if self.options.language is not None: + transcription["language"] = self.options.language + if not transcription.get("model"): + raise ValueError("OpenAISTT: input_audio_transcription.model is required") + if not transcription.get("prompt"): + raise ValueError("OpenAISTT: input_audio_transcription.prompt is required") + if not transcription.get("language"): + raise ValueError("OpenAISTT: input_audio_transcription.language is required") + params["input_audio_transcription"] = transcription - ### `with_advanced_features(features: AdvancedFeatures) -> Agent` + config: Dict[str, Any] = { + "vendor": "openai", + "params": params, + } + return config - Set advanced features (e.g. `{'enable_rtm': True}`). - When `enable_rtm=True`, AgentKit defaults `parameters.data_channel` to `"rtm"` unless you explicitly set another data channel. + class GoogleSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - ### `with_tools(enabled: bool = True) -> Agent` + project_id: str = Field(..., description="Google Cloud project ID") + location: str = Field(..., description="Google Cloud region") + adc_credentials_string: str = Field(..., description="Google service account credentials JSON string") + language: str = Field(..., description="Language code (e.g., en-US)") + model: Optional[str] = Field(default=None, description="Recognition model") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - Enable or disable MCP tool invocation by setting `advanced_features.enable_tools`. 
+ class GoogleSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = GoogleSTTOptions(**kwargs) - ### `with_parameters(parameters: SessionParams) -> Agent` + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params.update({ + "project_id": self.options.project_id, + "location": self.options.location, + "adc_credentials_string": self.options.adc_credentials_string, + }) - Set session parameters (silence config, farewell config, data channel, audio scenario, etc.). + if self.options.language is not None: + params["language"] = self.options.language + if self.options.model is not None: + params["model"] = self.options.model - ### `with_audio_scenario(audio_scenario: ParametersAudioScenario) -> Agent` + config: Dict[str, Any] = { + "vendor": "google", + "params": params, + } + return config - Set `parameters.audio_scenario` without replacing existing session parameters. - ### `with_failure_message(message: str) -> Agent` + class AmazonSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - Deprecated. Configure `failure_message` on the LLM or MLLM vendor instead. + access_key: str = Field(..., description="AWS Access Key ID") + secret_key: str = Field(..., description="AWS Secret Access Key") + region: str = Field(..., description="AWS region (e.g., us-east-1)") + language: str = Field(..., description="Language code") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - ### `with_max_history(max_history: int) -> Agent` + class AmazonSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = AmazonSTTOptions(**kwargs) - Deprecated. Configure `max_history` on the LLM vendor instead. 
+ def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params.update({ + "access_key_id": self.options.access_key, + "secret_access_key": self.options.secret_key, + "region": self.options.region, + }) + if self.options.language is not None: + params["language_code"] = self.options.language - ### `with_geofence(geofence: GeofenceConfig) -> Agent` + config: Dict[str, Any] = { + "vendor": "amazon", + "params": params, + } + return config - Set geofence configuration (restricts backend server regions). - ### `with_labels(labels: Dict[str, str]) -> Agent` + class AssemblyAISTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - Set custom labels (key-value pairs returned in notification callbacks). + api_key: str = Field(..., description="AssemblyAI API key") + language: str = Field(..., description="Language code") + uri: Optional[str] = Field(default=None, description="AssemblyAI streaming WebSocket URL") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - ### `with_rtc(rtc: RtcConfig) -> Agent` + class AssemblyAISTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = AssemblyAISTTOptions(**kwargs) - Set RTC configuration. + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params["api_key"] = self.options.api_key + if self.options.language is not None: + params["language"] = self.options.language + if self.options.uri is not None: + params["uri"] = self.options.uri - ### `with_filler_words(filler_words: FillerWordsConfig) -> Agent` + config: Dict[str, Any] = { + "vendor": "assemblyai", + "params": params, + } + return config - Set filler words configuration (played while waiting for LLM response). 
- ## `create_session()` + class AresSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - - ```python - create_session( - client: Any, - channel: str, - agent_uid: str, - remote_uids: List[str], - name: Optional[str] = None, - token: Optional[str] = None, - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - preset: Optional[Union[str, Sequence[str]]] = None, - pipeline_id: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> AgentSession - ``` + additional_params: Optional[Dict[str, Any]] = Field(default=None) - Creates an `AgentSession` bound to the given client and channel. + class AresSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = AresSTTOptions(**kwargs) - | Parameter | Type | Required | Description | - |---|---|---|---| - | `client` | `Agora` or `AsyncAgora` | Yes | Authenticated client | - | `channel` | `str` | Yes | Channel name | - | `agent_uid` | `str` | Yes | UID for the agent | - | `remote_uids` | `List[str]` | Yes | UIDs of remote participants | - | `name` | `Optional[str]` | No | Session name (defaults to agent name) | - | `token` | `Optional[str]` | No | Pre-built RTC+RTM token | - | `expires_in` | `Optional[int]` | No | Token lifetime in seconds (default: `86400` = 24 h, Agora max). Only applies when the token is auto-generated. Use `expires_in_hours()` or `expires_in_minutes()` for clarity. Valid range: 1–86400. | - | `idle_timeout` | `Optional[int]` | No | Idle timeout in seconds | - | `enable_string_uid` | `Optional[bool]` | No | Enable string UIDs | - | `preset` | `Optional[Union[str, Sequence[str]]]` | No | Advanced preset value for project-specific routing | - | `pipeline_id` | `Optional[str]` | No | Published AI Studio pipeline ID for this session. Overrides `agent.pipeline_id`. 
| + def to_config(self) -> Dict[str, Any]: + config: Dict[str, Any] = {"vendor": "ares"} + if self.options.additional_params: + config["params"] = self.options.additional_params + return config - `pipeline_id` is sent as the top-level `/join` field `pipeline_id`, not inside `properties`. - **Returns:** `AgentSession` + class SarvamSTTOptions(BaseModel): + model_config = ConfigDict(extra="forbid") - ## `to_properties()` + api_key: str = Field(..., description="Sarvam API key") + language: str = Field(..., description="Language code (e.g., en, hi, ta)") + model: Optional[str] = Field(default=None, description="Model name") + additional_params: Optional[Dict[str, Any]] = Field(default=None) - Converts the agent configuration into a `StartAgentsRequestProperties` object for the Agora API. Called internally by `AgentSession.start()`. + class SarvamSTT(BaseSTT): + def __init__(self, **kwargs: Any): + self.options = SarvamSTTOptions(**kwargs) - - ```python - to_properties( - channel: str, - agent_uid: str, - remote_uids: List[str], - idle_timeout: Optional[int] = None, - enable_string_uid: Optional[bool] = None, - token: Optional[str] = None, - app_id: Optional[str] = None, - app_certificate: Optional[str] = None, - expires_in: Optional[int] = None, - ) -> StartAgentsRequestProperties - ``` + def to_config(self) -> Dict[str, Any]: + params: Dict[str, Any] = dict(self.options.additional_params or {}) + params.update({ + "api_key": self.options.api_key, + "language": self.options.language, + }) + if self.options.model is not None: + params["model"] = self.options.model - **Raises:** `ValueError` if neither `token` nor `app_id`+`app_certificate` is provided, or if required vendors (LLM, TTS) are missing in cascading mode. 
- - ## Properties - - | Property | Type | Description | - |---|---|---| - | `name` | `Optional[str]` | Agent name | - | `instructions` | `Optional[str]` | Deprecated Agent-level system prompt | - | `greeting` | `Optional[str]` | Deprecated Agent-level greeting message | - | `failure_message` | `Optional[str]` | Deprecated Agent-level failure message | - | `max_history` | `Optional[int]` | Deprecated Agent-level max history | - | `llm` | `Optional[Dict[str, Any]]` | LLM config dict (from `to_config()`) | - | `tts` | `Optional[Dict[str, Any]]` | TTS config dict | - | `stt` | `Optional[Dict[str, Any]]` | STT config dict | - | `mllm` | `Optional[Dict[str, Any]]` | MLLM config dict | - | `avatar` | `Optional[Dict[str, Any]]` | Avatar config dict | - | `turn_detection` | `Optional[TurnDetectionConfig]` | Interaction language and turn detection settings | - | `sal` | `Optional[SalConfig]` | SAL configuration | - | `advanced_features` | `Optional[Dict[str, Any]]` | Advanced features | - | `parameters` | `Optional[SessionParams]` | Session parameters | - | `geofence` | `Optional[GeofenceConfig]` | Geofence configuration | - | `labels` | `Optional[Dict[str, str]]` | Custom labels | - | `rtc` | `Optional[RtcConfig]` | RTC configuration | - | `filler_words` | `Optional[FillerWordsConfig]` | Filler words configuration | - | `config` | `Dict[str, Any]` | Full configuration dict | - - ## Type aliases - - Public aliases over Fern-generated types: `LlmConfig`, `SttConfig`, `AsrConfig` (= `SttConfig`), `MllmConfig`, `AvatarConfig`, session/conversation types, and think types (`ThinkOnListeningAction`, etc.). - - Think value constants: `ThinkOnListeningActionInject`, `ThinkOnListeningActionInterrupt`, `ThinkOnListeningActionIgnore`, `ThinkOnThinkingActionInterrupt`, `ThinkOnThinkingActionIgnore`, `ThinkOnSpeakingActionInterrupt`, `ThinkOnSpeakingActionIgnore`. 
- status: unresolved + config: Dict[str, Any] = { + "vendor": "sarvam", + "params": params, + } + return config - id: patch-bed29b6b content_hash: sha256:35a32ee64c95efd478f684c167efc54c9d95344af837e99b31da4c36f66febce original_commit: bed29b6b7d4d08480a8510b26b5e21d1ef234cc9 diff --git a/src/agora_agent/agentkit/agent.py b/src/agora_agent/agentkit/agent.py index 95cfe34..1daba82 100644 --- a/src/agora_agent/agentkit/agent.py +++ b/src/agora_agent/agentkit/agent.py @@ -57,8 +57,6 @@ from ..agents.types.start_agents_request_properties_filler_words_content_static_config import StartAgentsRequestPropertiesFillerWordsContentStaticConfig from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule from ..types.tts import Tts -from ..agents.types.start_agents_request_properties_filler_words_content_static_config_selection_rule import StartAgentsRequestPropertiesFillerWordsContentStaticConfigSelectionRule -from ..types.tts import Tts from ..types.asr import Asr from ..types.llm import Llm from ..types.llm_style import LlmStyle as GeneratedLlmStyle @@ -546,23 +544,6 @@ def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent ) return new_agent - def with_audio_scenario(self, audio_scenario: ParametersAudioScenario) -> "Agent": - """Returns a new Agent with the specified RTC audio scenario.""" - new_agent = self._clone() - if new_agent._parameters is None: - new_agent._parameters = StartAgentsRequestPropertiesParameters(audio_scenario=audio_scenario) - elif isinstance(new_agent._parameters, dict): - new_agent._parameters = typing.cast( - SessionParamsInput, - {**new_agent._parameters, "audio_scenario": audio_scenario}, - ) - else: - new_agent._parameters = self._copy_model_update( - new_agent._parameters, - {"audio_scenario": audio_scenario}, - ) - return new_agent - def with_failure_message(self, message: str) -> "Agent": 
"""Deprecated. Configure the failure message on the LLM or MLLM vendor instead.""" new_agent = self._clone() diff --git a/src/agora_agent/agentkit/agent_session.py b/src/agora_agent/agentkit/agent_session.py index 745c465..2900c18 100644 --- a/src/agora_agent/agentkit/agent_session.py +++ b/src/agora_agent/agentkit/agent_session.py @@ -15,7 +15,6 @@ AgentThinkAgentManagementResponse as AgentThinkResponse, ) from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse -from ..agents.types.get_turns_agents_response import GetTurnsAgentsResponse from .agent import Agent, GetTurnsOptions, SayOptions, ThinkOptions, _start_properties_from_mapping from .avatar_types import ( is_akool_avatar, diff --git a/src/agora_agent/agentkit/vendors/avatar.py b/src/agora_agent/agentkit/vendors/avatar.py index e816367..1bd9633 100644 --- a/src/agora_agent/agentkit/vendors/avatar.py +++ b/src/agora_agent/agentkit/vendors/avatar.py @@ -177,49 +177,6 @@ def to_config(self) -> Dict[str, Any]: return {"enable": enable, "vendor": "generic", "params": params} -class GenericAvatarOptions(BaseModel): - model_config = ConfigDict(extra="forbid") - - api_key: str = Field(..., description="Generic avatar provider API key") - api_base_url: str = Field(..., description="Avatar provider API base URL") - avatar_id: str = Field(..., description="Avatar ID") - agora_uid: str = Field(..., description="Agora UID for the avatar video stream") - agora_appid: Optional[str] = Field(default=None, description="Agora App ID; filled by AgentSession when omitted") - agora_token: Optional[str] = Field(default=None, description="RTC token; generated by AgentSession when omitted") - agora_channel: Optional[str] = Field(default=None, description="Agora channel; filled by AgentSession when omitted") - enable: Optional[bool] = Field(default=None, description="Enable avatar (default: true)") - additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional vendor-specific 
parameters") - - -class GenericAvatar(BaseAvatar): - def __init__(self, **kwargs: Any): - self.options = GenericAvatarOptions(**kwargs) - - @property - def required_sample_rate(self) -> int: - return 0 - - def to_config(self) -> Dict[str, Any]: - params: Dict[str, Any] = { - "api_key": self.options.api_key, - "api_base_url": self.options.api_base_url, - "avatar_id": self.options.avatar_id, - "agora_uid": self.options.agora_uid, - } - - if self.options.agora_appid is not None: - params["agora_appid"] = self.options.agora_appid - if self.options.agora_token is not None: - params["agora_token"] = self.options.agora_token - if self.options.agora_channel is not None: - params["agora_channel"] = self.options.agora_channel - if self.options.additional_params is not None: - params = {**self.options.additional_params, **params} - - enable = self.options.enable if self.options.enable is not None else True - return {"enable": enable, "vendor": "generic", "params": params} - - class AnamAvatarOptions(BaseModel): model_config = ConfigDict(extra="forbid") diff --git a/src/agora_agent/agentkit/vendors/llm.py b/src/agora_agent/agentkit/vendors/llm.py index 1f1b354..5a9f39e 100644 --- a/src/agora_agent/agentkit/vendors/llm.py +++ b/src/agora_agent/agentkit/vendors/llm.py @@ -2,9 +2,6 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator -from ...agents.types.start_agents_request_properties_llm_greeting_configs import ( - StartAgentsRequestPropertiesLlmGreetingConfigs, -) from .base import BaseLLM LlmGreetingConfigs = Dict[str, Any]