audio

Classes

AudioClient

AudioClient(client: OWUIClientBase)

Bases: ResourceBase

Client for Audio endpoints (TTS and STT).

Used by:

Code Reference › client › Classes › OpenWebUI › Attributes › audio

Source code in src/owui_client/client_base.py

def __init__(self, client: OWUIClientBase):
    """
    Keep a reference to the given client on the instance.

    Args:
        client: The client object, stored as `self._client`.
    """
    self._client = client

Functions

get_config

get_config() -> dict

Get the current audio configuration.

This includes settings for both Text-to-Speech (TTS) and Speech-to-Text (STT) engines, such as API keys, base URLs, models, and voice settings.

Returns:

Name	Type	Description
`dict`	`dict`	The current configuration, with keys 'tts' and 'stt'.

Source code in src/owui_client/routers/audio.py

async def get_config(self) -> dict:
    """
    Fetch the audio configuration currently in effect.

    The configuration covers both the Text-to-Speech (TTS) and the
    Speech-to-Text (STT) engines: API keys, base URLs, models, and voice settings.

    Returns:
        dict: The current configuration, with keys 'tts' and 'stt'.
    """
    # Single GET against the audio config endpoint; the response is passed through as-is.
    current_config = await self._request("GET", "/v1/audio/config")
    return current_config

update_config

update_config(form_data: AudioConfigUpdateForm) -> dict

Update the audio configuration.

Parameters:

Name	Type	Description	Default
`form_data`	`AudioConfigUpdateForm`	The configuration data to update.	required

Returns:

Name	Type	Description
`dict`	`dict`	The updated configuration.

Source code in src/owui_client/routers/audio.py

async def update_config(self, form_data: AudioConfigUpdateForm) -> dict:
    """
    Push new audio configuration values to the server.

    Args:
        form_data: The configuration data to update. It is serialized with
            `model_dump()` and sent as the JSON body of the request.

    Returns:
        dict: The updated configuration.
    """
    body = form_data.model_dump()
    updated_config = await self._request("POST", "/v1/audio/config/update", json=body)
    return updated_config

transcribe

transcribe(
    file_path: str | Path, language: Optional[str] = None
) -> dict

Transcribe an audio file to text.

Uses the configured STT engine (e.g. Whisper, OpenAI, Deepgram, Azure, Mistral). Automatically handles audio conversion and chunking if necessary.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the audio file to transcribe.	required
`language`	`Optional[str]`	Optional language code (e.g. "en") to guide transcription.	`None`

Returns:

Name	Type	Description
`dict`	`dict`	A dictionary containing the transcription result, typically `{"text": "...", "filename": "..."}`.

Source code in src/owui_client/routers/audio.py

async def transcribe(self, file_path: str | Path, language: Optional[str] = None) -> dict:
    """
    Transcribe an audio file to text.

    Uses the configured STT engine (e.g. Whisper, OpenAI, Deepgram, Azure, Mistral).
    Automatically handles audio conversion and chunking if necessary.

    Args:
        file_path: Path to the audio file to transcribe.
        language: Optional language code (e.g. "en") to guide transcription.

    Returns:
        dict: A dictionary containing the transcription result, typically `{"text": "...", "filename": "..."}`.
    """
    file_path = Path(file_path)
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    with open(file_path, "rb") as f:
        # Read file content into memory to safely close the file handle
        content = f.read()

    files = {"file": (file_path.name, content)}

    data = {}
    if language:
        data["language"] = language

    return await self._request("POST", "/v1/audio/transcriptions", files=files, data=data)

speech

speech(
    input_text: str,
    model: Optional[str] = None,
    voice: Optional[str] = None,
    save_path: Optional[str | Path] = None,
) -> bytes

Generate speech from text (Text-to-Speech).

Uses the configured TTS engine (e.g. OpenAI, ElevenLabs, Azure, Transformers).

Parameters:

Name	Type	Description	Default
`input_text`	`str`	The text to convert to speech.	required
`model`	`Optional[str]`	Optional model identifier to override the default configuration.	`None`
`voice`	`Optional[str]`	Optional voice identifier to override the default configuration.	`None`
`save_path`	`Optional[str \| Path]`	Optional file path to save the generated audio to.	`None`

Returns:

Name	Type	Description
`bytes`	`bytes`	The generated audio content in bytes.

Source code in src/owui_client/routers/audio.py

async def speech(
    self,
    input_text: str,
    model: Optional[str] = None,
    voice: Optional[str] = None,
    save_path: Optional[str | Path] = None,
) -> bytes:
    """
    Generate speech from text (Text-to-Speech).

    Uses the configured TTS engine (e.g. OpenAI, ElevenLabs, Azure, Transformers).

    Args:
        input_text: The text to convert to speech.
        model: Optional model identifier to override the default configuration.
        voice: Optional voice identifier to override the default configuration.
        save_path: Optional file path to save the generated audio to.

    Returns:
        bytes: The generated audio content in bytes.
    """
    payload = {"input": input_text}
    if model:
        payload["model"] = model
    if voice:
        payload["voice"] = voice

    response_bytes = await self._request(
        "POST",
        "/v1/audio/speech",
        json=payload,
        model=bytes,
    )

    if save_path:
        with open(save_path, "wb") as f:
            f.write(response_bytes)

    return response_bytes

get_models

get_models() -> dict

Get available audio models for the configured TTS engine.

Returns:

Name	Type	Description
`dict`	`dict`	A dictionary containing a list of models, e.g. `{"models": [{"id": "tts-1"}, ...]}`.

Source code in src/owui_client/routers/audio.py

async def get_models(self) -> dict:
    """
    List the audio models offered by the configured TTS engine.

    Returns:
        dict: A dictionary containing a list of models, e.g. `{"models": [{"id": "tts-1"}, ...]}`.
    """
    # Single GET against the models endpoint; the response is passed through as-is.
    available_models = await self._request("GET", "/v1/audio/models")
    return available_models

get_voices

get_voices() -> dict

Get available voices for the configured TTS engine.

Returns:

Name	Type	Description
`dict`	`dict`	A dictionary containing a list of voices, e.g. `{"voices": [{"id": "alloy", "name": "alloy"}, ...]}`.

Source code in src/owui_client/routers/audio.py

async def get_voices(self) -> dict:
    """
    List the voices offered by the configured TTS engine.

    Returns:
        dict: A dictionary containing a list of voices, e.g. `{"voices": [{"id": "alloy", "name": "alloy"}, ...]}`.
    """
    # Single GET against the voices endpoint; the response is passed through as-is.
    available_voices = await self._request("GET", "/v1/audio/voices")
    return available_voices