Import: from gaia.talk.sdk import TalkSDK, TalkConfig, TalkResponse, TalkMode
Detailed Spec: spec/talk-sdk
Purpose: Unified voice and text chat interface with RAG integration, combining ASR (Whisper) and TTS (Kokoro) with conversational AI.
When to use it:
- Building voice-enabled AI assistants
- Creating hands-free interfaces
- Adding speech capabilities to existing agents
- Building document Q&A with voice interaction
10.1 Basic Voice & Text Chat
from gaia.talk.sdk import TalkSDK, TalkConfig, TalkMode
# Configure talk SDK
config = TalkConfig(
mode=TalkMode.VOICE_AND_TEXT, # or TEXT_ONLY, VOICE_ONLY
whisper_model_size="base", # ASR model size
enable_tts=True, # Text-to-speech
assistant_name="gaia",
system_prompt="You are a helpful AI assistant.",
max_history_length=10, # Conversation memory
show_stats=True
)
# Create talk instance
talk = TalkSDK(config)
# Text chat (async)
response = await talk.chat("What's the weather like?")
print(response.text)
if response.stats:
print(f"Tokens: {response.stats['total_tokens']}")
# Streaming text chat
async for chunk in talk.chat_stream("Tell me a story"):
print(chunk.text, end="", flush=True)
# Voice chat session
await talk.start_voice_session()
# User speaks -> Whisper transcribes -> LLM responds -> TTS speaks answer
10.2 Voice + RAG Integration
from gaia.talk.sdk import TalkSDK, TalkConfig
# Configure with RAG documents
config = TalkConfig(
enable_tts=True,
rag_documents=["manual.pdf", "guide.pdf", "faq.pdf"]
)
talk = TalkSDK(config)
# Now voice queries can search documents
await talk.start_voice_session()
# User: "What does the manual say about installation?"
# Agent searches PDFs and speaks the answer
# Add more documents dynamically
talk.add_document("troubleshooting.pdf")
# Text query with document context
response = await talk.chat("How do I configure the system?")
print(response.text) # Answer based on indexed documents
10.3 Simple Talk Interface
from gaia.talk.sdk import SimpleTalk
# Minimal setup for quick integration
talk = SimpleTalk(
system_prompt="You are a cooking assistant.",
enable_tts=True,
assistant_name="chef"
)
# Ask questions
answer = await talk.ask("How do I make pasta?")
print(answer)
# Streaming responses
async for chunk in talk.ask_stream("Tell me about Italian cuisine"):
print(chunk, end="", flush=True)
# Voice chat
await talk.voice_chat()
# Starts interactive voice session
# Conversation management
history = talk.get_conversation()
print(history)
talk.clear_memory()
10.4 TalkResponse Object
response = await talk.chat("Hello!")
# Response attributes (TalkResponse)
print(response.text) # Generated response text
print(response.is_complete) # True for final response
print(response.stats) # Performance statistics dict; may be falsy when stats are disabled
# Stats keys include (presumably when show_stats=True in TalkConfig — verify against SDK):
# - total_tokens
# - latency_ms
# - model_name