The LLM client package provides a unified interface for generating text from multiple LLM backends using a provider pattern. Each provider implements the abstract LLMClient interface, with optional methods raising NotSupportedError when unavailable.

Key Features:
Factory-based client creation with create_client()
Three providers: Lemonade (local AMD-optimized), OpenAI, Claude
Abstract base class for type safety and extensibility
Graceful handling of unsupported features via NotSupportedError
Streaming and non-streaming generation
Backward-compatible use_claude/use_openai flags
Provider Capabilities:

| Method                    | Lemonade | OpenAI | Claude |
|---------------------------|----------|--------|--------|
| `generate()`              | ✓        | ✓      | ✓      |
| `chat()`                  | ✓        | ✓      | ✓      |
| `embed()`                 | ✓        | ✓      | ✗      |
| `vision()`                | ✓        | ✗      | ✓      |
| `get_performance_stats()` | ✓        | ✗      | ✗      |
| `load_model()`            | ✓        | ✗      | ✗      |
| `unload_model()`          | ✓        | ✗      | ✗      |
Methods marked with ✗ raise NotSupportedError when called on that provider.
def create_client(
    provider: Optional[str] = None,
    use_claude: bool = False,
    use_openai: bool = False,
    **kwargs,
) -> LLMClient:
    """Create an LLM client, auto-detecting the provider from parameters.

    Args:
        provider: Explicit provider name ("lemonade", "openai", or "claude").
            When omitted, the provider is inferred from the legacy
            use_claude/use_openai flags.
        use_claude: If True, select the Claude provider (ignored when
            ``provider`` is given explicitly).
        use_openai: If True, select the OpenAI provider (ignored when
            ``provider`` is given explicitly).
        **kwargs: Provider-specific options (base_url, model, api_key, etc.).

    Returns:
        An LLMClient instance for the selected or detected provider.

    Raises:
        ValueError: If the provider name is unknown, or if both use_claude
            and use_openai are set.

    Examples:
        # Default Lemonade provider
        client = create_client()

        # Explicit provider selection
        client = create_client(provider="lemonade", model="Qwen3-0.6B-GGUF")
        client = create_client(provider="openai", api_key="sk-...")
        client = create_client(provider="claude", api_key="sk-ant-...")

        # Backward-compatible flags
        client = create_client(use_claude=True, api_key="sk-ant-...")
        client = create_client(use_openai=True, api_key="sk-...")

    Note:
        The provider defaults to "lemonade" when no flags are set; the flag
        arguments exist only for backward compatibility with older callers.
    """
from abc import ABC, abstractmethodfrom typing import Iterator, Unionclass LLMClient(ABC): """ Unified LLM client interface. Methods raise NotSupportedError if not available for this provider. """ @property @abstractmethod def provider_name(self) -> str: """Return the provider name for error messages.""" ... @abstractmethod def generate( self, prompt: str, model: str | None = None, stream: bool = False, **kwargs, ) -> Union[str, Iterator[str]]: """ Generate text completion. Args: prompt: The user prompt/query to send to the LLM model: The model to use (defaults to provider's default model) stream: If True, returns a generator that yields chunks of the response **kwargs: Additional parameters (temperature, max_tokens, etc.) Returns: If stream=False: The complete generated text as a string If stream=True: A generator yielding chunks of the response Example: response = client.generate("Write a hello world program") """ ... @abstractmethod def chat( self, messages: list[dict], model: str | None = None, stream: bool = False, **kwargs, ) -> Union[str, Iterator[str]]: """ Chat completion with message history. Args: messages: List of message dicts with 'role' and 'content' keys model: The model to use (defaults to provider's default model) stream: If True, returns a generator that yields chunks of the response **kwargs: Additional parameters (temperature, max_tokens, etc.) Returns: If stream=False: The complete generated text as a string If stream=True: A generator yielding chunks of the response Example: messages = [ {"role": "user", "content": "Hello"}, {"role": "assistant", "content": "Hi there!"}, {"role": "user", "content": "How are you?"} ] response = client.chat(messages) """ ... # Optional methods - default raises NotSupportedError def embed(self, texts: list[str], **kwargs) -> list[list[float]]: """ Generate embeddings for texts. 
Args: texts: List of text strings to embed **kwargs: Additional parameters (e.g., model="text-embedding-3-small" for OpenAI) Returns: List of embedding vectors (list of floats) Raises: NotSupportedError: If provider doesn't support embeddings Note: Supported by: Lemonade, OpenAI (default: "text-embedding-3-small") Not supported by: Claude """ raise NotSupportedError(self.provider_name, "embed") def vision(self, images: list[bytes], prompt: str, **kwargs) -> str: """ Vision/image understanding. Args: images: List of image data as bytes prompt: Text prompt describing what to analyze **kwargs: Additional parameters Returns: Text response describing the image Raises: NotSupportedError: If provider doesn't support vision Note: Supported by: Lemonade, Claude Not supported by: OpenAI """ raise NotSupportedError(self.provider_name, "vision") def get_performance_stats(self) -> dict: """ Get performance statistics from the last LLM request. Returns: Dictionary containing performance statistics Raises: NotSupportedError: If provider doesn't support performance stats Note: Only supported by: Lemonade """ raise NotSupportedError(self.provider_name, "get_performance_stats") def load_model(self, model_name: str, **kwargs) -> None: """ Load a specific model. Args: model_name: Name of the model to load **kwargs: Additional parameters Raises: NotSupportedError: If provider doesn't support model loading Note: Only supported by: Lemonade """ raise NotSupportedError(self.provider_name, "load_model") def unload_model(self) -> None: """ Unload the current model. Raises: NotSupportedError: If provider doesn't support model unloading Note: Only supported by: Lemonade """ raise NotSupportedError(self.provider_name, "unload_model")
class NotSupportedError(Exception):
    """Raised when a provider doesn't support a method.

    Attributes:
        provider: Name of the provider that rejected the call.
        method: Name of the unsupported method.
    """

    def __init__(self, provider: str, method: str):
        super().__init__(f"{provider} does not support {method}")
        # Keep the arguments so callers can inspect them programmatically
        # instead of parsing the message string.
        self.provider = provider
        self.method = method
class LemonadeProvider(LLMClient):
    """Lemonade provider - local AMD-optimized inference."""

    def __init__(
        self,
        model: Optional[str] = None,
        base_url: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        system_prompt: Optional[str] = None,
        **kwargs,
    ):
        """Initialize the Lemonade provider.

        Args:
            model: Model name (defaults to "Qwen3-0.6B-GGUF").
            base_url: Base URL for the Lemonade server (overrides the
                LEMONADE_BASE_URL env var).
            host: Server host (alternative to base_url).
            port: Server port (alternative to base_url).
            system_prompt: Default system prompt for chat.
            **kwargs: Additional arguments passed to LemonadeClient.

        Environment:
            LEMONADE_BASE_URL: Default base URL (http://localhost:8000/api/v1)
            LEMONADE_MODEL: Default model name if not specified

        Note:
            Default model is "Qwen3-0.6B-GGUF" for CPU-only inference.
            All methods use temperature=0.1 by default for deterministic
            responses.
        """
        # Supports all methods: generate, chat, embed, vision,
        # get_performance_stats, load_model, unload_model
The factory function auto-detects the provider based on parameters:
Copy
# From factory.py
def create_client(provider=None, use_claude=False, use_openai=False, **kwargs):
    """Resolve the provider name from legacy flags, validate it, and build a client."""
    if provider is None:
        # Legacy boolean flags: at most one may be set.
        if use_claude and use_openai:
            raise ValueError("Cannot specify both use_claude and use_openai")
        if use_claude:
            provider = "claude"
        elif use_openai:
            provider = "openai"
        else:
            provider = "lemonade"  # Default

    # Reject anything not in the provider registry.
    if provider.lower() not in _PROVIDERS:
        available = ", ".join(_PROVIDERS.keys())
        raise ValueError(f"Unknown provider: {provider}. Available: {available}")

    # Load provider class dynamically...
# pyproject.toml
[project]
dependencies = [
    "openai>=1.0.0",        # OpenAI Python SDK (used for local + OpenAI)
    "httpx>=0.24.0",        # HTTP client with timeout support
    "requests>=2.31.0",     # For performance stats/control endpoints
    "python-dotenv>=1.0.0", # Environment variable management
]

[project.optional-dependencies]
claude = ["anthropic>=0.18.0"]  # Claude API support
from gaia.llm import create_client

# Default Lemonade provider
client = create_client()

# Non-streaming generation
response = client.generate("Write a hello world program in Python")
print(response)

# Get performance stats (Lemonade only)
stats = client.get_performance_stats()
print(f"Speed: {stats.get('tokens_per_second', 'N/A')} tokens/sec")
from gaia.llm import create_client, NotSupportedError

# Create OpenAI client
client = create_client(provider="openai", api_key="sk-...")

# This works (OpenAI supports embed)
embeddings = client.embed(["Hello world", "How are you?"])

# This raises NotSupportedError (OpenAI doesn't support vision)
try:
    result = client.vision([image_bytes], "Describe this image")
except NotSupportedError as e:
    print(f"Feature not available: {e}")
    # Output: "OpenAI does not support vision"
from gaia.llm import create_client

client = create_client()

# Streaming with generate()
print("AI: ", end="", flush=True)
for chunk in client.generate("Tell me a short story", stream=True):
    print(chunk, end="", flush=True)
print()

# Streaming with chat()
for chunk in client.chat([{"role": "user", "content": "Hello"}], stream=True):
    print(chunk, end="", flush=True)
from gaia.llm import create_client

# With Lemonade
lemonade = create_client()
embeddings = lemonade.embed(["Hello world", "How are you?"])
print(f"Embedding dimensions: {len(embeddings[0])}")

# With OpenAI
openai_client = create_client(provider="openai", api_key="sk-...")
embeddings = openai_client.embed(["Text to embed"])
from gaia.llm import create_client

# With Claude
claude = create_client(provider="claude", api_key="sk-ant-...")

with open("image.jpg", "rb") as f:
    image_data = f.read()

description = claude.vision([image_data], "Describe what you see")
print(description)
from gaia.llm import create_client

# Connect to remote server
client = create_client(base_url="http://192.168.1.100:8000")
response = client.generate("Hello from remote server")
print(response)
from gaia.llm import create_client

# Set default system prompt
client = create_client(
    system_prompt="You are a helpful coding assistant."
)

# System prompt automatically prepended to chat messages
response = client.chat([
    {"role": "user", "content": "Write a binary search function"}
])
print(response)
GAIA supports third-party LLM service providers through its OpenAI-compatible API interface. Any service implementing the OpenAI API specification can be used with GAIA.
from gaia.llm import create_client

# Connect to your third-party LLM service
client = create_client(base_url="http://your-service:8080/v1")

# Test connection
response = client.generate("Hello, are you working?")
print(response)
### LLM Client

**Import:** `from gaia.llm import create_client, LLMClient, NotSupportedError`

**Purpose:** Provider-based LLM client with factory pattern for local and cloud backends.

**Features:**
- Factory-based client creation with `create_client()`
- Three providers: Lemonade (local), OpenAI, Claude
- Abstract base class for type safety
- `NotSupportedError` for unsupported features
- Streaming and non-streaming generation
- Backward-compatible flags

**Quick Start:**
```python
from gaia.llm import create_client

# Local LLM (default)
client = create_client()
response = client.generate("Hello world")

# Streaming
for chunk in client.generate("Tell me a story", stream=True):
    print(chunk, end="")

# Claude API
claude = create_client(provider="claude", api_key="sk-ant-...")
response = claude.generate("Explain Python decorators")

# Backward-compatible
client = create_client(use_claude=True)
```