Component: SummarizerApp - Meeting and email summarization
Module: gaia.apps.summarize.app
Import: from gaia.apps.summarize.app import SummarizerApp, SummaryConfig
Overview
SummarizerApp processes meeting transcripts and emails to generate structured summaries in multiple styles (brief, detailed, bullets, executive, participants, action items). It supports auto-detection of the content type and can produce multi-style output in a single LLM call.
Key Features:
- Auto-detect content type (transcript vs email)
- 6 summary styles (brief, detailed, bullets, executive, participants, action_items)
- Combined or individual style generation
- Multi-model support (local, Claude, ChatGPT)
- Performance statistics tracking
- Retry logic for reliability
- File and directory batch processing
API Specification
SummaryConfig
@dataclass
class SummaryConfig:
    """Configuration for summarization.

    ``styles`` may be left as None; ``__post_init__`` then substitutes the
    default style list. ``use_chatgpt`` is switched on automatically when the
    model name starts with "gpt" (case-insensitive).
    """

    model: str = DEFAULT_MODEL_NAME
    max_tokens: int = 1024
    input_type: Literal["transcript", "email", "auto"] = "auto"
    # None is replaced by ["executive", "participants", "action_items"] in __post_init__
    styles: Optional[List[str]] = None
    combined_prompt: bool = False  # Generate all styles in one LLM call
    use_claude: bool = False
    use_chatgpt: bool = False

    def __post_init__(self):
        """Fill in the default styles and auto-detect OpenAI models."""
        if self.styles is None:
            self.styles = ["executive", "participants", "action_items"]
        # Auto-detect OpenAI models: any model name beginning with "gpt"
        if self.model.lower().startswith("gpt"):
            self.use_chatgpt = True
Summary Styles
# Maps each supported style name to the instruction text describing that summary style.
SUMMARY_STYLES = {
"brief": "Generate a concise 2-3 sentence summary highlighting the most important points.",
"detailed": "Generate a comprehensive summary with all key details, context, and nuances.",
"bullets": "Generate key points in a clear bullet-point format, focusing on actionable items.",
"executive": "Generate a high-level executive summary focusing on decisions, outcomes, and strategic implications.",
"participants": "Extract and list all meeting participants with their roles if mentioned.",
"action_items": "Extract all action items with owners and deadlines where specified.",
}
SummarizerApp
SummarizerApp is a thin wrapper that delegates to SummarizerAgent for the actual prompt construction, retry logic, and multi-style generation.
The app class itself exposes just four methods, the constructor plus three summarization entry points:
class SummarizerApp:
    """Main application class for summarization (delegates to SummarizerAgent)."""

    def __init__(self, config: Optional[SummaryConfig] = None):
        """Create the underlying SummarizerAgent from a SummaryConfig."""
        ...

    def summarize(
        self,
        content: str,
        styles: Optional[List[str]] = None,
        combined_prompt: Optional[bool] = None,
        input_type: str = "auto",
    ) -> Dict[str, Any]:
        """
        Summarize raw text.

        Args:
            content: Raw transcript or email text to summarize.
            styles: Styles to generate; when None, the value from
                SummaryConfig is used.
            combined_prompt: Whether to generate all styles in one LLM call;
                when None, the value from SummaryConfig is used.
            input_type: Content type; defaults to "auto".
                NOTE(review): declared as ``str`` with default "auto"; whether
                None falls back to SummaryConfig.input_type is not shown
                here - confirm.

        Returns:
            The result dictionary for the requested style(s).
        """
        ...

    def summarize_file(
        self,
        file_path: Path,
        styles: Optional[List[str]] = None,
        combined_prompt: Optional[bool] = None,
        input_type: str = "auto",
    ) -> Dict[str, Any]:
        """Summarize a single file (path may be str or Path)."""
        ...

    def summarize_directory(
        self,
        dir_path: Path,
        styles: Optional[List[str]] = None,
        combined_prompt: Optional[bool] = None,
        input_type: str = "auto",
    ) -> List[Dict[str, Any]]:
        """Summarize all text files in a directory, returning one result per file."""
        ...
Internal pipeline helpers like detect_content_type, generate_summary_prompt,
generate_combined_prompt, summarize_with_style, and summarize_combined
live on SummarizerAgent, not on SummarizerApp. Subclass or import
SummarizerAgent directly if you need to override those.
Module-level helpers
def validate_email_address(email: str) -> bool
def validate_email_list(email_list: str) -> list[str]
Utilities for the gaia summarize --email-to/--email-cc CLI flags.
Usage Examples
Example 1: Single Meeting Transcript
from gaia.apps.summarize.app import SummarizerApp, SummaryConfig
from pathlib import Path
# Configure for executive summary only
config = SummaryConfig(
    model="Qwen3.5-35B-A3B-GGUF",
    styles=["executive"],
    input_type="transcript",  # Or "auto" for detection
)
app = SummarizerApp(config)
# Summarize from file
result = app.summarize_file(Path("meeting.txt"))
# A single-style result keeps both the text and its performance stats under "summary"
print(result["summary"]["text"])
print(f"Tokens used: {result['summary']['performance']['total_tokens']}")
Example 2: Multiple Styles
# Generate multiple styles
# `content` is the raw text to summarize; it is not defined in this snippet
# and must be supplied by the caller.
config = SummaryConfig(
styles=["executive", "participants", "action_items"],
combined_prompt=True # More efficient - single LLM call
)
app = SummarizerApp(config)
result = app.summarize(content)
# Access different summaries: multi-style results are keyed by style name under "summaries"
print("Executive:", result["summaries"]["executive"]["text"])
print("Participants:", result["summaries"]["participants"]["text"])
print("Action Items:", result["summaries"]["action_items"]["text"])
Example 3: Batch Processing
# Summarize every file found in the directory
config = SummaryConfig(styles=["brief", "action_items"], input_type="auto")
app = SummarizerApp(config)
results = app.summarize_directory(Path("meetings/"))
# One result per file; print each file name next to its brief summary
for result in results:
    input_path = Path(result["metadata"]["input_file"])
    filename, summary = input_path.name, result["summaries"]["brief"]["text"]
    print(f"{filename}: {summary}")
Example 4: Email Summarization
email_content = """
From: [email protected]
To: [email protected]
Subject: Q4 Planning
Hi team,
We need to finalize Q4 goals by Friday...
"""
# Force email handling instead of relying on auto-detection
config = SummaryConfig(
styles=["executive", "participants"],
input_type="email"
)
app = SummarizerApp(config)
result = app.summarize(email_content)
# Email-specific participant extraction
# NOTE(review): the documented participants entry shows "text", "participants"
# and "performance"; "sender"/"recipients" are assumed to be added for email
# input - confirm. .get() keeps this safe if they are absent.
participants = result["summaries"]["participants"]
print(f"Sender: {participants.get('sender')}")
print(f"Recipients: {participants.get('recipients')}")
Single Style Output
{
"metadata": {
"input_file": "meeting.txt",
"input_type": "transcript",
"model": "Qwen3.5-35B",
"timestamp": "2025-01-15T10:30:00",
"processing_time_ms": 2500,
"summary_style": "executive"
},
"summary": {
"text": "Executive summary text...",
"performance": {
"total_tokens": 450,
"tokens_per_second": 15.2
}
},
"original_content": "..."
}
Multiple Styles Output
{
"metadata": {
"summary_styles": ["executive", "participants", "action_items"],
"...": "..."
},
"summaries": {
"executive": {
"text": "...",
"performance": {...}
},
"participants": {
"text": "...",
"participants": ["Alice", "Bob"],
"performance": {...}
},
"action_items": {
"text": "...",
"items": ["Task 1", "Task 2"],
"performance": {...}
}
},
"aggregate_performance": {
"total_tokens": 1200,
"total_processing_time_ms": 3500
}
}
Testing Requirements
def test_content_type_detection():
    """Test auto-detection of content type.

    detect_content_type is a SummarizerAgent helper and is not exposed on
    SummarizerApp, so the agent is exercised directly.
    """
    # NOTE(review): assumes SummarizerAgent is imported in the test module and
    # can be constructed without arguments - confirm against the agent's API.
    agent = SummarizerAgent()
    transcript = "Alice: Hello\nBob: Hi there"
    assert agent.detect_content_type(transcript) == "transcript"
    email = "From: [email protected]\nTo: [email protected]\nSubject: Test"
    assert agent.detect_content_type(email) == "email"
def test_single_style_summarization():
    """A single requested style yields a top-level "summary" entry containing text."""
    app = SummarizerApp(SummaryConfig(styles=["brief"]))
    result = app.summarize("Test content")
    # Single-style output uses the "summary" key rather than "summaries"
    assert "summary" in result
    assert "text" in result["summary"]
def test_multiple_styles():
    """Every requested style appears under "summaries" for a combined-prompt run."""
    multi_config = SummaryConfig(styles=["executive", "action_items"], combined_prompt=True)
    app = SummarizerApp(multi_config)
    result = app.summarize("Test meeting content")
    assert "summaries" in result
    # Check each requested style in the same order as the original assertions
    for style_name in ("executive", "action_items"):
        assert style_name in result["summaries"]
Dependencies
[project]
dependencies = [
"gaia.chat.sdk",
"gaia.llm.lemonade_client",
]
SummarizerApp Technical Specification