Source Code:
src/gaia/agents/tools/file_tools.py
Component: FileSearchToolsMixin
Module:
gaia.agents.tools.file_tools
Import: from gaia.agents.tools.file_tools import FileSearchToolsMixin

Overview
FileSearchToolsMixin provides agent-agnostic file search and management operations shared across multiple agents (Chat, Code). Features intelligent multi-phase search, type-based file analysis, and grep-like content searching.

Key Features:
- 3-phase intelligent file search (CWD → common locations → deep scan)
- Type-aware file reading (Python AST, Markdown structure, binary detection)
- Grep-like content search across file systems
- Directory search with depth control
- Generic file writing with directory creation
- Phase 0: Deep search current working directory (unlimited depth)
- Phase 1: Search common locations (Documents, Downloads, Desktop) with depth limit
- Phase 2: Deep drive/root search if still not found
- Early termination on first match for speed
API Specification
class FileSearchToolsMixin:
    """
    Mixin providing shared file search and read operations.

    Register with `self.register_file_search_tools()`.

    Tools provided:
    - search_file: Multi-phase intelligent file search
    - search_directory: Directory search by name
    - read_file: Type-aware file reading with analysis
    - search_file_content: Grep-like content search
    - write_file: Generic file writer
    - browse_directory: List directory contents with metadata
    - get_file_info: Inspect a single file's metadata
    - analyze_data_file: Summarise CSV / JSON / XLSX / log files
    - list_recent_files: Surface recently-modified files by location
    """

    # NOTE(review): the tool methods below are declared without `self`;
    # presumably the `@tool` decorator handles binding/registration for the
    # agent framework — confirm against the decorator's implementation.

    @tool
    def search_file(
        file_pattern: str,
        deep_search: bool = False,
        file_types: str = None,
    ) -> Dict[str, Any]:
        """
        Search for files with a two-phase strategy.

        Quick search (default): CWD + common locations (Documents,
        Downloads, Desktop).
        Deep search (deep_search=True): all drives (Windows) or root (Unix).

        Always do the quick search first. Only use deep_search=True after
        the quick search found nothing and the user confirms they want a
        deeper search.

        Args:
            file_pattern: Pattern to search (e.g., 'oil', '*.pdf')
            deep_search: Search all drives thoroughly (default: False)
            file_types: Filter by extensions (e.g., 'pdf,docx')

        Returns:
            {
                "status": "success",
                "files": List[str],              # Paths (max 10)
                "file_list": List[{              # Formatted for display
                    "number": int,
                    "name": str,
                    "path": str,
                    "directory": str
                }],
                "count": int,
                "search_context": "common_locations",
                "display_message": str,
                "deep_search_available": bool,   # True when quick search found nothing
                "user_instruction": str          # If multiple found
            }
        """
        pass

    @tool
    def search_directory(
        directory_name: str,
        search_root: str = None,
        max_depth: int = 4,
    ) -> Dict[str, Any]:
        """
        Search for directories by name.

        Args:
            directory_name: Directory name pattern
            search_root: Root to start from (default: home)
            max_depth: Max recursion depth (default: 4)

        Returns:
            {
                "status": "success",
                "directories": List[str],        # Max 10
                "count": int,
                "message": str
            }
        """
        pass

    @tool
    def read_file(file_path: str) -> Dict[str, Any]:
        """
        Read file with intelligent type-based analysis.

        File Type Support:
        - Python (.py): AST validation + symbol extraction (functions/classes)
        - Markdown (.md): Headers + code blocks + links extraction
        - Binary: Detection with size reporting
        - Text: Raw content with line count

        Args:
            file_path: Path to file

        Returns:
            {
                "status": "success",
                "file_path": str,
                "file_type": "python" | "markdown" | "binary" | str,
                "content": str,
                "line_count": int,
                "size_bytes": int,
                # Python-specific
                "is_valid": bool,
                "errors": List[str],
                "symbols": List[{
                    "name": str,
                    "type": "function" | "class",
                    "line": int
                }],
                # Markdown-specific
                "headers": List[str],
                "code_blocks": List[{
                    "language": str,
                    "code": str
                }],
                "links": List[{
                    "text": str,
                    "url": str
                }],
                # Binary-specific
                "is_binary": bool
            }
        """
        pass

    @tool
    def search_file_content(
        pattern: str,
        directory: str = ".",
        file_pattern: str = None,
        case_sensitive: bool = False,
    ) -> Dict[str, Any]:
        """
        Grep-like file content search.

        Args:
            pattern: Text pattern to find
            directory: Where to search (default: current)
            file_pattern: File glob filter (e.g., '*.py')
            case_sensitive: Case sensitivity (default: False)

        Returns:
            {
                "status": "success",
                "pattern": str,
                "matches": List[{
                    "file": str,
                    "line": int,
                    "content": str               # Max 200 chars
                }],
                "total_matches": int,            # Max 100
                "files_searched": int,
                "message": str
            }
        """
        pass

    @tool
    def write_file(
        file_path: str,
        content: str,
        create_dirs: bool = True,
    ) -> Dict[str, Any]:
        """
        Write content to file.

        Args:
            file_path: Target path
            content: File content
            create_dirs: Create parent dirs (default: True)

        Returns:
            {
                "status": "success",
                "file_path": str,
                "bytes_written": int,
                "line_count": int
            }
        """
        pass

    @tool
    def browse_directory(
        directory_path: str = None,
        show_hidden: bool = False,
        sort_by: str = "name",
    ) -> Dict[str, Any]:
        """
        List a directory's contents with file metadata.

        Args:
            directory_path: Directory to list (default: current working dir)
            show_hidden: Include dotfiles
            sort_by: "name", "size", or "mtime"

        Returns:
            {"status": "success",
             "directory": str,
             "entries": List[{"name": str, "is_dir": bool,
                              "size_bytes": int, "mtime": float}]}
        """
        pass

    @tool
    def get_file_info(file_path: str) -> Dict[str, Any]:
        """
        Return metadata for a single file (size, mtime, type, permissions).
        """
        pass

    @tool
    def analyze_data_file(file_path: str) -> Dict[str, Any]:
        """
        Summarise a data file (CSV, JSON, XLSX, log, ...) with row/column
        counts, column names, sample rows, and inferred dtypes. Used when
        the agent needs a quick overview of a dataset before reading it in
        full.
        """
        pass

    @tool
    def list_recent_files(
        location: str = "all",
        file_types: str = None,
        limit: int = 20,
    ) -> Dict[str, Any]:
        """
        Return recently-modified files across common user locations
        (Desktop/Documents/Downloads/CWD). `location` may be a specific
        folder name or "all".
        """
        pass
Implementation Highlights
3-Phase Search
# Phase 0: Current directory (deep)
cwd = Path.cwd()
self.console.start_progress(f"Searching {cwd.name}...")
search_location(cwd, max_depth=999)
if matching_files:
return {"search_context": "current_directory", ...}
# Phase 1: Common locations
for location in [home/"Documents", home/"Downloads", home/"Desktop"]:
search_location(location, max_depth=5)
if matching_files:
return {"search_context": "common_locations", ...}
# Phase 2: Deep drive search
if platform.system() == "Windows":
for drive in ["C:/", "D:/", ...]:
search_location(drive, max_depth=999)
else:
search_location(Path("/"), max_depth=999)
Python File Analysis
if ext == ".py":
tree = ast.parse(content)
symbols = []
for node in ast.walk(tree):
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
symbols.append({
"name": node.name,
"type": "function",
"line": node.lineno
})
elif isinstance(node, ast.ClassDef):
symbols.append({
"name": node.name,
"type": "class",
"line": node.lineno
})
result["symbols"] = symbols
Testing Requirements
File: tests/agents/tools/test_file_search_mixin.py
Key tests:
- 3-phase search progression
- File type detection and analysis
- Python AST symbol extraction
- Markdown structure parsing
- Binary file detection
- Grep content search
- Directory creation on write
- Large file handling
Usage Examples
# Multi-phase file search
result = agent.search_file("manual", file_types="pdf")
print(f"Found {result['count']} files in {result['search_context']}")
# Python file analysis
result = agent.read_file("src/main.py")
for symbol in result['symbols']:
print(f"{symbol['type']}: {symbol['name']} (line {symbol['line']})")
# Content search
result = agent.search_file_content("TODO", directory="src", file_pattern="*.py")
for match in result['matches']:
print(f"{match['file']}:{match['line']}: {match['content']}")
# Write file
result = agent.write_file("output/report.txt", "Report content", create_dirs=True)
FileSearchToolsMixin Technical Specification