Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Core fixes: SDKs now expose real monotonic commit timestamps, database config is actually applied, and scans are safer by default via prefix-bounds (multi-tenant isolation).

Query + retrieval upgrades: Adds a real token-budgeted context query engine (dedup + provenance), a hardened SQL/MCP parser, and working vector search (no more placeholder embeddings).

Agent-first features: Introduces Graph Overlay, Policy/Safety hooks (guardrails + audit), and Tool Routing (pick the right tools/agents, not all of them).

Status snapshot: Most features are marked Production across all SDKs; SQL indexing + per-table index policies are currently strongest.
  • Loading branch information
sushanthpy committed Jan 6, 2026
commit f10007580b305dfa6a84b2800f9f30d1d80f13c6
109 changes: 109 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,115 @@ pip install toondb-client

## What's New in Latest Release

### 🛡️ Policy & Safety Hooks
Enforce safety policies on agent operations with pre/post triggers:

```python
from toondb import Database, PolicyEngine, PolicyAction

db = Database.open("./agent_data")
policy = PolicyEngine(db)

# Block writes to system keys from agents
@policy.before_write("system/*")
def block_system_writes(key, value, context):
    # Deny when the call context carries a (truthy) agent_id; allow otherwise.
    if context.get("agent_id"):
        return PolicyAction.DENY
    return PolicyAction.ALLOW

# Redact sensitive data on read
@policy.after_read("users/*/email")
def redact_emails(key, value, context):
    # Swap the stored value for a placeholder only when redaction is requested.
    if context.get("redact_pii"):
        return b"[REDACTED]"
    return value

# Rate limit writes per agent
policy.add_rate_limit("write", max_per_minute=100, scope="agent_id")

# Enable audit logging
policy.enable_audit()

# Use policy-wrapped operations
policy.put(b"users/alice", b"data", context={"agent_id": "agent_001"})
```

### 🔀 Multi-Agent Tool Routing
Route tool calls to specialized agents with automatic failover:

```python
from toondb import Database, ToolDispatcher, ToolCategory, RoutingStrategy

db = Database.open("./agent_data")
dispatcher = ToolDispatcher(db)

# Register agents with capabilities
dispatcher.register_local_agent(
"code_agent",
capabilities=[ToolCategory.CODE, ToolCategory.GIT],
handler=lambda tool, args: {"result": f"Processed {tool}"},
)

dispatcher.register_remote_agent(
"search_agent",
capabilities=[ToolCategory.SEARCH],
endpoint="http://localhost:8001/invoke",
)

# Register tools
dispatcher.register_tool(
name="search_code",
description="Search codebase",
category=ToolCategory.CODE,
)

# Invoke with automatic routing (priority, round-robin, fastest, etc.)
result = dispatcher.invoke("search_code", {"query": "auth"}, session_id="sess_001")
print(f"Routed to: {result.agent_id}, Success: {result.success}")
```

### 🕸️ Graph Overlay
Lightweight graph layer for agent memory relationships:

```python
from toondb import Database, GraphOverlay, TraversalOrder

db = Database.open("./agent_data")
graph = GraphOverlay(db)

# Add nodes (entities, concepts, events)
graph.add_node("user:alice", node_type="user", properties={"role": "admin"})
graph.add_node("project:toondb", node_type="project", properties={"status": "active"})

# Add relationships
graph.add_edge("user:alice", "project:toondb", edge_type="owns", properties={"since": "2024"})

# Traverse graph (BFS/DFS)
related = graph.bfs("user:alice", max_depth=2, edge_filter=lambda e: e.edge_type == "owns")

# Find shortest path
path = graph.shortest_path("user:alice", "project:toondb")
```

### 🔗 Unified Connection API
Single entry point with auto-detection:

```python
import toondb

# Auto-detects embedded mode from path
db = toondb.connect("./my_database")

# Auto-detects IPC mode from socket
db = toondb.connect("/tmp/toondb.sock")

# Auto-detects gRPC mode from host:port
db = toondb.connect("localhost:50051")

# Explicit mode
db = toondb.connect("./data", mode="embedded", config={"sync_mode": "full"})
```

### 🎯 Namespace Isolation
Logical database namespaces for true multi-tenancy without key prefixing:

Expand Down
208 changes: 208 additions & 0 deletions src/toondb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,44 @@
estimate_tokens,
split_by_tokens,
)
from .graph import (
# Graph Overlay (Task 10)
GraphOverlay,
GraphNode,
GraphEdge,
TraversalOrder,
)
from .policy import (
# Policy & Safety Hooks (Task 11)
PolicyEngine,
PolicyAction,
PolicyTrigger,
PolicyResult,
PolicyContext,
PolicyHandler,
PatternPolicy,
RateLimiter,
PolicyViolation,
# Built-in policy helpers
deny_all,
allow_all,
require_agent_id,
redact_value,
log_and_allow,
)
from .routing import (
# Tool Routing (Task 12)
ToolRouter,
AgentRegistry,
ToolDispatcher,
Tool,
Agent,
ToolCategory,
RoutingStrategy,
AgentStatus,
RouteResult,
RoutingContext,
)

# Vector search (optional - requires libtoondb_index)
try:
Expand Down Expand Up @@ -105,7 +143,143 @@
is_analytics_disabled = lambda: True

__version__ = "0.3.1"


# =============================================================================
# Unified Connection API (Task 9: Standardize Deployment Modes)
# =============================================================================

from enum import Enum
from typing import Optional, Union


class ConnectionMode(Enum):
    """Ways a client can be connected to ToonDB.

    Each member's value is the lowercase string accepted wherever a mode
    may be given by name (``"embedded"``, ``"ipc"``, ``"grpc"``).
    """

    # Direct FFI to the Rust library.
    EMBEDDED = "embedded"

    # Unix socket to a local server.
    IPC = "ipc"

    # gRPC to a remote server.
    GRPC = "grpc"


def connect(
    path_or_url: str,
    mode: Optional[Union[str, ConnectionMode]] = None,
    config: Optional[dict] = None,
) -> Union[Database, IpcClient]:
    """
    Connect to ToonDB with automatic mode detection.

    This is the unified entry point for all ToonDB connection modes.
    If mode is not specified, it auto-detects based on the path/URL:

    - Embedded: File paths (./data, /tmp/db, ~/toondb)
    - IPC: Unix socket paths (/tmp/toondb.sock, unix://...)
    - gRPC: URLs with grpc:// scheme or host:port format

    Args:
        path_or_url: Database path, socket path, or gRPC URL. A leading
            ``unix://`` (IPC) or ``grpc://`` (gRPC) scheme is stripped
            before the value is handed to the client.
        mode: Optional explicit mode ('embedded', 'ipc', 'grpc' - matched
            case-insensitively - or a ConnectionMode member). When given,
            no auto-detection is performed.
        config: Optional configuration dict. It is forwarded only in
            embedded mode (to ``Database.open``); the IPC and gRPC
            branches do not use it.

    Returns:
        Database, IpcClient, or GrpcClient depending on mode. (GrpcClient
        is imported lazily inside this function, which is why it does not
        appear in the return annotation.)

    Raises:
        ValueError: If ``mode`` is a string that names no ConnectionMode,
            or is any other value that matches no known mode.
        ImportError: If gRPC mode is selected but the ``grpc_client``
            module cannot be imported.

    Examples:
        # Embedded mode (auto-detected from file path)
        db = toondb.connect("./my_database")
        db.put(b"key", b"value")

        # IPC mode (auto-detected from .sock extension)
        db = toondb.connect("/tmp/toondb.sock")

        # gRPC mode (auto-detected from host:port)
        db = toondb.connect("localhost:50051")

        # Explicit mode
        db = toondb.connect("./data", mode="embedded", config={
            "sync_mode": "full",
            "index_policy": "scan_optimized",
        })

        # Using enum
        db = toondb.connect("localhost:50051", mode=toondb.ConnectionMode.GRPC)
    """
    # Normalize mode to enum
    if mode is None:
        detected_mode = _detect_mode(path_or_url)
    elif isinstance(mode, str):
        try:
            detected_mode = ConnectionMode(mode.lower())
        except ValueError:
            # The enum's own "not a valid ConnectionMode" error adds nothing
            # to the message below, so suppress it from the traceback.
            raise ValueError(
                f"Invalid mode '{mode}'. Valid modes: embedded, ipc, grpc"
            ) from None
    else:
        detected_mode = mode

    # Create appropriate client
    if detected_mode == ConnectionMode.EMBEDDED:
        return Database.open(path_or_url, config=config)

    if detected_mode == ConnectionMode.IPC:
        socket_path = path_or_url
        if socket_path.startswith("unix://"):
            socket_path = socket_path[len("unix://"):]  # Strip unix:// prefix
        return IpcClient(socket_path)

    if detected_mode == ConnectionMode.GRPC:
        # Guard only the import: an ImportError raised while *constructing*
        # the client is a different failure and must not be relabelled as
        # "missing grpc dependencies".
        try:
            from .grpc_client import GrpcClient
        except ImportError as err:
            # NOTE(review): the README installs this package as
            # `toondb-client`; confirm the distribution name in this hint.
            raise ImportError(
                "gRPC mode requires grpc dependencies. "
                "Install with: pip install toondb[grpc]"
            ) from err

        url = path_or_url
        if url.startswith("grpc://"):
            url = url[len("grpc://"):]  # Strip grpc:// prefix
        return GrpcClient(url)

    # Reached only when `mode` was neither None, a str, nor a known member.
    raise ValueError(f"Unknown connection mode: {detected_mode}")


def _detect_mode(path_or_url: str) -> ConnectionMode:
    """Auto-detect connection mode from path/URL format.

    Rules are applied in order and the first match wins:

    1. Starts with ``grpc://`` -> GRPC; starts with ``unix://`` -> IPC.
    2. Ends with ``.sock``, or contains ``/tmp/`` together with ``sock``
       (case-insensitive) anywhere in the string -> IPC.
    3. The text after the last ``:`` parses as an integer between 1 and
       65535 -> GRPC (treated as ``host:port``).
    4. Anything else -> EMBEDDED (treated as a file path).

    Rule 3 is purely textual, so a file path that happens to end in
    ``:<number>`` is classified as gRPC; callers with such paths should
    pass ``mode`` explicitly to ``connect``.

    Args:
        path_or_url: The string given to ``connect``.

    Returns:
        The ConnectionMode selected by the rules above.
    """
    # Explicit scheme detection
    if path_or_url.startswith("grpc://"):
        return ConnectionMode.GRPC
    if path_or_url.startswith("unix://"):
        return ConnectionMode.IPC

    # Socket file detection
    if path_or_url.endswith(".sock"):
        return ConnectionMode.IPC
    if "/tmp/" in path_or_url and "sock" in path_or_url.lower():
        return ConnectionMode.IPC

    # Host:port detection (gRPC). rpartition yields an empty separator when
    # there is no ":" at all, so no separate membership test is needed.
    _, separator, port_text = path_or_url.rpartition(":")
    if separator:
        try:
            port = int(port_text)
        except ValueError:
            port = None  # Not numeric after the colon - not host:port
        if port is not None and 1 <= port <= 65535:
            # Looks like host:port - probably gRPC
            return ConnectionMode.GRPC

    # Default to embedded for file paths
    return ConnectionMode.EMBEDDED


__all__ = [
# Unified API (Task 9)
"connect",
"ConnectionMode",

# Core
"Database",
"Transaction",
Expand All @@ -129,6 +303,40 @@
"SearchResult",
"SearchResults",

# Graph Overlay (Task 10)
"GraphOverlay",
"GraphNode",
"GraphEdge",
"TraversalOrder",

# Policy & Safety Hooks (Task 11)
"PolicyEngine",
"PolicyAction",
"PolicyTrigger",
"PolicyResult",
"PolicyContext",
"PolicyHandler",
"PatternPolicy",
"RateLimiter",
"PolicyViolation",
"deny_all",
"allow_all",
"require_agent_id",
"redact_value",
"log_and_allow",

# Tool Routing (Task 12)
"ToolRouter",
"AgentRegistry",
"ToolDispatcher",
"Tool",
"Agent",
"ToolCategory",
"RoutingStrategy",
"AgentStatus",
"RouteResult",
"RoutingContext",

# ContextQuery (Task 12)
"ContextQuery",
"ContextResult",
Expand Down
Loading