Initial commit: add .gitignore and README
This commit is contained in:
30
fusionagi/interfaces/__init__.py
Normal file
30
fusionagi/interfaces/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Multi-modal interface layer for FusionAGI.
|
||||
|
||||
Provides admin control panel, user interfaces, and sensory interaction adapters.
|
||||
"""
|
||||
|
||||
from fusionagi.interfaces.base import (
|
||||
InterfaceAdapter,
|
||||
InterfaceCapabilities,
|
||||
InterfaceMessage,
|
||||
ModalityType,
|
||||
)
|
||||
from fusionagi.interfaces.voice import VoiceInterface, VoiceLibrary, TTSAdapter, STTAdapter
|
||||
from fusionagi.interfaces.conversation import ConversationManager, ConversationTuner
|
||||
from fusionagi.interfaces.admin_panel import AdminControlPanel
|
||||
from fusionagi.interfaces.multimodal_ui import MultiModalUI
|
||||
|
||||
# Public API re-exported by the interfaces package, grouped by the
# submodule each name is imported from.
__all__ = [
    # fusionagi.interfaces.base
    "InterfaceAdapter", "InterfaceCapabilities", "InterfaceMessage", "ModalityType",
    # fusionagi.interfaces.voice
    "VoiceInterface", "VoiceLibrary", "TTSAdapter", "STTAdapter",
    # fusionagi.interfaces.conversation
    "ConversationManager", "ConversationTuner",
    # fusionagi.interfaces.admin_panel
    "AdminControlPanel",
    # fusionagi.interfaces.multimodal_ui
    "MultiModalUI",
]
|
||||
425
fusionagi/interfaces/admin_panel.py
Normal file
425
fusionagi/interfaces/admin_panel.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""Admin control panel for FusionAGI system management.
|
||||
|
||||
Provides administrative interface for:
|
||||
- Voice library management
|
||||
- Conversation tuning
|
||||
- Agent configuration
|
||||
- System monitoring
|
||||
- Governance policies
|
||||
- Manufacturing authority
|
||||
"""
|
||||
|
||||
from typing import Any, Callable, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from fusionagi._time import utc_now, utc_now_iso
|
||||
from fusionagi.interfaces.voice import VoiceLibrary, VoiceProfile
|
||||
from fusionagi.interfaces.conversation import ConversationTuner, ConversationStyle
|
||||
from fusionagi.core import Orchestrator, EventBus, StateManager
|
||||
from fusionagi.governance import PolicyEngine, AuditLog
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class SystemStatus(BaseModel):
    """Point-in-time snapshot of system health and load."""

    # Coarse health verdict; one of the three literal values below.
    status: Literal["healthy", "degraded", "offline"] = Field(
        description="Overall system status",
    )
    uptime_seconds: float = Field(
        description="System uptime in seconds",
    )
    active_tasks: int = Field(
        description="Number of active tasks",
    )
    active_agents: int = Field(
        description="Number of registered agents",
    )
    active_sessions: int = Field(
        description="Number of active user sessions",
    )
    # Resource figures are optional and default to None when not supplied.
    memory_usage_mb: float | None = Field(
        description="Memory usage in MB",
        default=None,
    )
    cpu_usage_percent: float | None = Field(
        description="CPU usage percentage",
        default=None,
    )
    # Filled in by utc_now_iso() when the snapshot is created without one.
    timestamp: str = Field(default_factory=utc_now_iso)
|
||||
|
||||
|
||||
class AgentConfig(BaseModel):
    """Administrative settings for a single agent."""

    # Identity (both required).
    agent_id: str
    agent_type: str

    # Scalar settings with their defaults.
    enabled: bool = True
    max_concurrent_tasks: int = 10
    timeout_seconds: float = 300.0

    # Free-form dictionaries; each instance gets its own empty dict.
    retry_policy: dict[str, Any] = Field(default_factory=dict)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class AdminControlPanel:
    """
    Administrative control panel for FusionAGI.

    Provides centralized management interface for:
    - Voice libraries and TTS/STT configuration
    - Conversation styles and natural language tuning
    - Agent configuration and monitoring
    - System health and performance metrics
    - Governance policies and audit logs
    - Manufacturing authority (MAA) settings

    Every mutating operation is reported through ``_log_admin_action`` so it
    reaches both the application log and, when configured, the audit log.
    """

    # Thresholds used by get_system_status() to report "degraded":
    # either too many active tasks, or many failures while under real load.
    _DEGRADED_ACTIVE_TASKS = 1000
    _DEGRADED_FAILED_TASKS = 50
    _DEGRADED_FAILED_MIN_ACTIVE = 100

    def __init__(
        self,
        orchestrator: Orchestrator,
        event_bus: EventBus,
        state_manager: StateManager,
        voice_library: VoiceLibrary | None = None,
        conversation_tuner: ConversationTuner | None = None,
        policy_engine: PolicyEngine | None = None,
        audit_log: AuditLog | None = None,
        session_count_callback: Callable[[], int] | None = None,
    ) -> None:
        """
        Initialize admin control panel.

        Args:
            orchestrator: FusionAGI orchestrator.
            event_bus: Event bus for system events (use EventBus(history_size=N) for event history).
            state_manager: State manager for task state.
            voice_library: Voice library for TTS management; a new one is created if omitted.
            conversation_tuner: Conversation tuner for NL configuration; a new one is created if omitted.
            policy_engine: Policy engine for governance (optional).
            audit_log: Audit log for compliance tracking (optional).
            session_count_callback: Optional callback returning active user session count (e.g. from MultiModalUI).
        """
        self.orchestrator = orchestrator
        self.event_bus = event_bus
        self.state_manager = state_manager
        self.voice_library = voice_library or VoiceLibrary()
        self.conversation_tuner = conversation_tuner or ConversationTuner()
        self.policy_engine = policy_engine
        self.audit_log = audit_log
        self._session_count_callback = session_count_callback

        self._agent_configs: dict[str, AgentConfig] = {}
        self._start_time = utc_now()  # reference point for uptime

        logger.info("AdminControlPanel initialized")

    # ========== Voice Management ==========

    def add_voice_profile(self, profile: VoiceProfile) -> str:
        """
        Add a voice profile to the library.

        Args:
            profile: Voice profile to add.

        Returns:
            Voice ID.
        """
        voice_id = self.voice_library.add_voice(profile)
        self._log_admin_action("voice_added", {"voice_id": voice_id, "name": profile.name})
        return voice_id

    def list_voices(
        self,
        language: str | None = None,
        gender: str | None = None,
        style: str | None = None,
    ) -> list[VoiceProfile]:
        """List voice profiles with optional filtering."""
        return self.voice_library.list_voices(language=language, gender=gender, style=style)

    def update_voice_profile(self, voice_id: str, updates: dict[str, Any]) -> bool:
        """
        Update a voice profile.

        Args:
            voice_id: Voice ID to update.
            updates: Dictionary of fields to update.

        Returns:
            True if updated, False if not found.
        """
        success = self.voice_library.update_voice(voice_id, updates)
        if success:
            # Only the names of the changed fields are recorded, not the values.
            self._log_admin_action("voice_updated", {"voice_id": voice_id, "fields": list(updates)})
        return success

    def remove_voice_profile(self, voice_id: str) -> bool:
        """Remove a voice profile; returns the library's success flag."""
        success = self.voice_library.remove_voice(voice_id)
        if success:
            self._log_admin_action("voice_removed", {"voice_id": voice_id})
        return success

    def set_default_voice(self, voice_id: str) -> bool:
        """Set the default voice; returns the library's success flag."""
        success = self.voice_library.set_default_voice(voice_id)
        if success:
            self._log_admin_action("default_voice_set", {"voice_id": voice_id})
        return success

    # ========== Conversation Tuning ==========

    def register_conversation_style(self, name: str, style: ConversationStyle) -> None:
        """
        Register a conversation style.

        Args:
            name: Style name.
            style: Conversation style configuration.
        """
        self.conversation_tuner.register_style(name, style)
        self._log_admin_action("conversation_style_registered", {"name": name})

    def list_conversation_styles(self) -> list[str]:
        """List all registered conversation style names."""
        return self.conversation_tuner.list_styles()

    def get_conversation_style(self, name: str) -> ConversationStyle | None:
        """Get a conversation style by name."""
        return self.conversation_tuner.get_style(name)

    def set_default_conversation_style(self, style: ConversationStyle) -> None:
        """Set the default conversation style."""
        self.conversation_tuner.set_default_style(style)
        self._log_admin_action("default_conversation_style_set", {})

    # ========== Agent Management ==========

    def configure_agent(self, config: AgentConfig) -> None:
        """
        Configure an agent, replacing any stored config with the same ID.

        Args:
            config: Agent configuration.
        """
        self._agent_configs[config.agent_id] = config
        self._log_admin_action("agent_configured", {"agent_id": config.agent_id})
        logger.info("Agent configured", extra={"agent_id": config.agent_id})

    def get_agent_config(self, agent_id: str) -> AgentConfig | None:
        """Get agent configuration, or None if the agent was never configured."""
        return self._agent_configs.get(agent_id)

    def list_agents(self) -> list[str]:
        """List all agent IDs registered with the orchestrator."""
        return list(self.orchestrator._agents.keys())

    def enable_agent(self, agent_id: str) -> bool:
        """Enable an agent. Returns False if no config is stored for it."""
        return self._set_agent_enabled(agent_id, True)

    def disable_agent(self, agent_id: str) -> bool:
        """Disable an agent. Returns False if no config is stored for it."""
        return self._set_agent_enabled(agent_id, False)

    def _set_agent_enabled(self, agent_id: str, enabled: bool) -> bool:
        """Flip the ``enabled`` flag of a stored agent config and audit the change."""
        config = self._agent_configs.get(agent_id)
        if config is None:
            return False
        config.enabled = enabled
        action = "agent_enabled" if enabled else "agent_disabled"
        self._log_admin_action(action, {"agent_id": agent_id})
        return True

    # ========== System Monitoring ==========

    def get_system_status(self) -> SystemStatus:
        """
        Get current system status.

        Tasks in state "pending" or "active" count as active. The status is
        "degraded" when the active count exceeds ``_DEGRADED_ACTIVE_TASKS``, or
        when more than ``_DEGRADED_FAILED_TASKS`` tasks failed while more than
        ``_DEGRADED_FAILED_MIN_ACTIVE`` are active; otherwise "healthy".

        Returns:
            System status information.
        """
        uptime = (utc_now() - self._start_time).total_seconds()

        active_tasks = 0
        failed_count = 0
        for task_id in self.state_manager._tasks:
            task = self.state_manager.get_task(task_id)
            if not task:
                continue
            state = task.state.value
            if state in ("pending", "active"):
                active_tasks += 1
            elif state == "failed":
                failed_count += 1

        active_agents = len(self.orchestrator._agents)
        active_sessions = self._session_count_callback() if self._session_count_callback else 0

        overloaded = active_tasks > self._DEGRADED_ACTIVE_TASKS
        failing_under_load = (
            failed_count > self._DEGRADED_FAILED_TASKS
            and active_tasks > self._DEGRADED_FAILED_MIN_ACTIVE
        )
        status: Literal["healthy", "degraded", "offline"] = (
            "degraded" if overloaded or failing_under_load else "healthy"
        )

        return SystemStatus(
            status=status,
            uptime_seconds=uptime,
            active_tasks=active_tasks,
            active_agents=active_agents,
            active_sessions=active_sessions,
        )

    def get_task_statistics(self) -> dict[str, Any]:
        """
        Get task execution statistics.

        Returns:
            Dictionary with ``total_tasks`` plus per-state (``by_state``) and
            per-priority (``by_priority``) counts.
        """
        stats: dict[str, Any] = {
            "total_tasks": len(self.state_manager._tasks),
            "by_state": {},
            "by_priority": {},
        }
        by_state = stats["by_state"]
        by_priority = stats["by_priority"]

        for task_id in self.state_manager._tasks:
            task = self.state_manager.get_task(task_id)
            if not task:
                continue
            state_key = task.state.value
            by_state[state_key] = by_state.get(state_key, 0) + 1
            priority_key = task.priority.value
            by_priority[priority_key] = by_priority.get(priority_key, 0) + 1

        return stats

    def get_recent_events(self, limit: int = 50) -> list[dict[str, Any]]:
        """
        Get recent system events from the event bus.

        Requires EventBus(history_size=N) at construction for non-empty results.

        Args:
            limit: Maximum number of events to return.

        Returns:
            List of recent events (event_type, payload, timestamp); empty when
            the event bus has no ``get_recent_events`` method.
        """
        if hasattr(self.event_bus, "get_recent_events"):
            return self.event_bus.get_recent_events(limit=limit)
        return []

    # ========== Governance & Audit ==========

    def get_audit_entries(
        self,
        limit: int = 100,
        action_type: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Get audit log entries.

        Args:
            limit: Maximum number of entries to fetch from the audit log.
            action_type: Optional filter by action type.

        Returns:
            List of audit entries; empty when no audit log is configured.
        """
        if not self.audit_log:
            return []

        entries = self.audit_log.query(limit=limit)

        # NOTE(review): the filter runs after ``limit`` was applied, so fewer
        # than ``limit`` matching entries may come back even if more exist.
        # Push the filter into query() if AuditLog supports it — TODO confirm.
        if action_type:
            entries = [e for e in entries if e.get("action") == action_type]

        return entries

    def update_policy(self, policy_id: str, policy_data: dict[str, Any]) -> bool:
        """
        Update a governance policy.

        Only the keys "condition", "effect", "reason" and "priority" of
        ``policy_data`` are forwarded to the policy engine. The rule is looked
        up by ``policy_data["rule_id"]`` when present, else by ``policy_id``.

        Args:
            policy_id: Policy identifier.
            policy_data: Policy configuration.

        Returns:
            True if updated; False if no policy engine is available, the rule
            does not exist, or the engine rejects the update.
        """
        if not self.policy_engine:
            return False

        rule_id = policy_data.get("rule_id", policy_id)
        if self.policy_engine.get_rule(rule_id) is None:
            return False
        updates = {k: v for k, v in policy_data.items() if k in ("condition", "effect", "reason", "priority")}
        ok = self.policy_engine.update_rule(rule_id, updates)
        if ok:
            self._log_admin_action("policy_updated", {"policy_id": policy_id, "rule_id": rule_id})
        return ok

    # ========== Utility Methods ==========

    @staticmethod
    def _safe_log_extra(details: dict[str, Any]) -> dict[str, Any]:
        """Return ``details`` with keys that clash with LogRecord attributes prefixed.

        The standard ``logging`` module raises ``KeyError`` when ``extra``
        contains a key such as "name" or "message"; those keys are renamed to
        ``detail_<key>`` so logging an admin action can never fail on them.
        """
        import logging  # local import: only needed to discover reserved names

        reserved = set(vars(logging.makeLogRecord({}))) | {"message", "asctime"}
        return {
            (f"detail_{key}" if key in reserved else key): value
            for key, value in details.items()
        }

    def _log_admin_action(self, action: str, details: dict[str, Any]) -> None:
        """
        Log an administrative action.

        The action is written to the application log and, when an audit log is
        configured, recorded there with actor "admin" and the current UTC time.

        Args:
            action: Action type.
            details: Action details.
        """
        # The audit log receives the original details; only the logger's
        # ``extra`` needs sanitising against reserved LogRecord keys.
        logger.info(f"Admin action: {action}", extra=self._safe_log_extra(details))

        if self.audit_log:
            self.audit_log.log(
                action=action,
                actor="admin",
                details=details,
                timestamp=utc_now_iso(),
            )

    def export_configuration(self) -> dict[str, Any]:
        """
        Export system configuration.

        Returns:
            Dictionary with voices, conversation styles, agent configs and an
            ``exported_at`` timestamp.
        """
        styles: dict[str, Any] = {}
        for name in self.conversation_tuner.list_styles():
            style = self.conversation_tuner.get_style(name)
            if style is not None:  # skip a name that vanished between list and get
                styles[name] = style.model_dump()

        return {
            "voices": [v.model_dump() for v in self.voice_library.list_voices()],
            "conversation_styles": styles,
            "agent_configs": {
                agent_id: config.model_dump()
                for agent_id, config in self._agent_configs.items()
            },
            "exported_at": utc_now_iso(),
        }

    def import_configuration(self, config: dict[str, Any]) -> bool:
        """
        Import system configuration.

        All sections are parsed and validated before anything is applied, so a
        malformed entry does not leave a partially imported configuration.

        Args:
            config: Configuration dictionary to import (same shape as the
                result of ``export_configuration``; every section is optional).

        Returns:
            True if successful, False otherwise.
        """
        try:
            # Phase 1: build every object up front; nothing is mutated yet.
            voices = [VoiceProfile(**data) for data in config.get("voices", [])]
            styles = {
                name: ConversationStyle(**data)
                for name, data in config.get("conversation_styles", {}).items()
            }
            agents = {
                agent_id: AgentConfig(**data)
                for agent_id, data in config.get("agent_configs", {}).items()
            }

            # Phase 2: apply.
            for profile in voices:
                self.voice_library.add_voice(profile)
            for name, style in styles.items():
                self.conversation_tuner.register_style(name, style)
            self._agent_configs.update(agents)

            self._log_admin_action("configuration_imported", {"source": "file"})
            return True

        except Exception as e:
            # Boundary handler: importing must report failure, not raise.
            logger.error("Configuration import failed", extra={"error": str(e)})
            return False
|
||||
121
fusionagi/interfaces/base.py
Normal file
121
fusionagi/interfaces/base.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""Base interface adapter for multi-modal interaction."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from enum import Enum
|
||||
from typing import Any, AsyncIterator
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from fusionagi._time import utc_now_iso
|
||||
|
||||
|
||||
class ModalityType(str, Enum):
    """Sensory modalities an interface can carry.

    Members are also ``str`` instances, so each one compares equal to its
    lower-case value.
    """

    TEXT = "text"
    VOICE = "voice"
    VISUAL = "visual"
    HAPTIC = "haptic"
    GESTURE = "gesture"
    BIOMETRIC = "biometric"
|
||||
|
||||
|
||||
class InterfaceMessage(BaseModel):
    """A single message passed through an interface adapter."""

    # Required fields.
    id: str = Field(description="Unique message identifier")
    modality: ModalityType = Field(description="Sensory modality of this message")
    content: Any = Field(description="Message content (modality-specific)")

    # Optional fields with defaults.
    metadata: dict[str, Any] = Field(
        description="Additional metadata",
        default_factory=dict,
    )
    timestamp: str = Field(
        description="Message timestamp",
        default_factory=utc_now_iso,  # stamped at creation when not supplied
    )
    user_id: str | None = Field(
        description="User identifier if applicable",
        default=None,
    )
    session_id: str | None = Field(
        description="Session identifier",
        default=None,
    )
|
||||
|
||||
|
||||
class InterfaceCapabilities(BaseModel):
    """Declares what an interface adapter is able to do."""

    # The only required field: which modalities the adapter handles.
    supported_modalities: list[ModalityType] = Field(
        description="Supported sensory modalities",
    )

    # Feature flags, all off unless the adapter says otherwise.
    supports_streaming: bool = Field(
        description="Supports streaming responses",
        default=False,
    )
    supports_interruption: bool = Field(
        description="Supports mid-response interruption",
        default=False,
    )
    supports_multimodal: bool = Field(
        description="Supports multiple modalities simultaneously",
        default=False,
    )

    # Optional figures; None means "not specified".
    latency_ms: float | None = Field(
        description="Expected latency in milliseconds",
        default=None,
    )
    max_concurrent_sessions: int | None = Field(
        description="Max concurrent sessions",
        default=None,
    )
|
||||
|
||||
|
||||
class InterfaceAdapter(ABC):
    """
    Abstract base class for interface adapters.

    An adapter translates between a human sensory modality (voice, visual,
    haptic, ...) and FusionAGI's internal message format. Subclasses must
    implement ``capabilities``, ``send`` and ``receive``; the remaining
    methods have working defaults.
    """

    def __init__(self, name: str) -> None:
        """Store the adapter's name."""
        self.name = name

    @abstractmethod
    def capabilities(self) -> InterfaceCapabilities:
        """Describe what this interface supports."""
        ...

    @abstractmethod
    async def send(self, message: InterfaceMessage) -> None:
        """
        Deliver one message to the user through this interface.

        Args:
            message: Message to send (modality-specific content).
        """
        ...

    @abstractmethod
    async def receive(self, timeout_seconds: float | None = None) -> InterfaceMessage | None:
        """
        Wait for a message from the user.

        Args:
            timeout_seconds: Optional timeout for receiving.

        Returns:
            Received message or None if timeout.
        """
        ...

    async def stream_send(self, messages: AsyncIterator[InterfaceMessage]) -> None:
        """
        Send a stream of messages to the user.

        The default forwards every item to ``send`` one at a time, in order.
        Override for true streaming support.

        Args:
            messages: Async iterator of messages to stream.
        """
        async for item in messages:
            await self.send(item)

    async def initialize(self) -> None:
        """Initialize the interface (connect, authenticate, etc.). No-op by default."""

    async def shutdown(self) -> None:
        """Shutdown the interface gracefully. No-op by default."""

    def validate_message(self, message: InterfaceMessage) -> bool:
        """
        Check whether this interface can carry the given message.

        Args:
            message: Message to validate.

        Returns:
            True if the message's modality is among the supported modalities
            reported by ``capabilities()``, False otherwise.
        """
        supported = self.capabilities().supported_modalities
        return message.modality in supported
|
||||
392
fusionagi/interfaces/conversation.py
Normal file
392
fusionagi/interfaces/conversation.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""Conversation management and natural language tuning."""
|
||||
|
||||
import uuid
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from fusionagi._time import utc_now_iso
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class ConversationStyle(BaseModel):
    """Tunable parameters describing how the agent should converse."""

    # --- Categorical settings ---
    formality: Literal["casual", "neutral", "formal"] = Field(
        description="Conversation formality level",
        default="neutral",
    )
    verbosity: Literal["concise", "balanced", "detailed"] = Field(
        description="Response length preference",
        default="balanced",
    )
    personality_traits: list[str] = Field(
        description="Personality traits (e.g., friendly, professional, humorous)",
        default_factory=list,
    )

    # --- Continuous settings, each constrained to the range [0.0, 1.0] ---
    empathy_level: float = Field(
        description="Emotional responsiveness (0=robotic, 1=highly empathetic)",
        default=0.7,
        ge=0.0,
        le=1.0,
    )
    proactivity: float = Field(
        description="Tendency to offer suggestions (0=reactive, 1=proactive)",
        default=0.5,
        ge=0.0,
        le=1.0,
    )
    humor_level: float = Field(
        description="Use of humor (0=serious, 1=playful)",
        default=0.3,
        ge=0.0,
        le=1.0,
    )
    technical_depth: float = Field(
        description="Technical detail level (0=simple, 1=expert)",
        default=0.5,
        ge=0.0,
        le=1.0,
    )
|
||||
|
||||
|
||||
class ConversationContext(BaseModel):
    """Per-session conversation settings and bookkeeping."""

    # Generated as "session_" followed by a full 32-character UUID4 hex string.
    session_id: str = Field(default_factory=lambda: "session_" + uuid.uuid4().hex)
    user_id: str | None = None
    style: ConversationStyle = Field(default_factory=ConversationStyle)
    language: str = Field(
        description="Primary language code",
        default="en",
    )
    domain: str | None = Field(
        description="Domain/topic of conversation",
        default=None,
    )
    history_length: int = Field(
        description="Number of turns to maintain in context",
        default=10,
    )
    # Stamped with utc_now_iso() when the context is created.
    started_at: str = Field(default_factory=utc_now_iso)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ConversationTurn(BaseModel):
    """One utterance within a conversation session."""

    # Generated as "turn_" followed by the first 8 hex characters of a UUID4.
    turn_id: str = Field(default_factory=lambda: "turn_" + uuid.uuid4().hex[:8])

    # Required fields.
    session_id: str
    speaker: Literal["user", "agent", "system"]
    content: str

    # Optional analysis results.
    intent: str | None = Field(
        description="Detected intent",
        default=None,
    )
    sentiment: float | None = Field(
        description="Sentiment score (-1=negative, 0=neutral, 1=positive)",
        default=None,
        ge=-1.0,
        le=1.0,
    )
    confidence: float | None = Field(default=None, ge=0.0, le=1.0)

    # Stamped with utc_now_iso() when the turn is created.
    timestamp: str = Field(default_factory=utc_now_iso)
    metadata: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class ConversationTuner:
    """
    Conversation tuner for natural language interaction.

    Holds a registry of named conversation styles plus a default style, and
    derives context-specific styles from them (per domain and per user).
    """

    # Overrides applied by _apply_domain_tuning(); keys are lower-case domain
    # names, values map ConversationStyle field names to the value to set.
    _DOMAIN_PRESETS: dict[str, dict[str, Any]] = {
        "technical": {
            "formality": "formal",
            "technical_depth": 0.9,
            "verbosity": "detailed",
            "humor_level": 0.1,
        },
        "customer_support": {
            "formality": "neutral",
            "empathy_level": 0.9,
            "proactivity": 0.8,
            "verbosity": "balanced",
        },
        "casual_chat": {
            "formality": "casual",
            "humor_level": 0.7,
            "empathy_level": 0.8,
            "technical_depth": 0.3,
        },
        "education": {
            "formality": "neutral",
            "verbosity": "detailed",
            "technical_depth": 0.6,
            "proactivity": 0.7,
        },
    }

    def __init__(self) -> None:
        """Start with an empty style registry and a default-constructed default style."""
        self._styles: dict[str, ConversationStyle] = {}
        self._default_style = ConversationStyle()
        logger.info("ConversationTuner initialized")

    def register_style(self, name: str, style: ConversationStyle) -> None:
        """
        Register a named conversation style, replacing any existing one.

        Args:
            name: Style name (e.g., "customer_support", "technical_expert").
            style: Conversation style configuration.
        """
        self._styles[name] = style
        # "name" is a reserved LogRecord attribute and would make the standard
        # logging module raise KeyError, hence the distinct key.
        logger.info("Conversation style registered", extra={"style_name": name})

    def get_style(self, name: str) -> ConversationStyle | None:
        """Get a conversation style by name, or None if it is not registered."""
        return self._styles.get(name)

    def list_styles(self) -> list[str]:
        """List all registered style names."""
        return list(self._styles.keys())

    def set_default_style(self, style: ConversationStyle) -> None:
        """Set the default conversation style."""
        self._default_style = style
        logger.info("Default conversation style updated")

    def get_default_style(self) -> ConversationStyle:
        """Get the default conversation style."""
        return self._default_style

    def tune_for_context(
        self,
        base_style: ConversationStyle | None = None,
        domain: str | None = None,
        user_preferences: dict[str, Any] | None = None,
    ) -> ConversationStyle:
        """
        Tune conversation style for a specific context.

        The result is always a fresh copy: neither ``base_style`` nor the
        tuner's default style is modified. Domain presets are applied first,
        then user preferences, so user preferences win on conflict.

        Args:
            base_style: Base style to start from (uses default if None).
            domain: Domain/topic to optimize for.
            user_preferences: User-specific preferences to apply. Keys that are
                not fields of the style are ignored.

        Returns:
            Tuned conversation style.
        """
        source = base_style if base_style is not None else self._default_style
        # Copy in both cases so a registered/shared style is never mutated.
        style = source.model_copy(deep=True)

        if domain:
            style = self._apply_domain_tuning(style, domain)

        if user_preferences:
            # Restrict to declared fields; hasattr() would also match methods
            # such as model_dump, which cannot be assigned on the model.
            known_fields = type(style).model_fields
            for key, value in user_preferences.items():
                if key in known_fields:
                    setattr(style, key, value)

        logger.info(
            "Conversation style tuned",
            extra={"domain": domain, "has_user_prefs": bool(user_preferences)}
        )
        return style

    def _apply_domain_tuning(self, style: ConversationStyle, domain: str) -> ConversationStyle:
        """
        Apply domain-specific tuning to a conversation style.

        The domain is matched case-insensitively against ``_DOMAIN_PRESETS``;
        an unknown domain leaves the style untouched.

        Args:
            style: Style to tune; it is modified in place.
            domain: Domain to tune for.

        Returns:
            The same ``style`` object, after tuning.
        """
        preset = self._DOMAIN_PRESETS.get(domain.lower())
        if preset:
            for key, value in preset.items():
                setattr(style, key, value)

        return style
|
||||
|
||||
|
||||
class ConversationManager:
|
||||
"""
|
||||
Conversation manager for maintaining conversation state and history.
|
||||
|
||||
Manages conversation sessions, tracks turns, and provides context for
|
||||
natural language understanding and generation.
|
||||
"""
|
||||
|
||||
def __init__(self, tuner: ConversationTuner | None = None) -> None:
|
||||
"""
|
||||
Initialize conversation manager.
|
||||
|
||||
Args:
|
||||
tuner: Conversation tuner for style management.
|
||||
"""
|
||||
self.tuner = tuner or ConversationTuner()
|
||||
self._sessions: dict[str, ConversationContext] = {}
|
||||
self._history: dict[str, list[ConversationTurn]] = {}
|
||||
logger.info("ConversationManager initialized")
|
||||
|
||||
def create_session(
|
||||
self,
|
||||
user_id: str | None = None,
|
||||
style_name: str | None = None,
|
||||
language: str = "en",
|
||||
domain: str | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Create a new conversation session.
|
||||
|
||||
Args:
|
||||
user_id: Optional user identifier.
|
||||
style_name: Optional style name (uses default if None).
|
||||
language: Primary language code.
|
||||
domain: Domain/topic of conversation.
|
||||
|
||||
Returns:
|
||||
Session ID.
|
||||
"""
|
||||
style = self.tuner.get_style(style_name) if style_name else self.tuner.get_default_style()
|
||||
|
||||
context = ConversationContext(
|
||||
user_id=user_id,
|
||||
style=style,
|
||||
language=language,
|
||||
domain=domain,
|
||||
)
|
||||
|
||||
self._sessions[context.session_id] = context
|
||||
self._history[context.session_id] = []
|
||||
|
||||
logger.info(
|
||||
"Conversation session created",
|
||||
extra={
|
||||
"session_id": context.session_id,
|
||||
"user_id": user_id,
|
||||
"domain": domain,
|
||||
}
|
||||
)
|
||||
return context.session_id
|
||||
|
||||
def get_session(self, session_id: str) -> ConversationContext | None:
|
||||
"""Get conversation context for a session."""
|
||||
return self._sessions.get(session_id)
|
||||
|
||||
def add_turn(self, turn: ConversationTurn) -> None:
|
||||
"""
|
||||
Add a turn to conversation history.
|
||||
|
||||
Args:
|
||||
turn: Conversation turn to add.
|
||||
"""
|
||||
if turn.session_id not in self._history:
|
||||
logger.warning("Session not found", extra={"session_id": turn.session_id})
|
||||
return
|
||||
|
||||
history = self._history[turn.session_id]
|
||||
history.append(turn)
|
||||
|
||||
# Trim history to configured length
|
||||
context = self._sessions.get(turn.session_id)
|
||||
if context and len(history) > context.history_length:
|
||||
self._history[turn.session_id] = history[-context.history_length:]
|
||||
|
||||
logger.debug(
|
||||
"Turn added",
|
||||
extra={
|
||||
"session_id": turn.session_id,
|
||||
"speaker": turn.speaker,
|
||||
"content_length": len(turn.content),
|
||||
}
|
||||
)
|
||||
|
||||
def get_history(self, session_id: str, limit: int | None = None) -> list[ConversationTurn]:
|
||||
"""
|
||||
Get conversation history for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
limit: Optional limit on number of turns to return.
|
||||
|
||||
Returns:
|
||||
List of conversation turns (most recent last).
|
||||
"""
|
||||
history = self._history.get(session_id, [])
|
||||
if limit:
|
||||
return history[-limit:]
|
||||
return history
|
||||
|
||||
def get_style_for_session(self, session_id: str) -> ConversationStyle | None:
|
||||
"""
|
||||
Get the conversation style for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
|
||||
Returns:
|
||||
Conversation style for the session, or None if session not found.
|
||||
"""
|
||||
context = self._sessions.get(session_id)
|
||||
return context.style if context else None
|
||||
|
||||
def update_style(self, session_id: str, style: ConversationStyle) -> bool:
|
||||
"""
|
||||
Update conversation style for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
style: New conversation style.
|
||||
|
||||
Returns:
|
||||
True if updated, False if session not found.
|
||||
"""
|
||||
context = self._sessions.get(session_id)
|
||||
if context:
|
||||
context.style = style
|
||||
logger.info("Session style updated", extra={"session_id": session_id})
|
||||
return True
|
||||
return False
|
||||
|
||||
def end_session(self, session_id: str) -> bool:
|
||||
"""
|
||||
End a conversation session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
|
||||
Returns:
|
||||
True if ended, False if not found.
|
||||
"""
|
||||
if session_id in self._sessions:
|
||||
del self._sessions[session_id]
|
||||
# Keep history for analytics but could be cleaned up later
|
||||
logger.info("Session ended", extra={"session_id": session_id})
|
||||
return True
|
||||
return False
|
||||
|
||||
def get_context_summary(self, session_id: str) -> dict[str, Any]:
    """
    Build a compact description of a session for LLM prompting.

    Args:
        session_id: Session identifier.

    Returns:
        Dictionary holding the session identifiers, language, domain,
        dumped style, total turn count and the last five turns. Empty
        dictionary when the session is unknown.
    """
    session_context = self._sessions.get(session_id)
    turns = self._history.get(session_id, [])
    if not session_context:
        return {}

    summary: dict[str, Any] = {
        "session_id": session_id,
        "user_id": session_context.user_id,
        "language": session_context.language,
        "domain": session_context.domain,
        "style": session_context.style.model_dump(),
        "turn_count": len(turns),
    }

    # Only the five most recent turns are included.
    recent_turns = []
    for turn in turns[-5:]:
        recent_turns.append(
            {"speaker": turn.speaker, "content": turn.content, "intent": turn.intent}
        )
    summary["recent_turns"] = recent_turns
    return summary
|
||||
506
fusionagi/interfaces/multimodal_ui.py
Normal file
506
fusionagi/interfaces/multimodal_ui.py
Normal file
@@ -0,0 +1,506 @@
|
||||
"""Multi-modal user interface for full sensory experience with FusionAGI.
|
||||
|
||||
Supports:
|
||||
- Text (chat, commands)
|
||||
- Voice (speech input/output)
|
||||
- Visual (images, video, AR/VR)
|
||||
- Haptic (touch feedback)
|
||||
- Gesture (motion control)
|
||||
- Biometric (emotion detection, physiological signals)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import Any, AsyncIterator, Callable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from fusionagi._time import utc_now_iso
|
||||
from fusionagi.interfaces.base import (
|
||||
InterfaceAdapter,
|
||||
InterfaceMessage,
|
||||
ModalityType,
|
||||
)
|
||||
from fusionagi.interfaces.voice import VoiceInterface, VoiceLibrary
|
||||
from fusionagi.interfaces.conversation import ConversationManager, ConversationTurn
|
||||
from fusionagi.core import Orchestrator
|
||||
from fusionagi.schemas import Task, TaskState
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
class UserSession(BaseModel):
    """State kept for one user's multi-modal session."""

    # "user_session_" followed by the hex form of a random UUID4.
    session_id: str = Field(default_factory=lambda: "user_session_" + uuid.uuid4().hex)
    # Optional identifier of the user who owns the session.
    user_id: str | None = None
    # Identifier of the linked conversation session, when one exists.
    conversation_session_id: str | None = None
    # Modalities currently enabled for this session.
    active_modalities: list[ModalityType] = Field(default_factory=list)
    # Free-form user preferences.
    preferences: dict[str, Any] = Field(default_factory=dict)
    # Free-form accessibility settings.
    accessibility_settings: dict[str, Any] = Field(default_factory=dict)
    # Timestamps are strings produced by utc_now_iso at creation time.
    started_at: str = Field(default_factory=utc_now_iso)
    last_activity_at: str = Field(default_factory=utc_now_iso)
|
||||
|
||||
|
||||
class MultiModalUI:
|
||||
"""
|
||||
Multi-modal user interface for FusionAGI.
|
||||
|
||||
Provides a unified interface that supports multiple sensory modalities
|
||||
simultaneously, allowing users to interact through their preferred
|
||||
combination of text, voice, visual, haptic, gesture, and biometric inputs.
|
||||
|
||||
Features:
|
||||
- Seamless switching between modalities
|
||||
- Simultaneous multi-modal input/output
|
||||
- Accessibility support
|
||||
- Context-aware modality selection
|
||||
- Real-time feedback across all active modalities
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
orchestrator: Orchestrator,
|
||||
conversation_manager: ConversationManager,
|
||||
voice_interface: VoiceInterface | None = None,
|
||||
llm_process_callback: Callable[[str, str, dict[str, Any], Any], str] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize multi-modal UI.
|
||||
|
||||
Args:
|
||||
orchestrator: FusionAGI orchestrator for task execution.
|
||||
conversation_manager: Conversation manager for natural language.
|
||||
voice_interface: Voice interface for speech interaction.
|
||||
llm_process_callback: Optional (session_id, user_input, context, style) -> response for converse().
|
||||
"""
|
||||
self.orchestrator = orchestrator
|
||||
self.conversation_manager = conversation_manager
|
||||
self.voice_interface = voice_interface
|
||||
self._llm_process_callback = llm_process_callback
|
||||
|
||||
self._sessions: dict[str, UserSession] = {}
|
||||
self._interface_adapters: dict[ModalityType, InterfaceAdapter] = {}
|
||||
self._receive_lock = asyncio.Lock()
|
||||
|
||||
# Register voice interface if provided
|
||||
if voice_interface:
|
||||
self._interface_adapters[ModalityType.VOICE] = voice_interface
|
||||
|
||||
logger.info("MultiModalUI initialized")
|
||||
|
||||
# ========== Session Management ==========
|
||||
|
||||
def create_session(
|
||||
self,
|
||||
user_id: str | None = None,
|
||||
preferred_modalities: list[ModalityType] | None = None,
|
||||
accessibility_settings: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Create a new user session.
|
||||
|
||||
Args:
|
||||
user_id: Optional user identifier.
|
||||
preferred_modalities: Preferred interaction modalities.
|
||||
accessibility_settings: Accessibility preferences.
|
||||
|
||||
Returns:
|
||||
Session ID.
|
||||
"""
|
||||
# Create conversation session
|
||||
conv_session_id = self.conversation_manager.create_session(user_id=user_id)
|
||||
|
||||
session = UserSession(
|
||||
user_id=user_id,
|
||||
conversation_session_id=conv_session_id,
|
||||
active_modalities=preferred_modalities or [ModalityType.TEXT],
|
||||
accessibility_settings=accessibility_settings or {},
|
||||
)
|
||||
|
||||
self._sessions[session.session_id] = session
|
||||
|
||||
logger.info(
|
||||
"User session created",
|
||||
extra={
|
||||
"session_id": session.session_id,
|
||||
"user_id": user_id,
|
||||
"modalities": [m.value for m in session.active_modalities],
|
||||
}
|
||||
)
|
||||
|
||||
return session.session_id
|
||||
|
||||
def get_session(self, session_id: str) -> UserSession | None:
|
||||
"""Get user session."""
|
||||
return self._sessions.get(session_id)
|
||||
|
||||
def active_session_count(self) -> int:
|
||||
"""Return number of active user sessions (for admin panel session_count_callback)."""
|
||||
return len(self._sessions)
|
||||
|
||||
def end_session(self, session_id: str) -> bool:
|
||||
"""
|
||||
End a user session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
|
||||
Returns:
|
||||
True if ended, False if not found.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
return False
|
||||
|
||||
# End conversation session
|
||||
if session.conversation_session_id:
|
||||
self.conversation_manager.end_session(session.conversation_session_id)
|
||||
|
||||
del self._sessions[session_id]
|
||||
logger.info("User session ended", extra={"session_id": session_id})
|
||||
return True
|
||||
|
||||
# ========== Modality Management ==========
|
||||
|
||||
def register_interface(self, modality: ModalityType, adapter: InterfaceAdapter) -> None:
|
||||
"""
|
||||
Register an interface adapter for a modality.
|
||||
|
||||
Args:
|
||||
modality: Modality type.
|
||||
adapter: Interface adapter implementation.
|
||||
"""
|
||||
self._interface_adapters[modality] = adapter
|
||||
logger.info("Interface adapter registered", extra={"modality": modality.value})
|
||||
|
||||
def enable_modality(self, session_id: str, modality: ModalityType) -> bool:
|
||||
"""
|
||||
Enable a modality for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
modality: Modality to enable.
|
||||
|
||||
Returns:
|
||||
True if enabled, False if session not found or modality unavailable.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
return False
|
||||
|
||||
if modality not in self._interface_adapters:
|
||||
logger.warning(
|
||||
"Modality not available",
|
||||
extra={"modality": modality.value}
|
||||
)
|
||||
return False
|
||||
|
||||
if modality not in session.active_modalities:
|
||||
session.active_modalities.append(modality)
|
||||
logger.info(
|
||||
"Modality enabled",
|
||||
extra={"session_id": session_id, "modality": modality.value}
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
def disable_modality(self, session_id: str, modality: ModalityType) -> bool:
|
||||
"""
|
||||
Disable a modality for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
modality: Modality to disable.
|
||||
|
||||
Returns:
|
||||
True if disabled, False if session not found.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
return False
|
||||
|
||||
if modality in session.active_modalities:
|
||||
session.active_modalities.remove(modality)
|
||||
logger.info(
|
||||
"Modality disabled",
|
||||
extra={"session_id": session_id, "modality": modality.value}
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
# ========== User Interaction ==========
|
||||
|
||||
async def send_to_user(
|
||||
self,
|
||||
session_id: str,
|
||||
content: Any,
|
||||
modalities: list[ModalityType] | None = None,
|
||||
metadata: dict[str, Any] | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Send content to user through active modalities.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
content: Content to send (will be adapted per modality).
|
||||
modalities: Specific modalities to use (uses active if None).
|
||||
metadata: Additional metadata for the message.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
logger.warning("Session not found", extra={"session_id": session_id})
|
||||
return
|
||||
|
||||
# Determine which modalities to use
|
||||
target_modalities = modalities or session.active_modalities
|
||||
|
||||
# Send through each active modality
|
||||
for modality in target_modalities:
|
||||
adapter = self._interface_adapters.get(modality)
|
||||
if not adapter:
|
||||
continue
|
||||
|
||||
# Create modality-specific message
|
||||
message = InterfaceMessage(
|
||||
id=f"msg_{uuid.uuid4().hex[:8]}",
|
||||
modality=modality,
|
||||
content=self._adapt_content(content, modality),
|
||||
metadata=metadata or {},
|
||||
session_id=session_id,
|
||||
user_id=session.user_id,
|
||||
)
|
||||
|
||||
try:
|
||||
await adapter.send(message)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to send through modality",
|
||||
extra={"modality": modality.value, "error": str(e)}
|
||||
)
|
||||
|
||||
async def receive_from_user(
|
||||
self,
|
||||
session_id: str,
|
||||
timeout_seconds: float | None = None,
|
||||
) -> InterfaceMessage | None:
|
||||
"""
|
||||
Receive input from user through any active modality.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
timeout_seconds: Optional timeout for receiving.
|
||||
|
||||
Returns:
|
||||
Received message or None if timeout.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
return None
|
||||
|
||||
# Listen on all active modalities (first to respond wins)
|
||||
# TODO: Implement proper async race condition handling
|
||||
for modality in session.active_modalities:
|
||||
adapter = self._interface_adapters.get(modality)
|
||||
if adapter:
|
||||
try:
|
||||
message = await adapter.receive(timeout_seconds)
|
||||
if message:
|
||||
# Update session activity
|
||||
session.last_activity_at = utc_now_iso()
|
||||
return message
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
"Failed to receive from modality",
|
||||
extra={"modality": modality.value, "error": str(e)}
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
# ========== Task Interaction ==========
|
||||
|
||||
async def submit_task_interactive(
|
||||
self,
|
||||
session_id: str,
|
||||
goal: str,
|
||||
constraints: dict[str, Any] | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Submit a task and provide interactive feedback.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
goal: Task goal description.
|
||||
constraints: Optional task constraints.
|
||||
|
||||
Returns:
|
||||
Task ID.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
raise ValueError(f"Session not found: {session_id}")
|
||||
|
||||
# Submit task
|
||||
task_id = self.orchestrator.submit_task(
|
||||
goal=goal,
|
||||
constraints=constraints or {},
|
||||
)
|
||||
|
||||
# Send confirmation to user
|
||||
await self.send_to_user(
|
||||
session_id,
|
||||
f"Task submitted: {goal}",
|
||||
metadata={"task_id": task_id, "type": "task_confirmation"},
|
||||
)
|
||||
|
||||
# Subscribe to task events for real-time updates
|
||||
self._subscribe_to_task_updates(session_id, task_id)
|
||||
|
||||
logger.info(
|
||||
"Interactive task submitted",
|
||||
extra={"session_id": session_id, "task_id": task_id}
|
||||
)
|
||||
|
||||
return task_id
|
||||
|
||||
def _subscribe_to_task_updates(self, session_id: str, task_id: str) -> None:
|
||||
"""
|
||||
Subscribe to task updates and relay to user.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
task_id: Task identifier.
|
||||
"""
|
||||
def on_task_update(event_type: str, data: dict[str, Any]) -> None:
|
||||
"""Handle task update event."""
|
||||
if data.get("task_id") != task_id:
|
||||
return
|
||||
|
||||
# Format update message
|
||||
if event_type == "task_state_changed":
|
||||
state = data.get("new_state")
|
||||
message = f"Task {task_id[:8]}: {state}"
|
||||
else:
|
||||
message = f"Task update: {event_type}"
|
||||
|
||||
# Send to user (async in background)
|
||||
import asyncio
|
||||
try:
|
||||
asyncio.create_task(
|
||||
self.send_to_user(
|
||||
session_id,
|
||||
message,
|
||||
metadata={"task_id": task_id, "event_type": event_type},
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to send task update", extra={"error": str(e)})
|
||||
|
||||
# Subscribe to events
|
||||
self.orchestrator._event_bus.subscribe("task_state_changed", on_task_update)
|
||||
self.orchestrator._event_bus.subscribe("task_step_completed", on_task_update)
|
||||
|
||||
# ========== Conversation Integration ==========
|
||||
|
||||
async def converse(
|
||||
self,
|
||||
session_id: str,
|
||||
user_input: str,
|
||||
) -> str:
|
||||
"""
|
||||
Handle conversational interaction.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
user_input: User's conversational input.
|
||||
|
||||
Returns:
|
||||
Agent's response.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session or not session.conversation_session_id:
|
||||
return "Session not found"
|
||||
|
||||
# Add user turn
|
||||
user_turn = ConversationTurn(
|
||||
session_id=session.conversation_session_id,
|
||||
speaker="user",
|
||||
content=user_input,
|
||||
)
|
||||
self.conversation_manager.add_turn(user_turn)
|
||||
|
||||
context = self.conversation_manager.get_context_summary(session.conversation_session_id)
|
||||
style = self.conversation_manager.get_style_for_session(session.conversation_session_id)
|
||||
if self._llm_process_callback is not None:
|
||||
response = self._llm_process_callback(session_id, user_input, context, style)
|
||||
else:
|
||||
response = f"I understand you said: {user_input}"
|
||||
|
||||
# Add agent turn
|
||||
agent_turn = ConversationTurn(
|
||||
session_id=session.conversation_session_id,
|
||||
speaker="agent",
|
||||
content=response,
|
||||
)
|
||||
self.conversation_manager.add_turn(agent_turn)
|
||||
|
||||
return response
|
||||
|
||||
# ========== Utility Methods ==========
|
||||
|
||||
def _adapt_content(self, content: Any, modality: ModalityType) -> Any:
|
||||
"""
|
||||
Adapt content for a specific modality.
|
||||
|
||||
Args:
|
||||
content: Original content.
|
||||
modality: Target modality.
|
||||
|
||||
Returns:
|
||||
Adapted content.
|
||||
"""
|
||||
# Convert content to appropriate format for modality
|
||||
if modality == ModalityType.TEXT:
|
||||
return str(content)
|
||||
elif modality == ModalityType.VOICE:
|
||||
# For voice, ensure it's text that can be synthesized
|
||||
return str(content)
|
||||
elif modality == ModalityType.VISUAL:
|
||||
# For visual, might need to generate images or format for display
|
||||
return {"type": "text", "content": str(content)}
|
||||
elif modality == ModalityType.HAPTIC:
|
||||
# For haptic, might need to generate vibration patterns
|
||||
return {"pattern": "notification", "intensity": 0.5}
|
||||
else:
|
||||
return content
|
||||
|
||||
def get_available_modalities(self) -> list[ModalityType]:
|
||||
"""Get list of available modalities."""
|
||||
return list(self._interface_adapters.keys())
|
||||
|
||||
def get_session_statistics(self, session_id: str) -> dict[str, Any]:
|
||||
"""
|
||||
Get statistics for a session.
|
||||
|
||||
Args:
|
||||
session_id: Session identifier.
|
||||
|
||||
Returns:
|
||||
Dictionary with session statistics.
|
||||
"""
|
||||
session = self._sessions.get(session_id)
|
||||
if not session:
|
||||
return {}
|
||||
|
||||
# Get conversation history
|
||||
history = []
|
||||
if session.conversation_session_id:
|
||||
history = self.conversation_manager.get_history(session.conversation_session_id)
|
||||
|
||||
return {
|
||||
"session_id": session_id,
|
||||
"user_id": session.user_id,
|
||||
"active_modalities": [m.value for m in session.active_modalities],
|
||||
"conversation_turns": len(history),
|
||||
"started_at": session.started_at,
|
||||
"last_activity_at": session.last_activity_at,
|
||||
}
|
||||
338
fusionagi/interfaces/voice.py
Normal file
338
fusionagi/interfaces/voice.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""Voice interface: speech-to-text, text-to-speech, voice library management."""
|
||||
|
||||
import uuid
|
||||
from typing import Any, Literal, Protocol, runtime_checkable
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from fusionagi._time import utc_now_iso
|
||||
from fusionagi.interfaces.base import InterfaceAdapter, InterfaceCapabilities, InterfaceMessage, ModalityType
|
||||
from fusionagi._logger import logger
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class TTSAdapter(Protocol):
|
||||
"""Protocol for TTS providers (ElevenLabs, Azure, system, etc.). Integrate by injecting an implementation."""
|
||||
|
||||
async def synthesize(self, text: str, voice_id: str | None = None, **kwargs: Any) -> bytes | None:
|
||||
"""Synthesize text to audio. Returns raw audio bytes or None if not available."""
|
||||
...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class STTAdapter(Protocol):
|
||||
"""Protocol for STT providers (Whisper, Azure, Google, etc.). Integrate by injecting an implementation."""
|
||||
|
||||
async def transcribe(self, audio_data: bytes | None = None, timeout_seconds: float | None = None, **kwargs: Any) -> str | None:
|
||||
"""Transcribe audio to text. Returns transcribed text or None if timeout/unavailable."""
|
||||
...
|
||||
|
||||
|
||||
class VoiceProfile(BaseModel):
    """Description of one text-to-speech voice and its tuning parameters."""

    # "voice_" followed by the first 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: "voice_" + uuid.uuid4().hex[:8])
    name: str = Field(description="Human-readable voice name")
    language: str = Field(default="en-US", description="Language code (e.g., en-US, es-ES)")
    gender: Literal["male", "female", "neutral"] | None = None
    age_range: Literal["child", "young_adult", "adult", "senior"] | None = None
    style: str | None = Field(
        default=None,
        description="Voice style (e.g., friendly, professional, calm)",
    )
    # Pitch and speed are multipliers constrained to the range [0.5, 2.0].
    pitch: float = Field(default=1.0, ge=0.5, le=2.0, description="Pitch multiplier")
    speed: float = Field(default=1.0, ge=0.5, le=2.0, description="Speed multiplier")
    provider: str = Field(
        default="system",
        description="TTS provider (e.g., system, elevenlabs, azure)",
    )
    provider_voice_id: str | None = Field(
        default=None,
        description="Provider-specific voice ID",
    )
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Creation timestamp string produced by utc_now_iso.
    created_at: str = Field(default_factory=utc_now_iso)
|
||||
|
||||
|
||||
class VoiceLibrary:
    """
    Voice library for managing TTS voice profiles.

    Allows admin to add, configure, and organize voice profiles for different
    agents, contexts, or user preferences. Profiles are stored in a dict keyed
    by profile ID; one of them may be marked as the default.
    """

    def __init__(self) -> None:
        """Create an empty library with no default voice."""
        self._voices: dict[str, VoiceProfile] = {}
        self._default_voice_id: str | None = None
        logger.info("VoiceLibrary initialized")

    def add_voice(self, profile: VoiceProfile) -> str:
        """
        Add a voice profile to the library.

        A profile with the same ID is replaced. The first profile added
        becomes the default voice.

        Args:
            profile: Voice profile to add.

        Returns:
            Voice ID.
        """
        self._voices[profile.id] = profile
        if self._default_voice_id is None:
            self._default_voice_id = profile.id
        # The extra key must not be "name": that is a reserved LogRecord
        # attribute and the stdlib logging module raises KeyError when `extra`
        # tries to overwrite it (assumes `logger` is a stdlib logger —
        # TODO confirm against fusionagi._logger).
        logger.info("Voice added", extra={"voice_id": profile.id, "voice_name": profile.name})
        return profile.id

    def remove_voice(self, voice_id: str) -> bool:
        """
        Remove a voice profile from the library.

        When the removed voice was the default, the first remaining voice
        (if any) becomes the new default.

        Args:
            voice_id: ID of voice to remove.

        Returns:
            True if removed, False if not found.
        """
        if voice_id in self._voices:
            del self._voices[voice_id]
            if self._default_voice_id == voice_id:
                self._default_voice_id = next(iter(self._voices.keys()), None)
            logger.info("Voice removed", extra={"voice_id": voice_id})
            return True
        return False

    def get_voice(self, voice_id: str) -> VoiceProfile | None:
        """Get a voice profile by ID, or None if it is not in the library."""
        return self._voices.get(voice_id)

    def list_voices(
        self,
        language: str | None = None,
        gender: str | None = None,
        style: str | None = None,
    ) -> list[VoiceProfile]:
        """
        List voice profiles with optional filtering.

        Filters are combined (a profile must match every filter given) and
        compared by exact equality. Omitted or empty filters are ignored.

        Args:
            language: Filter by language code.
            gender: Filter by gender.
            style: Filter by style.

        Returns:
            List of matching voice profiles.
        """
        voices = list(self._voices.values())

        if language:
            voices = [v for v in voices if v.language == language]
        if gender:
            voices = [v for v in voices if v.gender == gender]
        if style:
            voices = [v for v in voices if v.style == style]

        return voices

    def set_default_voice(self, voice_id: str) -> bool:
        """
        Set the default voice for the library.

        Args:
            voice_id: ID of voice to set as default.

        Returns:
            True if set, False if voice not found.
        """
        if voice_id in self._voices:
            self._default_voice_id = voice_id
            logger.info("Default voice set", extra={"voice_id": voice_id})
            return True
        return False

    def get_default_voice(self) -> VoiceProfile | None:
        """Get the default voice profile, or None when no default is set."""
        if self._default_voice_id:
            return self._voices.get(self._default_voice_id)
        return None

    def update_voice(self, voice_id: str, updates: dict[str, Any]) -> bool:
        """
        Update a voice profile.

        Only declared profile fields are applied; unknown keys are ignored.
        ``id`` is never changed because it is the key the profile is stored
        under. The combined result is validated against the profile's field
        constraints before anything is written, so a rejected update leaves
        the profile untouched.

        Args:
            voice_id: ID of voice to update.
            updates: Dictionary of fields to update.

        Returns:
            True if updated, False if the voice is not found or the new
            values fail validation.
        """
        voice = self._voices.get(voice_id)
        if voice is None:
            return False

        profile_cls = type(voice)
        allowed = set(profile_cls.model_fields) - {"id"}
        changes = {key: value for key, value in updates.items() if key in allowed}

        if changes:
            try:
                # Plain setattr skips field validation, so validate a merged
                # copy first and only then write the validated values back.
                validated = profile_cls.model_validate({**voice.model_dump(), **changes})
            except ValueError as e:  # pydantic's ValidationError subclasses ValueError
                logger.warning(
                    "Voice update rejected",
                    extra={"voice_id": voice_id, "error": str(e)},
                )
                return False
            for key in changes:
                setattr(voice, key, getattr(validated, key))

        logger.info("Voice updated", extra={"voice_id": voice_id, "updates": list(changes)})
        return True
|
||||
|
||||
|
||||
class VoiceInterface(InterfaceAdapter):
    """
    Interface adapter for spoken interaction.

    Handles:
    - Speech-to-text (STT) for user input
    - Text-to-speech (TTS) for system output
    - Voice activity detection
    - Noise cancellation

    Actual synthesis and transcription are delegated to the injected
    TTS/STT adapters; without them send() only logs and receive()
    returns None.
    """

    def __init__(
        self,
        name: str = "voice",
        voice_library: VoiceLibrary | None = None,
        stt_provider: str = "whisper",
        tts_provider: str = "system",
        tts_adapter: TTSAdapter | None = None,
        stt_adapter: STTAdapter | None = None,
    ) -> None:
        """
        Set up the voice interface.

        Args:
            name: Interface name.
            voice_library: Voice library for TTS profiles (a new empty library is created if omitted).
            stt_provider: Speech-to-text provider (whisper, azure, google, etc.).
            tts_provider: Text-to-speech provider (system, elevenlabs, azure, etc.).
            tts_adapter: Optional TTS adapter for synthesis (inject to integrate ElevenLabs, Azure, etc.).
            stt_adapter: Optional STT adapter for transcription (inject to integrate Whisper, Azure, etc.).
        """
        super().__init__(name)
        self.voice_library = voice_library or VoiceLibrary()
        self.stt_provider = stt_provider
        self.tts_provider = tts_provider
        self._tts_adapter = tts_adapter
        self._stt_adapter = stt_adapter
        # No voice is selected until set_active_voice() succeeds.
        self._active_voice_id: str | None = None

        provider_info = {"stt_provider": stt_provider, "tts_provider": tts_provider}
        logger.info("VoiceInterface initialized", extra=provider_info)

    def capabilities(self) -> InterfaceCapabilities:
        """Describe what this voice interface supports."""
        return InterfaceCapabilities(
            supported_modalities=[ModalityType.VOICE],
            supports_streaming=True,
            supports_interruption=True,
            supports_multimodal=False,
            latency_ms=200.0,  # Typical voice latency
            max_concurrent_sessions=10,
        )

    async def send(self, message: InterfaceMessage) -> None:
        """
        Speak a message (text-to-speech).

        The voice is taken from the message metadata ("voice_id"), then the
        active voice, then the library default.

        Args:
            message: Message with text content to synthesize.
        """
        if not self.validate_message(message):
            logger.warning("Invalid message for voice interface", extra={"modality": message.modality})
            return

        # Resolve the voice profile, falling back to the library default.
        requested_id = message.metadata.get("voice_id", self._active_voice_id)
        profile = self.voice_library.get_voice(requested_id) if requested_id else None
        if not profile:
            profile = self.voice_library.get_default_voice()

        content = message.content
        spoken_text = content if isinstance(content, str) else str(content)
        resolved_id = profile.id if profile else None

        if self._tts_adapter is None:
            logger.info(
                "TTS synthesis (stub; inject tts_adapter for ElevenLabs, Azure, etc.)",
                extra={"text_length": len(spoken_text), "voice_id": resolved_id, "provider": self.tts_provider},
            )
            return

        try:
            audio = await self._tts_adapter.synthesize(spoken_text, voice_id=resolved_id)
            if audio:
                logger.info(
                    "TTS synthesis (adapter)",
                    extra={"text_length": len(spoken_text), "voice_id": resolved_id, "bytes": len(audio)},
                )
                # Playback is not wired in here; the audio is only logged.
        except Exception as e:
            logger.exception("TTS adapter failed", extra={"error": str(e)})

    async def receive(self, timeout_seconds: float | None = None) -> InterfaceMessage | None:
        """
        Listen for spoken input (speech-to-text).

        Args:
            timeout_seconds: Optional timeout for listening, passed to the STT adapter.

        Returns:
            Message with transcribed text, or None when no STT adapter is
            configured, nothing was transcribed, or the adapter failed.
        """
        logger.info("STT listening", extra={"timeout": timeout_seconds, "provider": self.stt_provider})
        if self._stt_adapter is None:
            return None

        try:
            transcript = await self._stt_adapter.transcribe(
                audio_data=None,
                timeout_seconds=timeout_seconds,
            )
            if transcript:
                return InterfaceMessage(
                    id=f"stt_{uuid.uuid4().hex[:8]}",
                    modality=ModalityType.VOICE,
                    content=transcript,
                    metadata={"provider": self.stt_provider},
                )
        except Exception as e:
            logger.exception("STT adapter failed", extra={"error": str(e)})
        return None

    def set_active_voice(self, voice_id: str) -> bool:
        """
        Select the voice used by this interface.

        Args:
            voice_id: ID of voice to use.

        Returns:
            True if the voice exists in the library, False otherwise.
        """
        if not self.voice_library.get_voice(voice_id):
            return False

        self._active_voice_id = voice_id
        logger.info("Active voice set", extra={"voice_id": voice_id})
        return True

    async def _synthesize_speech(self, text: str, voice: VoiceProfile | None) -> bytes:
        """
        Placeholder for provider-backed speech synthesis.

        Args:
            text: Text to synthesize.
            voice: Voice profile to use.

        Returns:
            Audio data as bytes (once implemented).

        Raises:
            NotImplementedError: Always; no provider is integrated here.
        """
        # Intended integration points, selected by self.tts_provider:
        # - system: OS TTS (pyttsx3, etc.)
        # - elevenlabs: ElevenLabs API
        # - azure: Azure Cognitive Services
        # - google: Google Cloud TTS
        raise NotImplementedError("TTS provider integration required")

    async def _transcribe_speech(self, audio_data: bytes) -> str:
        """
        Placeholder for provider-backed speech transcription.

        Args:
            audio_data: Audio data to transcribe.

        Returns:
            Transcribed text (once implemented).

        Raises:
            NotImplementedError: Always; no provider is integrated here.
        """
        # Intended integration points, selected by self.stt_provider:
        # - whisper: OpenAI Whisper (local or API)
        # - azure: Azure Cognitive Services
        # - google: Google Cloud Speech-to-Text
        # - deepgram: Deepgram API
        raise NotImplementedError("STT provider integration required")
|
||||
Reference in New Issue
Block a user