[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/browser_use/agent/memory/__init__.py
+++ b/browser-use/browser_use/agent/memory/__init__.py
@ -0,0 +1,4 @@
+from browser_use.agent.memory.service import Memory
+from browser_use.agent.memory.views import MemoryConfig
+
+__all__ = ['Memory', 'MemoryConfig']
--- a/browser-use/browser_use/agent/memory/service.py
+++ b/browser-use/browser_use/agent/memory/service.py
@ -0,0 +1,151 @@
+from __future__ import annotations
+
+import logging
+import os
+
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import (
+	BaseMessage,
+	HumanMessage,
+)
+from langchain_core.messages.utils import convert_to_openai_messages
+
+from browser_use.agent.memory.views import MemoryConfig
+from browser_use.agent.message_manager.service import MessageManager
+from browser_use.agent.message_manager.views import ManagedMessage, MessageMetadata
+from browser_use.utils import time_execution_sync
+
+logger = logging.getLogger(__name__)
+
+
+class Memory:
+	"""
+	Manages procedural memory for agents.
+
+	This class implements a procedural memory management system using Mem0 that transforms agent interaction history
+	into concise, structured representations. It serves to optimize context window utilization during extended
+	task execution by converting verbose historical information into compact, yet comprehensive memory
+	constructs that preserve essential operational knowledge.
+
+	Consolidation is triggered by calling create_procedural_memory(); this class does not schedule it itself.
+	"""
+
+	def __init__(
+		self,
+		message_manager: MessageManager,
+		llm: BaseChatModel,
+		config: MemoryConfig | None = None,
+	):
+		"""
+		Build the memory configuration and initialize the Mem0 backend.
+
+		Args:
+			message_manager: Message manager whose history create_procedural_memory() rewrites.
+			llm: Chat model stored on the config as llm_instance and passed on to Mem0.
+			config: Optional memory configuration. When omitted, a default config is created and its
+				embedder is picked from the LLM's class name (ChatOpenAI, ChatGoogleGenerativeAI, ChatOllama);
+				any other LLM keeps the MemoryConfig defaults. When given, it is re-validated and its
+				llm_instance is overwritten with `llm`.
+
+		Raises:
+			ImportError: If mem0 cannot be imported, or if embedder_provider is 'huggingface' and
+				sentence_transformers cannot be imported.
+		"""
+		self.message_manager = message_manager
+		self.llm = llm
+
+		# Initialize configuration with defaults based on the LLM if not provided
+		if config is None:
+			self.config = MemoryConfig(llm_instance=llm, agent_id=f'agent_{id(self)}')
+
+			# Set appropriate embedder based on LLM type
+			llm_class = llm.__class__.__name__
+			if llm_class == 'ChatOpenAI':
+				self.config.embedder_provider = 'openai'
+				self.config.embedder_model = 'text-embedding-3-small'
+				self.config.embedder_dims = 1536
+			elif llm_class == 'ChatGoogleGenerativeAI':
+				self.config.embedder_provider = 'gemini'
+				self.config.embedder_model = 'models/text-embedding-004'
+				self.config.embedder_dims = 768
+			elif llm_class == 'ChatOllama':
+				self.config.embedder_provider = 'ollama'
+				self.config.embedder_model = 'nomic-embed-text'
+				self.config.embedder_dims = 512
+		else:
+			# Ensure LLM instance is set in the config
+			self.config = MemoryConfig(**dict(config))  # re-validate untrusted user-provided config
+			self.config.llm_instance = llm
+
+		# Also disable mem0's telemetry when ANONYMIZED_TELEMETRY is falsy (first letter f/n/0, e.g. "False").
+		# This is done before mem0 is imported (presumably mem0 reads MEM0_TELEMETRY at import time).
+		# Slicing with [:1] instead of indexing [0] keeps an empty value from raising IndexError;
+		# an empty value leaves MEM0_TELEMETRY untouched.
+		if os.getenv('ANONYMIZED_TELEMETRY', 'true').lower()[:1] in ('f', 'n', '0'):
+			os.environ['MEM0_TELEMETRY'] = 'False'
+
+		# Check for required packages
+		try:
+			from mem0 import Memory as Mem0Memory
+		except ImportError as e:
+			# The `mem0` module is distributed on PyPI under the name `mem0ai`
+			raise ImportError(
+				'mem0 is required when enable_memory=True. Please install it with `pip install mem0ai`.'
+			) from e
+
+		if self.config.embedder_provider == 'huggingface':
+			try:
+				# check that required package is installed if huggingface is used
+				from sentence_transformers import SentenceTransformer  # noqa: F401
+			except ImportError as e:
+				raise ImportError(
+					'sentence_transformers is required when enable_memory=True and embedder_provider="huggingface". Please install it with `pip install sentence-transformers`.'
+				) from e
+
+		# Initialize Mem0 with the configuration
+		self.mem0 = Mem0Memory.from_config(config_dict=self.config.full_config_dict)
+
+	@time_execution_sync('--create_procedural_memory')
+	def create_procedural_memory(self, current_step: int) -> None:
+		"""
+		Consolidate the message history into a single procedural memory message.
+
+		Messages tagged 'init' or 'memory' are kept untouched. Every other message with non-empty content
+		is sent to Mem0 and, on success, all of them are replaced by one HumanMessage holding the returned
+		memory; messages with empty content are discarded at that point. The history is left unchanged
+		when fewer than two messages qualify for summarization or when no memory could be created.
+
+		Args:
+			current_step: The current step number of the agent (logged and stored as Mem0 metadata)
+		"""
+		logger.info(f'Creating procedural memory at step {current_step}')
+
+		# Get all messages
+		all_messages = self.message_manager.state.history.messages
+
+		# Separate messages into those to keep as-is and those to process for memory
+		new_messages = []
+		messages_to_process = []
+		# Tokens of empty-content messages. They end up in neither list, so they vanish from the
+		# rebuilt history and must be subtracted from the running total as well.
+		dropped_tokens = 0
+
+		for msg in all_messages:
+			if isinstance(msg, ManagedMessage) and msg.metadata.message_type in {'init', 'memory'}:
+				# Keep system and memory messages as they are
+				new_messages.append(msg)
+			elif len(msg.message.content) > 0:
+				messages_to_process.append(msg)
+			else:
+				# assumes every history entry carries metadata.tokens, as the processed messages do
+				dropped_tokens += msg.metadata.tokens
+
+		# Need at least 2 messages to create a meaningful summary
+		if len(messages_to_process) <= 1:
+			logger.info('Not enough non-memory messages to summarize')
+			return
+		# Create a procedural memory
+		memory_content = self._create([m.message for m in messages_to_process], current_step)
+
+		if not memory_content:
+			logger.warning('Failed to create procedural memory')
+			return
+
+		# Replace the processed messages with the consolidated memory
+		memory_message = HumanMessage(content=memory_content)
+		memory_tokens = self.message_manager._count_tokens(memory_message)
+		memory_metadata = MessageMetadata(tokens=memory_tokens, message_type='memory')
+
+		# Calculate the total tokens being removed (summarized messages plus discarded empty ones)
+		removed_tokens = sum(m.metadata.tokens for m in messages_to_process) + dropped_tokens
+
+		# Add the memory message
+		new_messages.append(ManagedMessage(message=memory_message, metadata=memory_metadata))
+
+		# Update the history
+		self.message_manager.state.history.messages = new_messages
+		self.message_manager.state.history.current_tokens -= removed_tokens
+		self.message_manager.state.history.current_tokens += memory_tokens
+		logger.info(f'Messages consolidated: {len(messages_to_process)} messages converted to procedural memory')
+
+	def _create(self, messages: list[BaseMessage], current_step: int) -> str | None:
+		"""
+		Ask Mem0 to turn the given messages into a procedural memory.
+
+		Args:
+			messages: Messages to summarize; they are converted to OpenAI-style message dicts first.
+			current_step: Step number stored in the memory's metadata.
+
+		Returns:
+			The 'memory' value of the first entry in Mem0's 'results', or None when Mem0 returns
+			no results or raises (the error is logged, not propagated).
+		"""
+		parsed_messages = convert_to_openai_messages(messages)
+		try:
+			results = self.mem0.add(
+				messages=parsed_messages,
+				agent_id=self.config.agent_id,
+				memory_type='procedural_memory',
+				metadata={'step': current_step},
+			)
+			entries = results.get('results', [])
+			if entries:
+				return entries[0].get('memory')
+			return None
+		except Exception as e:
+			# Best effort: a failed consolidation must not abort the agent run
+			logger.error(f'Error creating procedural memory: {e}')
+			return None
--- a/browser-use/browser_use/agent/memory/views.py
+++ b/browser-use/browser_use/agent/memory/views.py
@ -0,0 +1,67 @@
+from typing import Any, Literal
+
+from langchain_core.language_models.chat_models import BaseChatModel
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class MemoryConfig(BaseModel):
+	"""Settings for procedural memory, plus helpers that render them as Mem0 config dictionaries."""
+
+	model_config = ConfigDict(
+		from_attributes=True,
+		validate_default=True,
+		revalidate_instances='always',
+		validate_assignment=True,
+	)
+
+	# Memory settings
+	agent_id: str = Field(default='browser_use_agent', min_length=1)
+	memory_interval: int = Field(default=10, gt=1, lt=100)
+
+	# Embedder settings
+	embedder_provider: Literal['openai', 'gemini', 'ollama', 'huggingface'] = 'huggingface'
+	embedder_model: str = Field(min_length=2, default='all-MiniLM-L6-v2')
+	embedder_dims: int = Field(default=384, gt=10, lt=10000)
+
+	# LLM settings - the LLM instance can be passed separately
+	llm_provider: Literal['langchain'] = 'langchain'
+	llm_instance: BaseChatModel | None = None
+
+	# Vector store settings
+	vector_store_provider: Literal['faiss'] = 'faiss'
+	vector_store_base_path: str = Field(default='/tmp/mem0')
+
+	@property
+	def vector_store_path(self) -> str:
+		"""Base path, embedding dimensions and provider joined with underscores, e.g. /tmp/mem0_384_faiss"""
+		segments = (
+			str(self.vector_store_base_path),
+			str(self.embedder_dims),
+			str(self.vector_store_provider),
+		)
+		return '_'.join(segments)
+
+	@property
+	def embedder_config_dict(self) -> dict[str, Any]:
+		"""Embedder section: provider plus its model name and embedding dimensions."""
+		embedder_settings: dict[str, Any] = {
+			'model': self.embedder_model,
+			'embedding_dims': self.embedder_dims,
+		}
+		return {'provider': self.embedder_provider, 'config': embedder_settings}
+
+	@property
+	def llm_config_dict(self) -> dict[str, Any]:
+		"""LLM section: provider plus the LLM instance passed as the 'model' entry."""
+		llm_settings: dict[str, Any] = {'model': self.llm_instance}
+		return {'provider': self.llm_provider, 'config': llm_settings}
+
+	@property
+	def vector_store_config_dict(self) -> dict[str, Any]:
+		"""Vector store section: provider plus embedding dimensions and on-disk path."""
+		store_settings: dict[str, Any] = {
+			'embedding_model_dims': self.embedder_dims,
+			'path': self.vector_store_path,
+		}
+		return {'provider': self.vector_store_provider, 'config': store_settings}
+
+	@property
+	def full_config_dict(self) -> dict[str, dict[str, Any]]:
+		"""Complete Mem0 configuration made of the embedder, llm and vector_store sections."""
+		return dict(
+			embedder=self.embedder_config_dict,
+			llm=self.llm_config_dict,
+			vector_store=self.vector_store_config_dict,
+		)