[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
329
browser-use/browser_use/agent/message_manager/service.py
Normal file
329
browser-use/browser_use/agent/message_manager/service.py
Normal file
|
|
@ -0,0 +1,329 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from langchain_core.messages import (
|
||||
AIMessage,
|
||||
BaseMessage,
|
||||
HumanMessage,
|
||||
SystemMessage,
|
||||
ToolMessage,
|
||||
)
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.agent.message_manager.views import MessageMetadata
|
||||
from browser_use.agent.prompts import AgentMessagePrompt
|
||||
from browser_use.agent.views import ActionResult, AgentOutput, AgentStepInfo, MessageManagerState
|
||||
from browser_use.browser.views import BrowserState
|
||||
from browser_use.utils import time_execution_sync
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MessageManagerSettings(BaseModel):
|
||||
max_input_tokens: int = 128000
|
||||
estimated_characters_per_token: int = 3
|
||||
image_tokens: int = 800
|
||||
include_attributes: list[str] = []
|
||||
message_context: str | None = None
|
||||
sensitive_data: dict[str, str] | None = None
|
||||
available_file_paths: list[str] | None = None
|
||||
|
||||
|
||||
class MessageManager:
|
||||
def __init__(
|
||||
self,
|
||||
task: str,
|
||||
system_message: SystemMessage,
|
||||
settings: MessageManagerSettings = MessageManagerSettings(),
|
||||
state: MessageManagerState = MessageManagerState(),
|
||||
):
|
||||
self.task = task
|
||||
self.settings = settings
|
||||
self.state = state
|
||||
self.system_prompt = system_message
|
||||
|
||||
# Only initialize messages if state is empty
|
||||
if len(self.state.history.messages) == 0:
|
||||
self._init_messages()
|
||||
|
||||
def _init_messages(self) -> None:
|
||||
"""Initialize the message history with system message, context, task, and other initial messages"""
|
||||
self._add_message_with_tokens(self.system_prompt, message_type='init')
|
||||
|
||||
if self.settings.message_context:
|
||||
context_message = HumanMessage(content='Context for the task' + self.settings.message_context)
|
||||
self._add_message_with_tokens(context_message, message_type='init')
|
||||
|
||||
task_message = HumanMessage(
|
||||
content=f'Your ultimate task is: """{self.task}""". If you achieved your ultimate task, stop everything and use the done action in the next step to complete the task. If not, continue as usual.'
|
||||
)
|
||||
self._add_message_with_tokens(task_message, message_type='init')
|
||||
|
||||
if self.settings.sensitive_data:
|
||||
info = f'Here are placeholders for sensitive data: {list(self.settings.sensitive_data.keys())}'
|
||||
info += '\nTo use them, write <secret>the placeholder name</secret>'
|
||||
info_message = HumanMessage(content=info)
|
||||
self._add_message_with_tokens(info_message, message_type='init')
|
||||
|
||||
placeholder_message = HumanMessage(content='Example output:')
|
||||
self._add_message_with_tokens(placeholder_message, message_type='init')
|
||||
|
||||
example_tool_call = AIMessage(
|
||||
content='',
|
||||
tool_calls=[
|
||||
{
|
||||
'name': 'AgentOutput',
|
||||
'args': {
|
||||
'current_state': {
|
||||
'evaluation_previous_goal': """
|
||||
Success - I successfully clicked on the 'Apple' link from the Google Search results page,
|
||||
which directed me to the 'Apple' company homepage. This is a good start toward finding
|
||||
the best place to buy a new iPhone as the Apple website often list iPhones for sale.
|
||||
""".strip(),
|
||||
'memory': """
|
||||
I searched for 'iPhone retailers' on Google. From the Google Search results page,
|
||||
I used the 'click_element_by_index' tool to click on element at index [45] labeled 'Best Buy' but calling
|
||||
the tool did not direct me to a new page. I then used the 'click_element_by_index' tool to click
|
||||
on element at index [82] labeled 'Apple' which redirected me to the 'Apple' company homepage.
|
||||
Currently at step 3/15.
|
||||
""".strip(),
|
||||
'next_goal': """
|
||||
Looking at reported structure of the current page, I can see the item '[127]<h3 iPhone/>'
|
||||
in the content. I think this button will lead to more information and potentially prices
|
||||
for iPhones. I'll click on the link at index [127] using the 'click_element_by_index'
|
||||
tool and hope to see prices on the next page.
|
||||
""".strip(),
|
||||
},
|
||||
'action': [{'click_element_by_index': {'index': 127}}],
|
||||
},
|
||||
'id': str(self.state.tool_id),
|
||||
'type': 'tool_call',
|
||||
},
|
||||
],
|
||||
)
|
||||
self._add_message_with_tokens(example_tool_call, message_type='init')
|
||||
self.add_tool_message(content='Browser started', message_type='init')
|
||||
|
||||
placeholder_message = HumanMessage(content='[Your task history memory starts here]')
|
||||
self._add_message_with_tokens(placeholder_message)
|
||||
|
||||
if self.settings.available_file_paths:
|
||||
filepaths_msg = HumanMessage(content=f'Here are file paths you can use: {self.settings.available_file_paths}')
|
||||
self._add_message_with_tokens(filepaths_msg, message_type='init')
|
||||
|
||||
def add_new_task(self, new_task: str) -> None:
|
||||
content = f'Your new ultimate task is: """{new_task}""". Take the previous context into account and finish your new ultimate task. '
|
||||
msg = HumanMessage(content=content)
|
||||
self._add_message_with_tokens(msg)
|
||||
self.task = new_task
|
||||
|
||||
@time_execution_sync('--add_state_message')
|
||||
def add_state_message(
|
||||
self,
|
||||
state: BrowserState,
|
||||
result: list[ActionResult] | None = None,
|
||||
step_info: AgentStepInfo | None = None,
|
||||
use_vision=True,
|
||||
) -> None:
|
||||
"""Add browser state as human message"""
|
||||
|
||||
# if keep in memory, add to directly to history and add state without result
|
||||
if result:
|
||||
for r in result:
|
||||
if r.include_in_memory:
|
||||
if r.extracted_content:
|
||||
msg = HumanMessage(content='Action result: ' + str(r.extracted_content))
|
||||
self._add_message_with_tokens(msg)
|
||||
if r.error:
|
||||
# if endswith \n, remove it
|
||||
if r.error.endswith('\n'):
|
||||
r.error = r.error[:-1]
|
||||
# get only last line of error
|
||||
last_line = r.error.split('\n')[-1]
|
||||
msg = HumanMessage(content='Action error: ' + last_line)
|
||||
self._add_message_with_tokens(msg)
|
||||
result = None # if result in history, we dont want to add it again
|
||||
|
||||
# otherwise add state message and result to next message (which will not stay in memory)
|
||||
state_message = AgentMessagePrompt(
|
||||
state,
|
||||
result,
|
||||
include_attributes=self.settings.include_attributes,
|
||||
step_info=step_info,
|
||||
).get_user_message(use_vision)
|
||||
self._add_message_with_tokens(state_message)
|
||||
|
||||
def add_model_output(self, model_output: AgentOutput) -> None:
|
||||
"""Add model output as AI message"""
|
||||
tool_calls = [
|
||||
{
|
||||
'name': 'AgentOutput',
|
||||
'args': model_output.model_dump(mode='json', exclude_unset=True),
|
||||
'id': str(self.state.tool_id),
|
||||
'type': 'tool_call',
|
||||
}
|
||||
]
|
||||
|
||||
msg = AIMessage(
|
||||
content='',
|
||||
tool_calls=tool_calls,
|
||||
)
|
||||
|
||||
self._add_message_with_tokens(msg)
|
||||
# empty tool response
|
||||
self.add_tool_message(content='')
|
||||
|
||||
def add_plan(self, plan: str | None, position: int | None = None) -> None:
|
||||
if plan:
|
||||
msg = AIMessage(content=plan)
|
||||
self._add_message_with_tokens(msg, position)
|
||||
|
||||
@time_execution_sync('--get_messages')
|
||||
def get_messages(self) -> list[BaseMessage]:
|
||||
"""Get current message list, potentially trimmed to max tokens"""
|
||||
|
||||
msg = [m.message for m in self.state.history.messages]
|
||||
# debug which messages are in history with token count # log
|
||||
total_input_tokens = 0
|
||||
logger.debug(f'Messages in history: {len(self.state.history.messages)}:')
|
||||
for m in self.state.history.messages:
|
||||
total_input_tokens += m.metadata.tokens
|
||||
logger.debug(f'{m.message.__class__.__name__} - Token count: {m.metadata.tokens}')
|
||||
logger.debug(f'Total input tokens: {total_input_tokens}')
|
||||
|
||||
return msg
|
||||
|
||||
def _add_message_with_tokens(
|
||||
self, message: BaseMessage, position: int | None = None, message_type: str | None = None
|
||||
) -> None:
|
||||
"""Add message with token count metadata
|
||||
position: None for last, -1 for second last, etc.
|
||||
"""
|
||||
|
||||
# filter out sensitive data from the message
|
||||
if self.settings.sensitive_data:
|
||||
message = self._filter_sensitive_data(message)
|
||||
|
||||
token_count = self._count_tokens(message)
|
||||
metadata = MessageMetadata(tokens=token_count, message_type=message_type)
|
||||
self.state.history.add_message(message, metadata, position)
|
||||
|
||||
@time_execution_sync('--filter_sensitive_data')
|
||||
def _filter_sensitive_data(self, message: BaseMessage) -> BaseMessage:
|
||||
"""Filter out sensitive data from the message"""
|
||||
|
||||
def replace_sensitive(value: str) -> str:
|
||||
if not self.settings.sensitive_data:
|
||||
return value
|
||||
|
||||
# Create a dictionary with all key-value pairs from sensitive_data where value is not None or empty
|
||||
valid_sensitive_data = {k: v for k, v in self.settings.sensitive_data.items() if v}
|
||||
|
||||
# If there are no valid sensitive data entries, just return the original value
|
||||
if not valid_sensitive_data:
|
||||
logger.warning('No valid entries found in sensitive_data dictionary')
|
||||
return value
|
||||
|
||||
# Replace all valid sensitive data values with their placeholder tags
|
||||
for key, val in valid_sensitive_data.items():
|
||||
value = value.replace(val, f'<secret>{key}</secret>')
|
||||
|
||||
return value
|
||||
|
||||
if isinstance(message.content, str):
|
||||
message.content = replace_sensitive(message.content)
|
||||
elif isinstance(message.content, list):
|
||||
for i, item in enumerate(message.content):
|
||||
if isinstance(item, dict) and 'text' in item:
|
||||
item['text'] = replace_sensitive(item['text'])
|
||||
message.content[i] = item
|
||||
return message
|
||||
|
||||
def _count_tokens(self, message: BaseMessage) -> int:
|
||||
"""Count tokens in a message using the model's tokenizer"""
|
||||
tokens = 0
|
||||
if isinstance(message.content, list):
|
||||
for item in message.content:
|
||||
if 'image_url' in item:
|
||||
tokens += self.settings.image_tokens
|
||||
elif isinstance(item, dict) and 'text' in item:
|
||||
tokens += self._count_text_tokens(item['text'])
|
||||
else:
|
||||
msg = message.content
|
||||
if hasattr(message, 'tool_calls'):
|
||||
msg += str(message.tool_calls) # type: ignore
|
||||
tokens += self._count_text_tokens(msg)
|
||||
return tokens
|
||||
|
||||
def _count_text_tokens(self, text: str) -> int:
|
||||
"""Count tokens in a text string"""
|
||||
tokens = len(text) // self.settings.estimated_characters_per_token # Rough estimate if no tokenizer available
|
||||
return tokens
|
||||
|
||||
def cut_messages(self):
|
||||
"""Get current message list, potentially trimmed to max tokens"""
|
||||
diff = self.state.history.current_tokens - self.settings.max_input_tokens
|
||||
if diff <= 0:
|
||||
return None
|
||||
|
||||
msg = self.state.history.messages[-1]
|
||||
|
||||
# if list with image remove image
|
||||
if isinstance(msg.message.content, list):
|
||||
text = ''
|
||||
for item in msg.message.content:
|
||||
if 'image_url' in item:
|
||||
msg.message.content.remove(item)
|
||||
diff -= self.settings.image_tokens
|
||||
msg.metadata.tokens -= self.settings.image_tokens
|
||||
self.state.history.current_tokens -= self.settings.image_tokens
|
||||
logger.debug(
|
||||
f'Removed image with {self.settings.image_tokens} tokens - total tokens now: {self.state.history.current_tokens}/{self.settings.max_input_tokens}'
|
||||
)
|
||||
elif 'text' in item and isinstance(item, dict):
|
||||
text += item['text']
|
||||
msg.message.content = text
|
||||
self.state.history.messages[-1] = msg
|
||||
|
||||
if diff <= 0:
|
||||
return None
|
||||
|
||||
# if still over, remove text from state message proportionally to the number of tokens needed with buffer
|
||||
# Calculate the proportion of content to remove
|
||||
proportion_to_remove = diff / msg.metadata.tokens
|
||||
if proportion_to_remove > 0.99:
|
||||
raise ValueError(
|
||||
f'Max token limit reached - history is too long - reduce the system prompt or task. '
|
||||
f'proportion_to_remove: {proportion_to_remove}'
|
||||
)
|
||||
logger.debug(
|
||||
f'Removing {proportion_to_remove * 100:.2f}% of the last message {proportion_to_remove * msg.metadata.tokens:.2f} / {msg.metadata.tokens:.2f} tokens)'
|
||||
)
|
||||
|
||||
content = msg.message.content
|
||||
characters_to_remove = int(len(content) * proportion_to_remove)
|
||||
content = content[:-characters_to_remove]
|
||||
|
||||
# remove tokens and old long message
|
||||
self.state.history.remove_last_state_message()
|
||||
|
||||
# new message with updated content
|
||||
msg = HumanMessage(content=content)
|
||||
self._add_message_with_tokens(msg)
|
||||
|
||||
last_msg = self.state.history.messages[-1]
|
||||
|
||||
logger.debug(
|
||||
f'Added message with {last_msg.metadata.tokens} tokens - total tokens now: {self.state.history.current_tokens}/{self.settings.max_input_tokens} - total messages: {len(self.state.history.messages)}'
|
||||
)
|
||||
|
||||
def _remove_last_state_message(self) -> None:
|
||||
"""Remove last state message from history"""
|
||||
self.state.history.remove_last_state_message()
|
||||
|
||||
def add_tool_message(self, content: str, message_type: str | None = None) -> None:
|
||||
"""Add tool message to history"""
|
||||
msg = ToolMessage(content=content, tool_call_id=str(self.state.tool_id))
|
||||
self.state.tool_id += 1
|
||||
self._add_message_with_tokens(msg, message_type=message_type)
|
||||
Loading…
Add table
Add a link
Reference in a new issue