[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
59
browser-use/tests/conftest.py
Normal file
59
browser-use/tests/conftest.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Test configuration for browser-use.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
# Ensure the project root is in the Python path
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
||||
|
||||
# Load environment variables
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def llm():
    """
    Fixture to provide a ChatOpenAI instance for testing.

    No mock fallback is implemented here: a real ChatOpenAI ('gpt-4o') is always
    constructed. When OPENAI_API_KEY is unset or empty, ``api_key=None`` is passed.
    """
    # None when the environment variable is not set.
    api_key = os.getenv('OPENAI_API_KEY')
    # Only the presence of the key is logged, never its value.
    logger.debug(f'API Key present: {bool(api_key)}')
    logger.debug('Using actual ChatOpenAI model')
    return ChatOpenAI(model='gpt-4o', api_key=SecretStr(api_key) if api_key else None)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def browser():
    """Provide a headless Browser with ``disable_security=True``, created once per test session."""
    logger.debug('Creating Browser instance for testing')
    test_config = BrowserConfig(headless=True, disable_security=True)
    return Browser(config=test_config)
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
async def browser_context(browser):
    """Yield a new BrowserContext built on the ``browser`` fixture and close it after the test."""
    logger.debug('Creating BrowserContext instance for testing')
    ctx = BrowserContext(browser=browser)
    yield ctx
    # Teardown: runs once the test that requested the fixture has finished.
    await ctx.close()
|
||||
305
browser-use/tests/test_action_filters.py
Normal file
305
browser-use/tests/test_action_filters.py
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from playwright.async_api import Page
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.controller.registry.service import Registry
|
||||
from browser_use.controller.registry.views import ActionRegistry, RegisteredAction
|
||||
|
||||
|
||||
# Parameter model with no fields; passed as ``param_model`` for the no-op test actions in this file.
class EmptyParamModel(BaseModel):
    pass
|
||||
|
||||
|
||||
class TestActionFilters:
    """Checks which registered actions are exposed, depending on their domain and page filters."""

    @staticmethod
    def _make_action(name, description, domains=None, page_filter=None):
        """Build a no-op RegisteredAction that carries the given filters."""
        return RegisteredAction(
            name=name,
            description=description,
            function=lambda: None,
            param_model=EmptyParamModel,
            domains=domains,
            page_filter=page_filter,
        )

    @staticmethod
    def _make_page(url):
        """Return a ``Page`` mock whose ``url`` attribute is set to ``url``."""
        fake_page = MagicMock(spec=Page)
        fake_page.url = url
        return fake_page

    def test_get_prompt_description_no_filters(self):
        """Test that system prompt only includes actions with no filters"""
        registry = ActionRegistry()

        # One action per filter kind: none, page filter, domain filter.
        registry.actions = {
            'no_filter_action': self._make_action('no_filter_action', 'Action with no filters'),
            'page_filter_action': self._make_action(
                'page_filter_action',
                'Action with page filter',
                page_filter=lambda page: True,
            ),
            'domain_filter_action': self._make_action(
                'domain_filter_action',
                'Action with domain filter',
                domains=['example.com'],
            ),
        }

        # Without a page, only the unfiltered action may be described.
        system_description = registry.get_prompt_description()
        assert 'no_filter_action' in system_description
        assert 'page_filter_action' not in system_description
        assert 'domain_filter_action' not in system_description

    def test_page_filter_matching(self):
        """Test that page filters work correctly"""
        registry = ActionRegistry()
        current_page = self._make_page('https://example.com/page')

        registry.actions = {
            'matching_action': self._make_action(
                'matching_action',
                'Action with matching page filter',
                page_filter=lambda page: 'example.com' in page.url,
            ),
            'non_matching_action': self._make_action(
                'non_matching_action',
                'Action with non-matching page filter',
                page_filter=lambda page: 'other.com' in page.url,
            ),
        }

        # Only the action whose filter accepts the page may be described.
        page_description = registry.get_prompt_description(current_page)
        assert 'matching_action' in page_description
        assert 'non_matching_action' not in page_description

    def test_domain_filter_matching(self):
        """Test that domain filters work correctly with glob patterns"""
        registry = ActionRegistry()

        # (action name, description, domain pattern)
        specs = [
            ('exact_match', 'Exact domain match', 'example.com'),
            ('subdomain_match', 'Subdomain wildcard match', '*.example.com'),
            ('prefix_match', 'Prefix wildcard match', 'example*'),
            ('non_matching', 'Non-matching domain', 'other.com'),
        ]
        registry.actions = {
            name: self._make_action(name, text, domains=[pattern]) for name, text, pattern in specs
        }

        # Exact domain.
        current_page = self._make_page('https://example.com/page')
        on_exact_domain = registry.get_prompt_description(current_page)
        assert 'exact_match' in on_exact_domain
        assert 'non_matching' not in on_exact_domain

        # Subdomain: the wildcard pattern applies, the bare domain does not.
        current_page.url = 'https://sub.example.com/page'
        on_subdomain = registry.get_prompt_description(current_page)
        assert 'subdomain_match' in on_subdomain
        assert 'exact_match' not in on_subdomain

        # Prefix wildcard.
        current_page.url = 'https://example123.org/page'
        on_prefixed_domain = registry.get_prompt_description(current_page)
        assert 'prefix_match' in on_prefixed_domain

    def test_domain_and_page_filter_together(self):
        """Test that actions can be filtered by both domain and page filter"""
        registry = ActionRegistry()
        current_page = self._make_page('https://example.com/admin')

        def is_admin_page(page):
            return 'admin' in page.url

        registry.actions = {
            'domain_only': self._make_action('domain_only', 'Domain filter only', domains=['example.com']),
            'page_only': self._make_action('page_only', 'Page filter only', page_filter=is_admin_page),
            'both_matching': self._make_action(
                'both_matching',
                'Both filters matching',
                domains=['example.com'],
                page_filter=is_admin_page,
            ),
            'both_one_fail': self._make_action(
                'both_one_fail',
                'One filter fails',
                domains=['other.com'],
                page_filter=is_admin_page,
            ),
        }

        # Admin page on example.com: everything but the other.com action is included.
        description = registry.get_prompt_description(current_page)
        assert 'domain_only' in description  # Domain matches
        assert 'page_only' in description  # Page filter matches
        assert 'both_matching' in description  # Both filters match
        assert 'both_one_fail' not in description  # Domain filter fails

        # Same domain, but a URL the page filter rejects.
        current_page.url = 'https://example.com/dashboard'
        description = registry.get_prompt_description(current_page)
        assert 'domain_only' in description  # Domain matches
        assert 'page_only' not in description  # Page filter fails
        assert 'both_matching' not in description  # Page filter fails
        assert 'both_one_fail' not in description  # Domain filter fails

    @pytest.mark.asyncio
    async def test_registry_action_decorator(self):
        """Test the action decorator with filters"""
        registry = Registry()

        def is_admin_page(page):
            return 'admin' in page.url

        # Register one action per filter kind through the decorator.
        @registry.action(description='No filter action')
        def no_filter_action():
            pass

        @registry.action(description='Domain filter action', domains=['example.com'])
        def domain_filter_action():
            pass

        @registry.action(description='Page filter action', page_filter=is_admin_page)
        def page_filter_action():
            pass

        # The page-less (system) prompt lists only the unfiltered action.
        system_description = registry.get_prompt_description()
        assert 'No filter action' in system_description
        assert 'Domain filter action' not in system_description
        assert 'Page filter action' not in system_description

        # A page satisfying both filters brings the filtered actions in.
        admin_page = self._make_page('https://example.com/admin')
        page_description = registry.get_prompt_description(admin_page)
        assert 'Domain filter action' in page_description
        assert 'Page filter action' in page_description

    @pytest.mark.asyncio
    async def test_action_model_creation(self):
        """Test that action models are created correctly with filters"""
        registry = Registry()

        def is_admin_page(page):
            return 'admin' in page.url

        @registry.action(description='No filter action')
        def no_filter_action():
            pass

        @registry.action(description='Domain filter action', domains=['example.com'])
        def domain_filter_action():
            pass

        @registry.action(description='Page filter action', page_filter=is_admin_page)
        def page_filter_action():
            pass

        @registry.action(description='Both filters action', domains=['example.com'], page_filter=is_admin_page)
        def both_filters_action():
            pass

        # No page: only the unfiltered action becomes a model field.
        initial_fields = registry.create_action_model().model_fields
        assert 'no_filter_action' in initial_fields
        assert 'domain_filter_action' not in initial_fields
        assert 'page_filter_action' not in initial_fields
        assert 'both_filters_action' not in initial_fields

        # Domain and page filter both satisfied: every action is present.
        current_page = self._make_page('https://example.com/admin')
        matching_fields = registry.create_action_model(page=current_page).model_fields
        assert 'no_filter_action' in matching_fields
        assert 'domain_filter_action' in matching_fields
        assert 'page_filter_action' in matching_fields
        assert 'both_filters_action' in matching_fields

        # Wrong domain: the domain-filtered actions drop out.
        current_page.url = 'https://other.com/admin'
        other_domain_fields = registry.create_action_model(page=current_page).model_fields
        assert 'no_filter_action' in other_domain_fields
        assert 'domain_filter_action' not in other_domain_fields
        assert 'page_filter_action' in other_domain_fields
        assert 'both_filters_action' not in other_domain_fields

        # Right domain, page filter rejected: the page-filtered actions drop out.
        current_page.url = 'https://example.com/dashboard'
        other_page_fields = registry.create_action_model(page=current_page).model_fields
        assert 'no_filter_action' in other_page_fields
        assert 'domain_filter_action' in other_page_fields
        assert 'page_filter_action' not in other_page_fields
        assert 'both_filters_action' not in other_page_fields
|
||||
220
browser-use/tests/test_agent_actions.py
Normal file
220
browser-use/tests/test_agent_actions.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import BaseModel, SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.views import BrowserState
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """Build the Azure-hosted 'gpt-4o' chat model used by the agent tests in this module."""
    # Endpoint and key come from the environment; both default to an empty string.
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    # Alternative previously tried here:
    # ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
# return ChatOpenAI(model='gpt-4o-mini')
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Create one new event loop for the whole test session and close it on teardown."""
    policy = asyncio.get_event_loop_policy()
    session_loop = policy.new_event_loop()
    yield session_loop
    session_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield one headless Browser for the session and close it on teardown."""
    headless_config = BrowserConfig(headless=True)
    shared_browser = Browser(config=headless_config)
    yield shared_browser
    await shared_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a new context from ``browser`` for each test."""
    new_context = await browser.new_context()
    # Cleanup is delegated to the context manager's __aexit__.
    async with new_context as active_context:
        yield active_context
|
||||
|
||||
|
||||
# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
|
||||
# @pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason='Kinda expensive to run')
async def test_ecommerce_interaction(llm, context):
    """Run a multi-step shopping task and check that navigation, typing and clicking all occurred."""
    agent = Agent(
        task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
        llm=llm,
        browser_context=context,
        save_conversation_path='tmp/test_ecommerce_interaction/conversation',
    )

    history: AgentHistoryList = await agent.run(max_steps=20)

    # Reduce every model action to a coarse label so the sequence is easy to assert on.
    performed = []
    for action in history.model_actions():
        action_name = list(action)[0]
        if action_name in ('go_to_url', 'open_tab'):
            performed.append('navigate')
        elif action_name == 'input_text':
            performed.append('input')
            # Grade how closely the typed text matches the search term.
            typed = action['input_text']['text'].lower()  # type: ignore
            if typed == 'laptop':
                grade = 'input_exact_correct'
            elif 'laptop' in typed:
                grade = 'correct_in_input'
            else:
                grade = 'incorrect_input'
            performed.append(grade)
        elif action_name == 'click_element':
            performed.append('click')

    # Essential steps: navigated to Amazon, entered a search term, clicked search/filter.
    for required_step in ('navigate', 'input', 'click'):
        assert required_step in performed
    assert 'input_exact_correct' in performed or 'correct_in_input' in performed
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_error_recovery(llm, context):
    """Test agent's ability to recover from errors.

    The agent is told to visit a non-existent site first and google.com second,
    so the first ``go_to_url`` is not expected to be the google.com one.
    The check therefore looks at every ``go_to_url`` action, not only the first.
    """
    agent = Agent(
        task='Navigate to nonexistent-site.com and then recover by going to google.com ',
        llm=llm,
        browser_context=context,
    )

    history: AgentHistoryList = await agent.run(max_steps=10)

    actions_names = history.action_names()
    actions = history.model_actions()
    assert 'go_to_url' in actions_names or 'open_tab' in actions_names, f'{actions_names} does not contain go_to_url or open_tab'

    # Collect the target of every go_to_url action.
    visited_urls = []
    for action in actions:
        if 'go_to_url' in action:
            assert 'url' in action['go_to_url'], 'url is not in go_to_url'
            visited_urls.append(action['go_to_url']['url'])

    # If the agent only used open_tab there is nothing to check here.
    if visited_urls:
        # Strip a trailing slash so 'https://www.google.com/' counts as google.com.
        assert any(url.rstrip('/').endswith('google.com') for url in visited_urls), (
            f'url does not end with google.com: {visited_urls}'
        )
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_find_contact_email(llm, context):
    """Check that the agent extracts the contact email from the browser-use website."""
    agent = Agent(
        task='Go to https://browser-use.com/ and find out the contact email',
        llm=llm,
        browser_context=context,
    )

    history: AgentHistoryList = await agent.run(max_steps=10)

    # The address must appear in at least one piece of extracted content.
    extracted_content = history.extracted_content()
    email = 'info@browser-use.com'
    if not any(email in content for content in extracted_content):
        pytest.fail(f'{extracted_content} does not contain {email}')
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_agent_finds_installation_command(llm, context):
    """Check that the agent finds the pip installation command for browser-use on the web."""
    agent = Agent(
        task='Find the pip installation command for the browser-use repo',
        llm=llm,
        browser_context=context,
    )

    history: AgentHistoryList = await agent.run(max_steps=10)

    # The command must appear in at least one piece of extracted content.
    extracted_content = history.extracted_content()
    install_command = 'pip install browser-use'
    if not any(install_command in content for content in extracted_content):
        pytest.fail(f'{extracted_content} does not contain {install_command}')
|
||||
|
||||
|
||||
# One parametrized captcha scenario for test_captcha_solver.
class CaptchaTest(BaseModel):
    # Human-readable name, used in the assertion failure message.
    name: str
    # Demo page the agent is sent to.
    url: str
    # Text searched for in the final page text to decide the captcha was solved.
    success_text: str
    # Optional extra instruction appended to the agent task.
    additional_text: str | None = None
|
||||
|
||||
|
||||
# run 3 test: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    'captcha',
    [
        CaptchaTest(
            name='Text Captcha',
            url='https://2captcha.com/demo/text',
            success_text='Captcha is passed successfully!',
        ),
        CaptchaTest(
            name='Basic Captcha',
            url='https://captcha.com/demos/features/captcha-demo.aspx',
            success_text='Correct!',
        ),
        CaptchaTest(
            name='Rotate Captcha',
            url='https://2captcha.com/demo/rotatecaptcha',
            success_text='Captcha is passed successfully',
            additional_text='Use multiple clicks at once. click done when image is exact correct position.',
        ),
        CaptchaTest(
            name='MT Captcha',
            url='https://2captcha.com/demo/mtcaptcha',
            success_text='Verified Successfully',
            additional_text='Stop when you solved it successfully.',
        ),
    ],
)
async def test_captcha_solver(llm, context, captcha: CaptchaTest):
    """Test agent's ability to solve different types of captchas.

    The agent runs for at most 7 steps. The test passes when ``captcha.success_text``
    is found in the text collected from the final browser state.
    """
    # additional_text defaults to None; interpolating it directly would put the
    # literal word "None" into the prompt, so fall back to an empty string.
    task = f'Go to {captcha.url} and solve the captcha. {captcha.additional_text or ""}'.strip()
    agent = Agent(
        task=task,
        llm=llm,
        browser_context=context,
    )

    # The returned history is not inspected; only the resulting page state matters.
    await agent.run(max_steps=7)

    state: BrowserState = await context.get_state()
    all_text = state.element_tree.get_all_text_till_next_clickable_element()

    # Normalise to a string: falsy results become '', anything else is stringified.
    page_text = str(all_text) if all_text else ''

    solved = captcha.success_text in page_text
    assert solved, f'Failed to solve {captcha.name}'
|
||||
|
||||
# python -m pytest tests/test_agent_actions.py -v --capture=no
|
||||
|
||||
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
69
browser-use/tests/test_attach_chrome.py
Normal file
69
browser-use/tests/test_attach_chrome.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import asyncio
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
|
||||
async def test_full_screen(start_fullscreen: bool, maximize: bool):
    """Attach to a locally running Chrome over CDP and open Gmail in it.

    This is a manual script; Chrome must already be running with remote debugging, e.g.:
    /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222 --no-first-run

    Args:
        start_fullscreen: Not used by the current implementation.
        maximize: Not used by the current implementation.

    Errors are printed with a traceback rather than raised.
    """
    async with async_playwright() as p:
        # Bound before the try so the finally clause can tell whether the
        # connection was ever established.
        browser = None
        try:
            print('Attempting to connect to Chrome...')
            browser = await p.chromium.connect_over_cdp(
                'http://localhost:9222',
                timeout=20000,  # 20 second timeout for connection
            )
            print('Connected to Chrome successfully')

            # Reuse the first context and page, or create new ones if needed.
            if browser.contexts:
                context = browser.contexts[0]
            else:
                context = await browser.new_context(ignore_https_errors=True)

            if context.pages:
                page = context.pages[0]
            else:
                page = await context.new_page()

            print('Attempting to navigate to Gmail...')
            try:
                # First try with a shorter timeout and the full 'load' event.
                await page.goto(
                    'https://mail.google.com',
                    wait_until='load',
                    timeout=10000,
                )
            except Exception as e:
                print(f'First navigation attempt failed: {e}')
                print('Trying again with different settings...')
                # Retry with a less strict wait condition and a longer timeout.
                await page.goto(
                    'https://mail.google.com',
                    wait_until='commit',
                    timeout=30000,
                )

            # Wait for the page to stabilize.
            await asyncio.sleep(2)

            print(f'Current page title: {await page.title()}')

            # Optional: wait for specific Gmail elements; absence is only reported.
            try:
                await page.wait_for_selector('div[role="main"]', timeout=5000)
                print('Gmail interface detected')
            except Exception as e:
                print(f'Note: Gmail interface not detected: {e}')

            # Keep the session open for manual inspection.
            await asyncio.sleep(30)
        except Exception as e:
            print(f'An error occurred: {e}')
            import traceback

            traceback.print_exc()
        finally:
            # connect_over_cdp may have failed, leaving nothing to close.
            if browser is not None:
                await browser.close()
|
||||
|
||||
|
||||
# Allow running this file directly as a script; both flags are passed as False.
if __name__ == '__main__':
    asyncio.run(test_full_screen(False, False))
|
||||
578
browser-use/tests/test_browser.py
Normal file
578
browser-use/tests/test_browser.py
Normal file
|
|
@ -0,0 +1,578 @@
|
|||
import asyncio
|
||||
import subprocess
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
import requests
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig, ProxySettings
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_builtin_browser_launch(monkeypatch):
    """
    With no remote (cdp or wss) endpoint and no chrome instance configured, the Browser
    is expected to use _setup_builtin_browser. ``async_playwright`` is replaced with
    dummies, and get_playwright_browser must return the dummy browser they launch.
    """

    class DummyBrowser:
        """Stand-in for the launched browser object."""

    class DummyChromium:
        async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
            return DummyBrowser()

    class DummyPlaywright:
        def __init__(self):
            self.chromium = DummyChromium()

        async def stop(self):
            return None

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    def fake_async_playwright():
        return DummyAsyncPlaywrightContext()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', fake_async_playwright)

    browser_obj = Browser(
        config=BrowserConfig(headless=True, disable_security=False, extra_browser_args=['--test'])
    )
    launched = await browser_obj.get_playwright_browser()
    assert isinstance(launched, DummyBrowser), 'Expected DummyBrowser from _setup_builtin_browser'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_cdp_browser_launch(monkeypatch):
    """
    With a CDP URL in the configuration, the Browser is expected to use _setup_cdp
    and hand back the dummy browser returned by ``connect_over_cdp``.
    """

    class DummyBrowser:
        """Stand-in for the connected browser object."""

    class DummyChromium:
        async def connect_over_cdp(self, endpoint_url, timeout=20000):
            assert endpoint_url == 'ws://dummy-cdp-url', 'The endpoint URL should match the configuration.'
            return DummyBrowser()

    class DummyPlaywright:
        def __init__(self):
            self.chromium = DummyChromium()

        async def stop(self):
            return None

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    def fake_async_playwright():
        return DummyAsyncPlaywrightContext()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', fake_async_playwright)

    browser_obj = Browser(config=BrowserConfig(cdp_url='ws://dummy-cdp-url'))
    connected = await browser_obj.get_playwright_browser()
    assert isinstance(connected, DummyBrowser), 'Expected DummyBrowser from _setup_cdp'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_wss_browser_launch(monkeypatch):
    """
    With a WSS URL in the configuration, the Browser is expected to use setup_wss
    and hand back the dummy browser returned by ``connect``.
    """

    class DummyBrowser:
        """Stand-in for the connected browser object."""

    class DummyChromium:
        async def connect(self, wss_url):
            assert wss_url == 'ws://dummy-wss-url', 'WSS URL should match the configuration.'
            return DummyBrowser()

    class DummyPlaywright:
        def __init__(self):
            self.chromium = DummyChromium()

        async def stop(self):
            return None

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    def fake_async_playwright():
        return DummyAsyncPlaywrightContext()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', fake_async_playwright)

    browser_obj = Browser(config=BrowserConfig(wss_url='ws://dummy-wss-url'))
    connected = await browser_obj.get_playwright_browser()
    assert isinstance(connected, DummyBrowser), 'Expected DummyBrowser from _setup_wss'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_user_provided_browser_launch(monkeypatch):
    """
    With a browser_binary_path configured, the Browser is expected to take the
    _setup_user_provided_browser branch and return the dummy browser by reusing
    an existing Chrome instance (the debugging endpoint check is faked to succeed).
    """

    class DummyResponse:
        # Returned by the patched requests.get for the Chrome debugging endpoint check.
        status_code = 200

    def dummy_get(url, timeout):
        # Only the default debugging port answers; anything else is unreachable.
        if url != 'http://localhost:9222/json/version':
            raise requests.ConnectionError('Connection failed')
        return DummyResponse()

    monkeypatch.setattr(requests, 'get', dummy_get)

    class DummyBrowser:
        """Stand-in for the connected browser object."""

    class DummyChromium:
        async def connect_over_cdp(self, endpoint_url, timeout=20000):
            assert endpoint_url == 'http://localhost:9222', "Endpoint URL must be 'http://localhost:9222'"
            return DummyBrowser()

    class DummyPlaywright:
        def __init__(self):
            self.chromium = DummyChromium()

        async def stop(self):
            return None

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    def fake_async_playwright():
        return DummyAsyncPlaywrightContext()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', fake_async_playwright)

    browser_obj = Browser(
        config=BrowserConfig(browser_binary_path='dummy/chrome', extra_browser_args=['--dummy-arg'])
    )
    connected = await browser_obj.get_playwright_browser()
    assert isinstance(connected, DummyBrowser), 'Expected DummyBrowser from _setup_user_provided_browser'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_user_provided_browser_launch_on_custom_chrome_remote_debugging_port(monkeypatch):
    """
    With both browser_binary_path and chrome_remote_debugging_port configured, the Browser
    is expected to take the _setup_user_provided_browser branch, start Chrome with
    --remote-debugging-port=<port>, and return the dummy browser connected on that port.
    """
    custom_chrome_remote_debugging_port = 9223
    debug_endpoint = f'http://localhost:{custom_chrome_remote_debugging_port}'
    port_flag = f'--remote-debugging-port={custom_chrome_remote_debugging_port}'

    class DummyResponse:
        # Returned by the patched requests.get for the Chrome debugging endpoint check.
        status_code = 200

    def dummy_get(url, timeout):
        # Only the custom debugging port answers; anything else is unreachable.
        if url != f'{debug_endpoint}/json/version':
            raise requests.ConnectionError('Connection failed')
        return DummyResponse()

    monkeypatch.setattr(requests, 'get', dummy_get)

    class DummyProcess:
        # Replacement for psutil.Process that accepts and ignores any arguments.
        def __init__(self, *args, **kwargs):
            pass

    class DummySubProcess:
        pid = 1234

    async def dummy_create_subprocess_exec(browser_binary_path, *args, **kwargs):
        # The launch command line must carry the custom port flag.
        assert port_flag in args, (
            f'Chrome must be started with with {port_flag} argument'
        )
        return DummySubProcess()

    monkeypatch.setattr(asyncio, 'create_subprocess_exec', dummy_create_subprocess_exec)
    monkeypatch.setattr(psutil, 'Process', DummyProcess)

    class DummyBrowser:
        """Stand-in for the connected browser object."""

    class DummyChromium:
        async def connect_over_cdp(self, endpoint_url, timeout=20000):
            assert endpoint_url == debug_endpoint, (
                f"Endpoint URL must be '{debug_endpoint}'"
            )
            return DummyBrowser()

    class DummyPlaywright:
        def __init__(self):
            self.chromium = DummyChromium()

        async def stop(self):
            return None

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    def fake_async_playwright():
        return DummyAsyncPlaywrightContext()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', fake_async_playwright)

    browser_obj = Browser(
        config=BrowserConfig(
            browser_binary_path='dummy/chrome',
            chrome_remote_debugging_port=custom_chrome_remote_debugging_port,
            extra_browser_args=['--dummy-arg'],
        )
    )
    connected = await browser_obj.get_playwright_browser()
    assert isinstance(connected, DummyBrowser), (
        f'Expected DummyBrowser with remote debugging port {custom_chrome_remote_debugging_port} from _setup_user_provided_browser'
    )
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_builtin_browser_disable_security_args(monkeypatch):
    """
    Verify the argument list handed to chromium.launch when disable_security=True.

    The stubbed launch() asserts that it receives, in order: the base arguments listed
    below, the three security-disabling arguments, and finally the extra arguments
    supplied through BrowserConfig. It also asserts headless is True and proxy is None.
    """
    # Arguments the test expects first in the launch list (mirrors _setup_builtin_browser).
    base_args = [
        '--no-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-infobars',
        '--disable-background-timer-throttling',
        '--disable-popup-blocking',
        '--disable-backgrounding-occluded-windows',
        '--disable-renderer-backgrounding',
        '--disable-window-activation',
        '--disable-focus-on-load',
        '--no-first-run',
        '--no-default-browser-check',
        '--no-startup-window',
        '--window-position=0,0',
    ]
    # Expected only because disable_security is switched on in the config below.
    disable_security_args = [
        '--disable-web-security',
        '--disable-site-isolation-trials',
        '--disable-features=IsolateOrigins,site-per-process',
    ]
    # Arbitrary user-supplied argument; expected last.
    extra_args = ['--dummy-extra']

    class DummyBrowser:
        pass

    class StubChromium:
        async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
            # Built at call time from the three lists above: base, then security, then extras.
            expected_args = base_args + disable_security_args + extra_args
            assert headless is True, 'Expected headless to be True'
            assert args == expected_args, f'Expected args {expected_args}, but got {args}'
            assert proxy is None, 'Expected proxy to be None'
            return DummyBrowser()

    class StubPlaywright:
        def __init__(self):
            self.chromium = StubChromium()

        async def stop(self):
            pass

    class StubPlaywrightStarter:
        async def start(self):
            return StubPlaywright()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: StubPlaywrightStarter())

    browser_obj = Browser(
        config=BrowserConfig(headless=True, disable_security=True, extra_browser_args=extra_args)
    )
    launched = await browser_obj.get_playwright_browser()
    assert isinstance(launched, DummyBrowser), (
        'Expected DummyBrowser from _setup_builtin_browser with disable_security active'
    )
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_new_context_creation():
    """
    Verify what Browser.new_context() returns.

    The result must be a BrowserContext whose `browser` attribute is the very Browser
    instance it was created from and whose `config` equals the config passed in.
    """
    browser_obj = Browser(config=BrowserConfig())
    requested_config = BrowserContextConfig()

    context = await browser_obj.new_context(requested_config)

    assert isinstance(context, BrowserContext), 'Expected new_context to return an instance of BrowserContext'
    # Identity, not equality: the context must hold the same Browser object.
    assert context.browser is browser_obj, "Expected the context's browser attribute to be the Browser instance"
    assert context.config == requested_config, "Expected the context's config attribute to be the provided config"

    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_user_provided_browser_launch_failure(monkeypatch):
    """
    Verify that an unreachable user-provided Chrome ends in a RuntimeError.

    Failure is simulated by:
    - making requests.get always raise ConnectionError,
    - replacing subprocess.Popen with a function that does nothing,
    - replacing asyncio.sleep with a coroutine that returns immediately,
    - making the stubbed connect_over_cdp always raise.

    get_playwright_browser() is then expected to raise a RuntimeError whose message
    matches 'To start chrome in Debug mode'.
    """

    def dummy_get(url, timeout):
        raise requests.ConnectionError('Simulated connection failure')

    def noop_popen(args, stdout, stderr):
        return None

    async def fake_sleep(seconds):
        return None

    monkeypatch.setattr(requests, 'get', dummy_get)
    monkeypatch.setattr(subprocess, 'Popen', noop_popen)
    # Keeps the test fast if the code under test sleeps between attempts.
    monkeypatch.setattr(asyncio, 'sleep', fake_sleep)

    class FailingChromium:
        async def connect_over_cdp(self, endpoint_url, timeout=20000):
            raise Exception('Connection failed simulation')

    class StubPlaywright:
        def __init__(self):
            self.chromium = FailingChromium()

        async def stop(self):
            pass

    class StubPlaywrightStarter:
        async def start(self):
            return StubPlaywright()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: StubPlaywrightStarter())

    browser_obj = Browser(
        config=BrowserConfig(browser_binary_path='dummy/chrome', extra_browser_args=['--dummy-arg'])
    )
    with pytest.raises(RuntimeError, match='To start chrome in Debug mode'):
        await browser_obj.get_playwright_browser()
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_playwright_browser_caching(monkeypatch):
    """
    Verify that get_playwright_browser() hands back the same object on repeated calls.

    The stubbed launch() creates a fresh DummyBrowser on every invocation, so the two
    results can only be identical if the second call did not launch again.
    """

    class DummyBrowser:
        pass

    class StubChromium:
        async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
            return DummyBrowser()

    class StubPlaywright:
        def __init__(self):
            self.chromium = StubChromium()

        async def stop(self):
            pass

    class StubPlaywrightStarter:
        async def start(self):
            return StubPlaywright()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: StubPlaywrightStarter())

    browser_obj = Browser(
        config=BrowserConfig(headless=True, disable_security=False, extra_browser_args=['--test'])
    )
    first_browser = await browser_obj.get_playwright_browser()
    second_browser = await browser_obj.get_playwright_browser()
    assert first_browser is second_browser, 'Expected the browser to be cached and reused across calls.'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_close_error_handling(monkeypatch):
    """
    Verify that Browser.close() survives failing teardown calls.

    Both the injected playwright_browser.close() and playwright.stop() raise; close()
    must still return normally and leave both attributes set to None.
    """

    class DummyBrowserWithError:
        async def close(self):
            raise Exception('Close error simulation')

    class DummyPlaywrightWithError:
        async def stop(self):
            raise Exception('Stop error simulation')

    browser_obj = Browser(config=BrowserConfig())
    # Inject the failing collaborators directly instead of launching anything.
    browser_obj.playwright_browser = DummyBrowserWithError()
    browser_obj.playwright = DummyPlaywrightWithError()

    await browser_obj.close()

    assert browser_obj.playwright_browser is None, 'Expected playwright_browser to be None after close'
    assert browser_obj.playwright is None, 'Expected playwright to be None after close'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_standard_browser_launch_with_proxy(monkeypatch):
    """
    Verify that a proxy set on BrowserConfig reaches chromium.launch().

    The stubbed launch() asserts that `proxy` arrives as a dict whose 'server' entry
    is the URL configured on the ProxySettings instance.
    """
    # Proxy configuration under test.
    dummy_proxy = ProxySettings(server='http://dummy.proxy')

    class DummyBrowser:
        pass

    class StubChromium:
        async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
            # Only the proxy is checked here; headless and args are accepted as-is.
            assert isinstance(proxy, dict) and proxy['server'] == 'http://dummy.proxy', (
                f'Expected proxy {dummy_proxy} but got {proxy}'
            )
            return DummyBrowser()

    class StubPlaywright:
        def __init__(self):
            self.chromium = StubChromium()

        async def stop(self):
            pass

    class StubPlaywrightStarter:
        async def start(self):
            return StubPlaywright()

    # Swap the async_playwright name inside the module under test for the stub factory.
    monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: StubPlaywrightStarter())

    browser_obj = Browser(config=BrowserConfig(headless=False, disable_security=False, proxy=dummy_proxy))

    launched = await browser_obj.get_playwright_browser()
    assert isinstance(launched, DummyBrowser), 'Expected DummyBrowser from _setup_builtin_browser with proxy provided'
    await browser_obj.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_browser_window_size(monkeypatch):
    """
    Verify that window_width / window_height reach Playwright as a size dictionary.

    The stubbed browser's new_context() asserts that the 'record_video_size' keyword
    is a dict with width 1280 and height 1100, the values set on BrowserContextConfig.
    """

    class DummyPage:
        # Minimal page stand-in; every method is a no-op or returns a fixed value.
        def __init__(self):
            self.url = 'about:blank'

        async def goto(self, url):
            pass

        async def wait_for_load_state(self, state):
            pass

        async def title(self):
            return 'Test Page'

        async def bring_to_front(self):
            pass

        async def evaluate(self, script):
            return True

        def is_closed(self):
            return False

    class DummyContext:
        def __init__(self):
            self.pages = [DummyPage()]
            # The context doubles as its own tracing object (it has start/stop).
            self.tracing = self

        async def new_page(self):
            return DummyPage()

        async def add_init_script(self, script):
            pass

        async def start(self):
            pass

        async def stop(self, path=None):
            pass

        def on(self, event, handler):
            pass

        async def close(self):
            pass

        async def grant_permissions(self, permissions, origin=None):
            pass

    class DummyBrowser:
        def __init__(self):
            self.contexts = []

        async def new_context(self, **kwargs):
            # The actual checks of this test live here.
            size = kwargs['record_video_size']
            assert isinstance(size, dict), (
                f'Expected record_video_size to be a dictionary, got {type(kwargs["record_video_size"])}'
            )
            assert size['width'] == 1280, (
                f'Expected width to be 1280, got {kwargs["record_video_size"].get("width")}'
            )
            assert size['height'] == 1100, (
                f'Expected height to be 1100, got {kwargs["record_video_size"].get("height")}'
            )

            created = DummyContext()
            self.contexts.append(created)
            return created

        async def close(self):
            pass

    class DummyPlaywright:
        def __init__(self):
            # The playwright stub also plays the chromium role (it defines launch()).
            self.chromium = self

        async def launch(self, **kwargs):
            return DummyBrowser()

        async def stop(self):
            pass

    class DummyAsyncPlaywrightContext:
        async def start(self):
            return DummyPlaywright()

    monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())

    # Default-configured browser; initialise it before the context is created.
    browser_obj = Browser()
    await browser_obj.get_playwright_browser()

    # Window size whose conversion is being checked.
    size_config = BrowserContextConfig(window_width=1280, window_height=1100)

    # Initialising the session invokes the stubbed new_context() and therefore the asserts above.
    browser_context = BrowserContext(browser=browser_obj, config=size_config)
    await browser_context._initialize_session()

    # Tear down.
    await browser_context.close()
    await browser_obj.close()
|
||||
201
browser-use/tests/test_browser_config_models.py
Normal file
201
browser-use/tests/test_browser_config_models.py
Normal file
|
|
@ -0,0 +1,201 @@
|
|||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig, ProxySettings
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_settings_pydantic_model():
    """
    Verify ProxySettings supports dict-style access and dumps to a plain dict.

    No browser is launched; only the model itself is exercised.
    """
    expected_fields = {
        'server': 'http://example.proxy:8080',
        'bypass': 'localhost',
        'username': 'testuser',
        'password': 'testpass',
    }
    proxy_settings = ProxySettings(**expected_fields)

    # Dict-like access: item lookup, get(), and get() with a fallback for unknown keys.
    assert proxy_settings['server'] == 'http://example.proxy:8080'
    assert proxy_settings.get('bypass') == 'localhost'
    assert proxy_settings.get('nonexistent', 'default') == 'default'

    # model_dump() must return a real dict carrying every configured field.
    dumped = proxy_settings.model_dump()
    assert isinstance(dumped, dict)
    for field_name in ('server', 'bypass', 'username', 'password'):
        assert dumped[field_name] == expected_fields[field_name]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_window_size_config():
    """
    Verify BrowserContextConfig stores window_width / window_height and dumps them.
    """
    first = BrowserContextConfig(window_width=1280, window_height=1100)

    # Attribute access returns what was passed in.
    assert first.window_width == 1280
    assert first.window_height == 1100

    # The same values appear in the dumped dictionary.
    dumped = first.model_dump()
    assert isinstance(dumped, dict)
    assert dumped['window_width'] == 1280
    assert dumped['window_height'] == 1100

    # A second instance with other dimensions keeps its own values.
    second = BrowserContextConfig(window_width=1920, window_height=1080)
    assert second.window_width == 1920
    assert second.window_height == 1080
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.skipif(os.environ.get('CI') == 'true', reason='Skip browser test in CI')
async def test_window_size_with_real_browser():
    """
    Integration test: start a browser and a context configured with a window size.

    Skipped when the CI environment variable equals 'true'. The only assertions are
    that the browser and page exist and that the page reports a positive inner width
    and height; the remaining values are printed for manual inspection.
    """
    # Script that prefers context recording settings and falls back to the window dimensions.
    settings_script = """
        () => {
            // This returns information about the context recording settings
            // which should match our configured video size (browser_window_size)
            try {
                const settings = window.getPlaywrightContextSettings ?
                    window.getPlaywrightContextSettings() : null;
                if (settings && settings.recordVideo) {
                    return settings.recordVideo.size;
                }
            } catch (e) {}

            // Fallback to window dimensions
            return {
                width: window.innerWidth,
                height: window.innerHeight
            };
        }
    """
    # Script that reports the inner window dimensions.
    viewport_script = """
        () => {
            return {
                width: window.innerWidth,
                height: window.innerHeight
            }
        }
    """

    browser_config = BrowserConfig(
        headless=True,  # headless keeps the test quick
    )
    context_config = BrowserContextConfig(
        window_width=1024,
        window_height=768,
        maximum_wait_page_load_time=2.0,  # short waits for the test
        minimum_wait_page_load_time=0.2,
        no_viewport=True,  # use the actual window size rather than a viewport
    )

    browser = Browser(config=browser_config)
    try:
        playwright_browser = await browser.get_playwright_browser()
        assert playwright_browser is not None, 'Browser initialization failed'

        browser_context = BrowserContext(browser=browser, config=context_config)
        try:
            await browser_context._initialize_session()

            page = await browser_context.get_current_page()
            assert page is not None, 'Failed to get current page'

            # Evaluated but not asserted on; kept so the script still runs in the page.
            video_size = await page.evaluate(settings_script)

            viewport_size = await page.evaluate(viewport_script)

            print(f'Window size config: width={context_config.window_width}, height={context_config.window_height}')
            print(f'Browser viewport size: {viewport_size}')

            # Lightweight check only: the page must report some size.
            assert viewport_size['width'] > 0, 'Expected viewport width to be positive'
            assert viewport_size['height'] > 0, 'Expected viewport height to be positive'

            # Printed for inspection; nothing below is asserted.
            print(f'Content of context session: {browser_context.session.context}')
            print('✅ Browser window size used in the test')
        finally:
            # The context is closed before the browser.
            await browser_context.close()
    finally:
        await browser.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_proxy_with_real_browser():
    """
    Integration test: a ProxySettings model is accepted when starting a browser.

    Steps:
    1. Build a ProxySettings model pointing at a proxy server that does not exist.
    2. Pass it to BrowserConfig.
    3. Start the browser; a successful start shows the model was accepted.
    4. Proxy traffic itself is not exercised (that would need a working proxy).

    If starting the browser raises, the error is tolerated only when it does not
    mention the proxy, or when it looks like a network problem rather than a
    configuration-format problem.
    """
    # Proxy settings with a fake server address.
    proxy_settings = ProxySettings(
        server='http://non.existent.proxy:9999', bypass='localhost', username='testuser', password='testpass'
    )

    # The model must serialise to a plain dict.
    proxy_dict = proxy_settings.model_dump()
    assert isinstance(proxy_dict, dict)
    assert proxy_dict['server'] == 'http://non.existent.proxy:9999'

    browser_config = BrowserConfig(
        headless=True,
        proxy=proxy_settings,
    )

    browser = Browser(config=browser_config)
    try:
        try:
            # Only the start-up call is guarded, so that assertion failures from the
            # success path below can never be caught by the handler.
            playwright_browser = await browser.get_playwright_browser()
        except Exception as e:
            # Network errors caused by the non-existent proxy are acceptable;
            # a proxy-related error that is not network-flavoured is not.
            error_text = str(e).lower()
            assert 'proxy' not in error_text or any(
                term in error_text for term in ['connect', 'connection', 'network', 'timeout', 'unreachable']
            ), f'Proxy configuration error (not network error): {e}'
        else:
            assert playwright_browser is not None, 'Browser initialization failed'

            # No requests are made: they would fail against the non-existent proxy.
            print('✅ Browser initialized with proxy settings successfully')
    finally:
        # Always release the browser.
        await browser.close()
|
||||
107
browser-use/tests/test_browser_window_size_height.py
Normal file
107
browser-use/tests/test_browser_window_size_height.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
"""
|
||||
Example script demonstrating the browser_window_size feature.
|
||||
This script shows how to set a custom window size for the browser.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
|
||||
async def main():
    """
    Open a headed browser with an 800x400 window and compare it with the real viewport.

    Returns 0 when the run completes and 1 when the browser or context cannot be
    created or an unexpected error occurs. Browser and context are closed on every path.
    """
    # Deliberately small so the effect of the setting is easy to see.
    config = BrowserContextConfig(window_width=800, window_height=400)

    viewport_script = """
        () => {
            return {
                width: window.innerWidth,
                height: window.innerHeight
            }
        }
    """

    browser = None
    browser_context = None

    try:
        # Step 1: the browser itself (headed, so the window is visible).
        try:
            browser = Browser(
                config=BrowserConfig(
                    headless=False,
                )
            )
        except Exception as exc:
            print(f'Failed to initialize browser: {exc}')
            return 1

        # Step 2: a context carrying the window-size configuration.
        try:
            browser_context = await browser.new_context(config=config)
        except Exception as exc:
            print(f'Failed to create browser context: {exc}')
            return 1

        page = await browser_context.get_current_page()

        # Step 3: load a page; a navigation failure is reported but not fatal.
        try:
            await page.goto('https://example.com')
            await page.wait_for_load_state('domcontentloaded')
        except Exception as exc:
            print(f'Failed to navigate to example.com: {exc}')
            print('Continuing with test anyway...')

        # Pause so the window can be observed.
        await asyncio.sleep(2)

        # Step 4: ask the page for its real inner dimensions.
        viewport_size = await page.evaluate(viewport_script)

        print(f'Configured window size: width={config.window_width}, height={config.window_height}')
        print(f'Actual viewport size: {viewport_size}')

        # Step 5: report whether configured and actual sizes agree.
        validate_window_size({'width': config.window_width, 'height': config.window_height}, viewport_size)

        # Second pause before teardown.
        await asyncio.sleep(3)

        return 0

    except Exception as exc:
        print(f'Unexpected error: {exc}')
        return 1

    finally:
        # Close whatever was successfully created, context first.
        if browser_context:
            await browser_context.close()
        if browser:
            await browser.close()
|
||||
|
||||
|
||||
def validate_window_size(configured: dict[str, Any], actual: dict[str, Any]) -> None:
    """
    Print whether the actual window size is close enough to the configured one.

    Both arguments are mappings with 'width' and 'height' entries. Each dimension may
    deviate by 5% of the configured value or 20px, whichever is larger. A warning with
    both differences is printed when either dimension exceeds its tolerance; otherwise
    a success line is printed. Nothing is returned.
    """
    want_width = configured['width']
    width_diff = abs(want_width - actual['width'])
    want_height = configured['height']
    height_diff = abs(want_height - actual['height'])

    # Allowance for browser chrome, scrollbars and similar: 5% or 20px, whichever is greater.
    width_tolerance = max(want_width * 0.05, 20)
    height_tolerance = max(want_height * 0.05, 20)

    out_of_tolerance = width_diff > width_tolerance or height_diff > height_tolerance
    if not out_of_tolerance:
        print('Window size validation passed: actual size matches configured size within tolerance')
        return

    print('WARNING: Significant difference between configured and actual window size!')
    print(f'Width difference: {width_diff}px, Height difference: {height_diff}px')
|
||||
|
||||
|
||||
# Script entry point: run the demo and use its return value as the process exit status.
if __name__ == '__main__':
    sys.exit(asyncio.run(main()))
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
import asyncio
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
|
||||
async def test():
    """
    Open a headed browser with a 1440x900 window (no_viewport=False) and print its sizes.

    Loads https://example.com, then prints the page's inner (viewport) and outer
    (window) dimensions. The context and browser are closed even when a step fails,
    so a failed run does not leave a browser behind.
    """
    print('Testing browser window sizing with no_viewport=False...')
    browser = Browser(BrowserConfig(headless=False))
    # Stays None until the context exists, so cleanup knows what to close.
    browser_context = None
    try:
        context_config = BrowserContextConfig(window_width=1440, window_height=900, no_viewport=False)
        browser_context = await browser.new_context(config=context_config)
        page = await browser_context.get_current_page()
        await page.goto('https://example.com')
        # Pause so the window can be observed.
        await asyncio.sleep(2)
        viewport = await page.evaluate('() => ({width: window.innerWidth, height: window.innerHeight})')
        print('Configured size: width=1440, height=900')
        print(f'Actual viewport size: {viewport}')

        # Outer dimensions, as opposed to the inner viewport measured above.
        window_size = await page.evaluate("""
            () => ({
                width: window.outerWidth,
                height: window.outerHeight
            })
        """)
        print(f'Actual window size: {window_size}')
    finally:
        # Context first, then the browser that owns it.
        if browser_context is not None:
            await browser_context.close()
        await browser.close()
|
||||
|
||||
|
||||
# Script entry point: run the window-sizing demo coroutine to completion.
if __name__ == '__main__':
    asyncio.run(test())
|
||||
363
browser-use/tests/test_context.py
Normal file
363
browser-use/tests/test_context.py
Normal file
|
|
@ -0,0 +1,363 @@
|
|||
import base64
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.browser.views import BrowserState
|
||||
from browser_use.dom.views import DOMElementNode
|
||||
|
||||
|
||||
def test_is_url_allowed():
    """
    Check BrowserContext._is_url_allowed against the allowed_domains setting.

    Scenario 1: allowed_domains is None, so every URL is allowed.
    Scenario 2: allowed_domains is a list, so only matching domains (including
                subdomains and URLs with a port) are allowed.
    Scenario 3: a malformed URL is rejected.
    Scenario 4: glob patterns in allowed_domains, see: test_url_allowlist_security.py
    """
    # A Mock stands in for the Browser; it is given a Mock `config` attribute.
    dummy_browser = Mock()
    dummy_browser.config = Mock()

    # Scenario 1: no restriction configured.
    unrestricted = BrowserContext(browser=dummy_browser, config=BrowserContextConfig(allowed_domains=None))
    for url in ('http://anydomain.com', 'https://anotherdomain.org/path'):
        assert unrestricted._is_url_allowed(url) is True

    # Scenarios 2 and 3: two allowed domains.
    restricted = BrowserContext(
        browser=dummy_browser,
        config=BrowserContextConfig(allowed_domains=['example.com', 'mysite.org']),
    )
    expectations = (
        ('http://example.com', True),  # exact match
        ('http://sub.example.com/path', True),  # subdomain of an allowed domain
        ('http://notexample.com', False),  # different domain sharing a suffix
        ('https://mysite.org/page', True),  # second allowed domain
        ('http://example.com:8080', True),  # port does not affect the decision
        ('notaurl', False),  # malformed URL
    )
    for url, expected in expectations:
        assert restricted._is_url_allowed(url) is expected
|
||||
|
||||
|
||||
def test_convert_simple_xpath_to_css_selector():
    """
    Check BrowserContext._convert_simple_xpath_to_css_selector on simple XPaths.

    Covers the empty string, a path without indices, and paths where one or several
    steps carry a numeric index (expected to become :nth-of-type(n)).
    """
    convert = BrowserContext._convert_simple_xpath_to_css_selector
    cases = (
        # empty input stays empty
        ('', ''),
        # plain path: steps joined with the child combinator
        ('/html/body/div/span', 'html > body > div > span'),
        # one indexed step: [2] -> :nth-of-type(2)
        ('/html/body/div[2]/span', 'html > body > div:nth-of-type(2) > span'),
        # several indexed steps
        ('/ul/li[3]/a[1]', 'ul > li:nth-of-type(3) > a:nth-of-type(1)'),
    )
    for xpath, expected in cases:
        assert convert(xpath) == expected
|
||||
|
||||
|
||||
def test_get_initial_state():
    """
    Check the state returned by BrowserContext._get_initial_state.

    With a page object the state's url must equal ``page.url`` and the element
    tree's tag name must be 'root'; without a page the url must be ''.
    """
    # The browser is only needed to construct the context, so a Mock suffices.
    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())

    class DummyPage:
        # The only attribute this test relies on.
        url = 'http://dummy.com'

    page_stub = DummyPage()

    # A page is supplied: the url is taken from it.
    with_page = ctx._get_initial_state(page=page_stub)
    assert with_page.url == page_stub.url
    assert with_page.element_tree.tag_name == 'root'

    # No page supplied: the url is the empty string.
    without_page = ctx._get_initial_state()
    assert without_page.url == ''
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_execute_javascript():
    """
    Check that execute_javascript returns what the current page's evaluate() returns.

    The context's session is replaced by a stand-in whose current page always
    answers 'dummy_result'.
    """

    class DummyPage:
        async def evaluate(self, script):
            # The script is ignored; a fixed value is returned.
            return 'dummy_result'

    # Bare object used as the session; only current_page is set on it.
    session_stub = type('DummySession', (), {})()
    session_stub.current_page = DummyPage()

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    # Inject the stand-in session directly onto the context.
    ctx.session = session_stub

    outcome = await ctx.execute_javascript('return 1+1')
    assert outcome == 'dummy_result'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_enhanced_css_selector_for_element():
    """
    Check the selector built by BrowserContext._enhanced_css_selector_for_element.

    A DOMElementNode with an indexed xpath, a two-class ``class`` attribute, an
    ``id``, a ``placeholder`` containing double quotes and a ``data-testid`` is
    passed in with include_dynamic_attributes=True, and the returned string is
    compared with the expected selector.
    """
    node_attributes = {'class': 'foo bar', 'id': 'my-id', 'placeholder': 'some "quoted" text', 'data-testid': '123'}
    dummy_element = DOMElementNode(
        tag_name='div',
        is_visible=True,
        parent=None,
        xpath='/html/body/div[2]',
        attributes=node_attributes,
        children=[],
    )

    actual_selector = BrowserContext._enhanced_css_selector_for_element(dummy_element, include_dynamic_attributes=True)

    # The expected selector, written as its five parts:
    expected_selector = (
        'html > body > div:nth-of-type(2)'  # from the xpath "/html/body/div[2]"
        '.foo.bar'  # from the class attribute "foo bar"
        '[id="my-id"]'  # from the id attribute
        '[placeholder*="some \\"quoted\\" text"]'  # quoted value, "*=" match
        '[data-testid="123"]'  # from the data-testid attribute
    )
    assert actual_selector == expected_selector, f'Expected {expected_selector}, but got {actual_selector}'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_scroll_info():
    """
    Check the (pixels_above, pixels_below) pair returned by get_scroll_info.

    The stand-in page answers 100 for scripts mentioning window.scrollY, 500 for
    window.innerHeight and 1200 for document.documentElement.scrollHeight; the
    test expects 100 pixels above and 600 pixels below.
    """

    class DummyPage:
        async def evaluate(self, script):
            # Checked in this order; the first fragment found in the script wins.
            canned_answers = (
                ('window.scrollY', 100),  # scrollY
                ('window.innerHeight', 500),  # innerHeight
                ('document.documentElement.scrollHeight', 1200),  # total scrollable height
            )
            for fragment, answer in canned_answers:
                if fragment in script:
                    return answer
            return None

    session_stub = type('DummySession', (), {})()
    session_stub.current_page = DummyPage()
    # A context attribute is set as well, as an empty placeholder object.
    session_stub.context = type('DummyContext', (), {})()

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    ctx.session = session_stub

    pixels_above, pixels_below = await ctx.get_scroll_info(session_stub.current_page)

    # above = scrollY = 100; below = 1200 - (100 + 500) = 600
    assert pixels_above == 100, f'Expected 100 pixels above, got {pixels_above}'
    assert pixels_below == 600, f'Expected 600 pixels below, got {pixels_below}'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_reset_context():
    """
    Check reset_context against a stand-in session holding two open pages.

    After the call both original pages must report closed, the session's
    current page must be a new page with an empty url, and cached_state must be
    a BrowserState with url '' and an element tree tagged 'root'.
    """

    class DummyPage:
        """Page stand-in that records whether close() was awaited."""

        def __init__(self, url='http://dummy.com'):
            self.url = url
            self.closed = False

        async def close(self):
            self.closed = True

        async def wait_for_load_state(self):
            pass

    class DummyContext:
        """Context stand-in that keeps a list of pages and can add a blank one."""

        def __init__(self):
            self.pages = []

        async def new_page(self):
            blank_page = DummyPage(url='')
            self.pages.append(blank_page)
            return blank_page

    # Two pre-existing pages live in the stand-in context.
    page1 = DummyPage(url='http://page1.com')
    page2 = DummyPage(url='http://page2.com')
    context_stub = DummyContext()
    context_stub.pages.append(page1)
    context_stub.pages.append(page2)

    session_stub = type('DummySession', (), {})()
    session_stub.context = context_stub
    session_stub.current_page = page1
    session_stub.cached_state = None

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    ctx.session = session_stub

    # Sanity check before the reset.
    assert len(session_stub.context.pages) == 2

    await ctx.reset_context()

    # Both original pages were closed.
    assert page1.closed is True
    assert page2.closed is True

    # A current page exists and it is the blank one created by new_page().
    assert session_stub.current_page is not None
    fresh_page = session_stub.current_page
    assert fresh_page.url == ''

    # The cached state was replaced with an initial BrowserState.
    cached = session_stub.cached_state
    assert isinstance(cached, BrowserState)
    assert cached.url == ''
    assert cached.element_tree.tag_name == 'root'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_take_screenshot():
    """
    Check that take_screenshot returns the page's screenshot bytes base64-encoded.

    The stand-in page verifies the arguments it receives and returns b'test';
    the test compares the result with the base64 text of those bytes.
    """

    class DummyPage:
        async def screenshot(self, full_page, animations):
            # The arguments passed to screenshot() are verified here.
            assert full_page is True, 'full_page parameter was not correctly passed'
            assert animations == 'disabled', 'animations parameter was not correctly passed'
            return b'test'

    session_stub = type('DummySession', (), {})()
    session_stub.current_page = DummyPage()
    session_stub.context = None  # placeholder value

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    ctx.session = session_stub

    result = await ctx.take_screenshot(full_page=True)
    expected = base64.b64encode(b'test').decode('utf-8')
    assert result == expected, f'Expected {expected}, but got {result}'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_refresh_page_behavior():
    """
    Check that refresh_page awaits both reload() and wait_for_load_state() on the current page.

    The stand-in page sets a flag in each of the two methods; both flags must
    be True after the call.
    """

    class DummyPage:
        def __init__(self):
            self.reload_called = False
            self.wait_for_load_state_called = False

        async def reload(self):
            self.reload_called = True

        async def wait_for_load_state(self):
            self.wait_for_load_state_called = True

    page_stub = DummyPage()
    session_stub = type('DummySession', (), {})()
    session_stub.current_page = page_stub
    session_stub.context = None  # placeholder value

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    ctx.session = session_stub

    await ctx.refresh_page()

    assert page_stub.reload_called is True, 'Expected the page to call reload()'
    assert page_stub.wait_for_load_state_called is True, 'Expected the page to call wait_for_load_state()'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_remove_highlights_failure():
    """
    Check that remove_highlights does not let a failing page.evaluate() escape.

    The stand-in page raises from evaluate(); if any Exception reaches the
    test, it is reported through pytest.fail.
    """

    class DummyPage:
        async def evaluate(self, script):
            # Every evaluate() call fails.
            raise Exception('dummy error')

    session_stub = type('DummySession', (), {})()
    session_stub.current_page = DummyPage()
    session_stub.context = None  # placeholder value

    browser_stub = Mock(config=Mock())
    ctx = BrowserContext(browser=browser_stub, config=BrowserContextConfig())
    ctx.session = session_stub

    # The catch-all is deliberate: the absence of an exception is what is tested.
    try:
        await ctx.remove_highlights()
    except Exception as e:
        pytest.fail(f'remove_highlights raised an exception: {e}')
|
||||
1256
browser-use/tests/test_controller.py
Normal file
1256
browser-use/tests/test_controller.py
Normal file
File diff suppressed because it is too large
Load diff
202
browser-use/tests/test_core_functionality.py
Normal file
202
browser-use/tests/test_core_functionality.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def event_loop():
    """Yield a new event loop from the current policy and close it on teardown (function scope)."""
    policy = asyncio.get_event_loop_policy()
    fresh_loop = policy.new_event_loop()
    yield fresh_loop
    fresh_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
async def browser(event_loop):
    """Yield a headless Browser and close it once the test is done."""
    headless_config = BrowserConfig(headless=True)
    browser_instance = Browser(config=headless_config)
    yield browser_instance
    # Teardown: runs after the test that requested the fixture has finished.
    await browser_instance.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a context created from the ``browser`` fixture, entered as an async context manager."""
    created = await browser.new_context()
    async with created as active_context:
        yield active_context
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """
    Build the AzureChatOpenAI model used by the tests.

    The endpoint and key are read from AZURE_OPENAI_ENDPOINT and
    AZURE_OPENAI_KEY; each falls back to an empty string when unset.
    """
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
|
||||
|
||||
# pytest -s -k test_search_google
|
||||
@pytest.mark.asyncio
async def test_search_google(llm, context):
    """Run a Google-search task for at most 2 steps and expect a 'search_google' action in the history."""
    search_agent = Agent(task="Search Google for 'OpenAI'.", llm=llm, browser_context=context)
    run_history: AgentHistoryList = await search_agent.run(max_steps=2)
    assert 'search_google' in run_history.action_names()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_go_to_url(llm, context):
    """Run a navigation task for at most 2 steps and expect a 'go_to_url' action in the history."""
    nav_agent = Agent(task="Navigate to 'https://www.python.org'.", llm=llm, browser_context=context)
    run_history = await nav_agent.run(max_steps=2)
    assert 'go_to_url' in run_history.action_names()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_go_back(llm, context):
    """Run a navigate-then-go-back task for at most 3 steps and expect both 'go_to_url' and 'go_back'."""
    back_agent = Agent(
        task="Go to 'https://www.example.com', then go back.",
        llm=llm,
        browser_context=context,
    )
    run_history = await back_agent.run(max_steps=3)
    performed = run_history.action_names()
    assert 'go_to_url' in performed
    assert 'go_back' in performed
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_click_element(llm, context):
    """
    Run a navigate-and-click task for at most 4 steps.

    Expects the page to have been reached via 'go_to_url' or 'open_tab', and a
    'click_element_by_index' action to be present.
    """
    click_agent = Agent(
        task="Go to 'https://www.python.org' and click on the first link.",
        llm=llm,
        browser_context=context,
    )
    run_history = await click_agent.run(max_steps=4)
    performed = run_history.action_names()
    assert 'go_to_url' in performed or 'open_tab' in performed
    assert 'click_element_by_index' in performed
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_input_text(llm, context):
    """Run a navigate-and-type task for at most 4 steps and expect both 'go_to_url' and 'input_text'."""
    typing_agent = Agent(
        task="Go to 'https://www.google.com' and input 'OpenAI' into the search box.",
        llm=llm,
        browser_context=context,
    )
    run_history = await typing_agent.run(max_steps=4)
    performed = run_history.action_names()
    assert 'go_to_url' in performed
    assert 'input_text' in performed
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_switch_tab(llm, context):
    """Run a two-tabs-then-switch task for at most 6 steps; expect at least two 'open_tab' and one 'switch_tab'."""
    tab_agent = Agent(
        task="Open new tabs with 'https://www.google.com' and 'https://www.wikipedia.org', then switch to the first tab.",
        llm=llm,
        browser_context=context,
    )
    run_history = await tab_agent.run(max_steps=6)
    performed = run_history.action_names()
    tabs_opened = performed.count('open_tab')
    assert tabs_opened >= 2
    assert 'switch_tab' in performed
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_open_new_tab(llm, context):
    """Run an open-new-tab task for at most 3 steps and expect an 'open_tab' action in the history."""
    tab_agent = Agent(
        task="Open a new tab and go to 'https://www.example.com'.",
        llm=llm,
        browser_context=context,
    )
    run_history = await tab_agent.run(max_steps=3)
    assert 'open_tab' in run_history.action_names()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_extract_page_content(llm, context):
    """Run a navigate-and-extract task for at most 3 steps and expect 'go_to_url' and 'extract_content'."""
    extract_agent = Agent(
        task="Go to 'https://www.example.com' and extract the page content.",
        llm=llm,
        browser_context=context,
    )
    run_history = await extract_agent.run(max_steps=3)
    performed = run_history.action_names()
    assert 'go_to_url' in performed
    assert 'extract_content' in performed
|
||||
|
||||
|
||||
# pytest -k test_done_action
|
||||
@pytest.mark.asyncio
async def test_done_action(llm, context):
    """Run a navigate-then-finish task for at most 3 steps and expect both 'go_to_url' and 'done'."""
    finishing_agent = Agent(
        task="Navigate to 'https://www.example.com' and signal that the task is done.",
        llm=llm,
        browser_context=context,
    )
    run_history = await finishing_agent.run(max_steps=3)
    performed = run_history.action_names()
    assert 'go_to_url' in performed
    assert 'done' in performed
|
||||
|
||||
|
||||
# run with: pytest -k test_scroll_down
|
||||
@pytest.mark.asyncio
async def test_scroll_down(llm, context):
    """
    Run a navigate-and-scroll task and verify both the scroll offset and the recorded action.

    window.scrollY is sampled after a first run of 1 step and again after a
    second run of 2 steps; the second sample must be larger, and
    'scroll_down' must appear in the agent's history.
    """
    scroll_agent = Agent(
        task="Go to 'https://en.wikipedia.org/wiki/Internet' and scroll down the page.",
        llm=llm,
        browser_context=context,
    )
    # The page handle is fetched once, before the agent runs, and reused for both samples.
    page = await context.get_current_page()

    # First run: sample the starting offset.
    await scroll_agent.run(max_steps=1)
    offset_before = await page.evaluate('window.scrollY;')

    # Second run: sample the offset again.
    await scroll_agent.run(max_steps=2)
    offset_after = await page.evaluate('window.scrollY;')

    assert offset_after > offset_before, 'Page did not scroll down'

    # The action itself must also have been recorded.
    performed = scroll_agent.history.action_names()
    assert 'scroll_down' in performed
|
||||
40
browser-use/tests/test_dropdown.py
Normal file
40
browser-use/tests/test_dropdown.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
Test dropdown interaction functionality.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_dropdown(llm, browser_context):
    """
    Test selecting an option from a dropdown menu.

    The agent is asked to list the dropdown's options and select the 5th one.
    The test then checks the agent's final result text and the live value of
    the page's ``select`` element.

    Args:
        llm: language-model fixture handed to the Agent.
        browser_context: browser-context fixture; closed in the ``finally`` clause.
    """
    agent = Agent(
        task=(
            'go to https://codepen.io/geheimschriftstift/pen/mPLvQz and first get all options for the dropdown and then select the 5th option'
        ),
        llm=llm,
        browser_context=browser_context,
    )

    # There is deliberately no blanket ``except Exception`` here: wrapping
    # failures in pytest.fail(str(e)) would also catch AssertionError and
    # discard the traceback and pytest's assertion introspection.
    try:
        history: AgentHistoryList = await agent.run(20)
        result = history.final_result()

        # Verify dropdown interaction
        assert result is not None
        assert 'Duck' in result, "Expected 5th option 'Duck' to be selected"

        # Verify dropdown state
        element = await browser_context.get_element_by_selector('select')
        assert element is not None, 'Dropdown element should exist'

        value = await element.evaluate('el => el.value')
        assert value == '5', 'Dropdown should have 5th option selected'
    finally:
        # Always release the context, whether the checks passed or not.
        await browser_context.close()
|
||||
44
browser-use/tests/test_dropdown_complex.py
Normal file
44
browser-use/tests/test_dropdown_complex.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
"""
|
||||
Test complex dropdown interaction functionality.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_dropdown_complex(llm, browser_context):
    """
    Test selecting an option from a complex dropdown menu.

    The agent is asked to list the options of a custom dropdown and select the
    json option. The test then checks the agent's final result text, the text
    shown by the ``.select-selected`` element, and that a ``pre code`` element
    exists.

    Args:
        llm: language-model fixture handed to the Agent.
        browser_context: browser-context fixture; closed in the ``finally`` clause.
    """
    agent = Agent(
        task=(
            'go to https://codepen.io/shyam-king/pen/pvzpByJ and first get all options for the dropdown and then select the json option'
        ),
        llm=llm,
        browser_context=browser_context,
    )

    # There is deliberately no blanket ``except Exception`` here: wrapping
    # failures in pytest.fail(str(e)) would also catch AssertionError and
    # discard the traceback and pytest's assertion introspection.
    try:
        history: AgentHistoryList = await agent.run(20)
        result = history.final_result()

        # Verify dropdown interaction
        assert result is not None
        assert 'json' in result.lower(), "Expected 'json' option to be selected"

        # Verify dropdown state
        element = await browser_context.get_element_by_selector('.select-selected')
        assert element is not None, 'Custom dropdown element should exist'

        text = await element.text_content()
        # text_content() may yield None; fail with a clear message instead of
        # an AttributeError from calling .lower() on None.
        assert text is not None, 'Dropdown should display json option'
        assert 'json' in text.lower(), 'Dropdown should display json option'

        # Verify the selected option's effect
        code_element = await browser_context.get_element_by_selector('pre code')
        assert code_element is not None, 'Code element should be visible when JSON is selected'
    finally:
        # Always release the context, whether the checks passed or not.
        await browser_context.close()
|
||||
40
browser-use/tests/test_dropdown_error.py
Normal file
40
browser-use/tests/test_dropdown_error.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
import sys

# Put the repository root on sys.path *before* importing browser_use, so the
# path tweak can actually influence those imports.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import pytest
from langchain_openai import ChatOpenAI

from browser_use import Agent, AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext

# Module-level model and agent, created when the module is imported.
llm = ChatOpenAI(model='gpt-4o')

agent = Agent(
    task=('go to https://codepen.io/shyam-king/pen/emOyjKm and select number "4" and return the output of "selected value"'),
    llm=llm,
    browser_context=BrowserContext(
        browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
    ),
)


# Marked explicitly, like the other async tests in this suite, so the
# coroutine is run by pytest-asyncio regardless of the configured mode.
@pytest.mark.asyncio
async def test_dropdown():
    """Run the module-level agent for up to 20 steps and expect '4' in its final result."""
    history: AgentHistoryList = await agent.run(20)

    result = history.final_result()
    assert result is not None
    assert '4' in result
    print(result)
|
||||
98
browser-use/tests/test_excluded_actions.py
Normal file
98
browser-use/tests/test_excluded_actions.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
# run with:
|
||||
# python -m pytest tests/test_excluded_actions.py -v -k "test_only_open_tab_allowed" --capture=no
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Yield a new event loop from the current policy and close it on teardown (session scope)."""
    policy = asyncio.get_event_loop_policy()
    shared_loop = policy.new_event_loop()
    yield shared_loop
    shared_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield a headless Browser (session scope) and close it on teardown."""
    headless_config = BrowserConfig(headless=True)
    browser_instance = Browser(config=headless_config)
    yield browser_instance
    # Teardown: runs once the fixture goes out of scope.
    await browser_instance.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a context created from the ``browser`` fixture, entered as an async context manager."""
    created = await browser.new_context()
    async with created as active_context:
        yield active_context
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """
    Build the AzureChatOpenAI model used by the tests.

    The endpoint and key are read from AZURE_OPENAI_ENDPOINT and
    AZURE_OPENAI_KEY; each falls back to an empty string when unset.
    """
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
|
||||
|
||||
# pytest tests/test_excluded_actions.py -v -k "test_only_open_tab_allowed" --capture=no
|
||||
@pytest.mark.asyncio
async def test_only_open_tab_allowed(llm, context):
    """
    Test that only open_tab action is available while others are excluded.

    A Controller is built with every other default action excluded, the agent
    is given a task that would normally use those actions, and the recorded
    action names are checked to consist solely of 'open_tab' (at least once).

    Args:
        llm: language-model fixture handed to the Agent.
        context: browser-context fixture the agent operates in.
    """
    # Create list of all default actions except open_tab
    excluded_actions = [
        'search_google',
        'go_to_url',
        'go_back',
        'click_element',
        # test_click_element in test_core_functionality.py asserts on
        # 'click_element_by_index', so that is presumably the name the click
        # action is recorded under; exclude it too so clicking is not left
        # available. NOTE(review): confirm against the controller's registry.
        'click_element_by_index',
        'input_text',
        'switch_tab',
        'extract_content',
        'done',
        'scroll_down',
        'scroll_up',
        'send_keys',
        'scroll_to_text',
        'get_dropdown_options',
        'select_dropdown_option',
    ]

    # Initialize controller with excluded actions
    controller = Controller(exclude_actions=excluded_actions)

    # Create agent with a task that would normally use other actions
    agent = Agent(
        task="Go to google.com and search for 'python programming'",
        llm=llm,
        browser_context=context,
        controller=controller,
    )

    history: AgentHistoryList = await agent.run(max_steps=2)

    # Verify that only open_tab was used
    action_names = history.action_names()
    unexpected = [a for a in action_names if a != 'open_tab']
    assert not unexpected, f'Found unexpected actions: {unexpected}'

    # open_tab should be used at least once
    assert 'open_tab' in action_names, 'open_tab action was not used'
|
||||
21
browser-use/tests/test_full_screen.py
Normal file
21
browser-use/tests/test_full_screen.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
import asyncio
|
||||
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
|
||||
async def test_full_screen(start_fullscreen: bool, maximize: bool):
    """
    Launch a headed Chromium with '--start-maximized', open google.com, wait 10 seconds, then close.

    NOTE(review): ``start_fullscreen`` and ``maximize`` are not read anywhere in
    the body; the launch arguments are fixed. Presumably this is a manual
    script rather than a collected test; confirm how pytest is meant to
    supply these two parameters.
    """
    async with async_playwright() as playwright:
        launch_args = ['--start-maximized']
        browser = await playwright.chromium.launch(headless=False, args=launch_args)
        # No fixed viewport is set on the context.
        context = await browser.new_context(no_viewport=True, viewport=None)
        page = await context.new_page()
        await page.goto('https://google.com')

        # Keep the window open for a while before shutting down.
        await asyncio.sleep(10)
        await browser.close()


if __name__ == '__main__':
    # Direct invocation: run once with both flags off.
    asyncio.run(test_full_screen(False, False))
|
||||
40
browser-use/tests/test_gif_path.py
Normal file
40
browser-use/tests/test_gif_path.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
import sys

# Put the repository root on sys.path *before* importing browser_use, so the
# path tweak can actually influence those imports.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import pytest
from langchain_openai import ChatOpenAI

from browser_use import Agent, AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext

# Module-level model and agent, created when the module is imported.
llm = ChatOpenAI(model='gpt-4o')

agent = Agent(
    task=('go to google.com and search for text "hi there"'),
    llm=llm,
    browser_context=BrowserContext(
        browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
    ),
    generate_gif='./google.gif',
)


# Marked explicitly, like the other async tests in this suite, so the
# coroutine is run by pytest-asyncio regardless of the configured mode.
@pytest.mark.asyncio
async def test_gif_path():
    """Run the module-level agent and expect './google.gif' to exist afterwards."""
    # Remove any leftover file so the final existence check is meaningful.
    if os.path.exists('./google.gif'):
        os.unlink('./google.gif')

    history: AgentHistoryList = await agent.run(20)

    result = history.final_result()
    assert result is not None

    assert os.path.exists('./google.gif'), 'google.gif was not created'
|
||||
137
browser-use/tests/test_mind2web.py
Normal file
137
browser-use/tests/test_mind2web.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""
|
||||
Test browser automation using Mind2Web dataset tasks with pytest framework.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.utils import logger
|
||||
|
||||
# Constants
|
||||
MAX_STEPS = 50
|
||||
TEST_SUBSET_SIZE = 10
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Yield a new event loop from the current policy and close it on teardown (session scope)."""
    policy = asyncio.get_event_loop_policy()
    shared_loop = policy.new_event_loop()
    yield shared_loop
    shared_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield a headless Browser (session scope) and close it on teardown."""
    headless_config = BrowserConfig(headless=True)
    browser_instance = Browser(config=headless_config)
    yield browser_instance
    # Teardown: runs once the fixture goes out of scope.
    await browser_instance.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a context created from the ``browser`` fixture, entered as an async context manager."""
    created = await browser.new_context()
    async with created as active_context:
        yield active_context
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def test_cases() -> list[dict[str, Any]]:
    """
    Load test cases from Mind2Web dataset.

    Reads ``mind2web_data/processed.json`` located next to this file and
    returns its first TEST_SUBSET_SIZE entries.

    Returns:
        The leading slice of the decoded JSON data.

    Raises:
        OSError: if the data file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    file_path = os.path.join(os.path.dirname(__file__), 'mind2web_data/processed.json')
    logger.info(f'Loading test cases from {file_path}')

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default.
    with open(file_path, encoding='utf-8') as f:
        data = json.load(f)

    subset = data[:TEST_SUBSET_SIZE]
    logger.info(f'Loaded {len(subset)}/{len(data)} test cases')
    return subset
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """Build the Azure-hosted gpt-4o chat model used by the tests in this file.

    The endpoint and key are read from AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_KEY,
    each falling back to an empty string when unset.
    """
    # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
|
||||
|
||||
# run with: pytest -s -v tests/test_mind2web.py::test_random_samples
|
||||
@pytest.mark.asyncio
async def test_random_samples(test_cases: list[dict[str, Any]], llm, context, validator):
    """Run the agent on one randomly chosen Mind2Web task.

    Nothing about the outcome is asserted yet; the test passes if the run does not raise.
    """
    # NOTE(review): `validator` is requested but never used, and no fixture with that
    # name is defined in this file - confirm it is provided elsewhere (e.g. conftest.py).
    import random

    logger.info('=== Testing Random Samples ===')

    # Take random samples
    chosen = random.sample(test_cases, 1)
    total = len(chosen)

    for index, entry in enumerate(chosen, 1):
        task = f'Go to {entry["website"]}.com and {entry["confirmed_task"]}'
        logger.info(f'--- Random Sample {index}/{total} ---')
        logger.info(f'Task: {task}\n')

        agent = Agent(task, llm, browser_context=context)
        await agent.run()

        logger.info('Validating random sample task...')
        # TODO: Validate the task
|
||||
|
||||
|
||||
def test_dataset_integrity(test_cases):
    """Test the integrity of the test dataset.

    Every case must contain 'website', 'confirmed_task' and 'action_reprs';
    'confirmed_task' must be a string and 'action_reprs' a non-empty list.

    Raises:
        AssertionError: on the first type violation, or after the loop if any
            required field was missing.
    """
    logger.info('\n=== Testing Dataset Integrity ===')

    required_fields = ['website', 'confirmed_task', 'action_reprs']
    missing_fields = []

    logger.info(f'Checking {len(test_cases)} test cases for required fields')

    for i, case in enumerate(test_cases, 1):
        logger.debug(f'Checking case {i}/{len(test_cases)}')

        for field in required_fields:
            if field not in case:
                missing_fields.append(f'Case {i}: {field}')
                logger.warning(f"Missing field '{field}' in case {i}")

        # Type checks. Failures are raised explicitly instead of via `assert False`,
        # which is removed when Python runs with -O and would let bad data pass.
        if not isinstance(case.get('confirmed_task'), str):
            logger.error(f"Case {i}: 'confirmed_task' must be string")
            raise AssertionError('Task must be string')

        if not isinstance(case.get('action_reprs'), list):
            logger.error(f"Case {i}: 'action_reprs' must be list")
            raise AssertionError('Actions must be list')

        if not case['action_reprs']:
            logger.error(f"Case {i}: 'action_reprs' must not be empty")
            raise AssertionError('Must have at least one action')

    if missing_fields:
        logger.error('Dataset integrity check failed')
        raise AssertionError(f'Missing fields: {missing_fields}')

    logger.info('✅ Dataset integrity check passed')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this file directly: hand it to pytest in verbose mode.
    pytest.main(args=[__file__, '-v'])
|
||||
160
browser-use/tests/test_models.py
Normal file
160
browser-use/tests/test_models.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from langchain_ollama import ChatOllama
|
||||
from langchain_openai import AzureChatOpenAI, ChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def event_loop():
    """Give every test its own freshly created event loop, closed when the test ends."""
    policy = asyncio.get_event_loop_policy()
    per_test_loop = policy.new_event_loop()
    yield per_test_loop
    per_test_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
async def browser(event_loop):
    """Yield a headless Browser for one test and close it afterwards."""
    headless_config = BrowserConfig(headless=True)
    test_browser = Browser(config=headless_config)
    yield test_browser
    await test_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a new browser context for one test.

    The context is used as an async context manager, so it is exited when the test finishes.
    """
    pending_context = await browser.new_context()
    async with pending_context as active_context:
        yield active_context
|
||||
|
||||
|
||||
# Provider API keys read once at import time; an unset or empty variable becomes ''.
api_key_gemini = SecretStr(os.environ.get('GOOGLE_API_KEY') or '')
api_key_deepseek = SecretStr(os.environ.get('DEEPSEEK_API_KEY') or '')
api_key_anthropic = SecretStr(os.environ.get('ANTHROPIC_API_KEY') or '')
|
||||
|
||||
|
||||
# pytest -s -v tests/test_models.py
|
||||
@pytest.fixture(
    # Each model is paired with its test id via pytest.param so the two can never
    # drift apart (the previous parallel `params` / `ids` lists had to be edited in
    # lockstep). Order and ids are unchanged.
    params=[
        pytest.param(ChatOpenAI(model='gpt-4o'), id='gpt-4o'),
        pytest.param(ChatOpenAI(model='gpt-4o-mini'), id='gpt-4o-mini'),
        pytest.param(
            AzureChatOpenAI(
                model='gpt-4o',
                api_version='2024-10-21',
                azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
                api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
            ),
            id='azure-gpt-4o',
        ),
        # pytest.param(
        #     ChatOpenAI(
        #         base_url='https://api.deepseek.com/v1',
        #         model='deepseek-reasoner',
        #         api_key=api_key_deepseek,
        #     ),
        #     id='deepseek-reasoner',
        # ),
        # run: ollama start
        pytest.param(
            ChatOllama(
                model='qwen2.5:latest',
                num_ctx=128000,
            ),
            id='qwen2.5:latest',
        ),
        pytest.param(
            AzureChatOpenAI(
                model='gpt-4o-mini',
                api_version='2024-10-21',
                azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
                api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
            ),
            id='azure-gpt-4o-mini',
        ),
        pytest.param(
            ChatAnthropic(
                model_name='claude-3-5-sonnet-20240620',
                timeout=100,
                temperature=0.0,
                stop=None,
                api_key=api_key_anthropic,
            ),
            id='claude-3-5-sonnet',
        ),
        pytest.param(
            ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=api_key_gemini),
            id='gemini-2.0-flash-exp',
        ),
        pytest.param(
            ChatGoogleGenerativeAI(model='gemini-1.5-pro', api_key=api_key_gemini),
            id='gemini-1.5-pro',
        ),
        pytest.param(
            ChatGoogleGenerativeAI(model='gemini-1.5-flash-latest', api_key=api_key_gemini),
            id='gemini-1.5-flash-latest',
        ),
        pytest.param(
            ChatOpenAI(
                base_url='https://api.deepseek.com/v1',
                model='deepseek-chat',
                api_key=api_key_deepseek,
            ),
            id='deepseek-chat',
        ),
    ],
)
def llm(request):
    """Return the chat model selected by the current fixture parameter.

    The model objects are constructed when this module is imported (inside the
    decorator), not when the fixture runs. The fixture itself awaits nothing, so
    it is a plain function; that way the test receives the model object rather
    than a coroutine regardless of how async fixtures are configured.
    """
    return request.param
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_model_search(llm, context):
    """Test 'Search Google' action.

    Runs the agent for two steps and passes if at least one of the actions
    search_google, go_to_url or open_tab was performed.

    Raises:
        Exception: if the model is a local Ollama model and the Ollama server at
            http://127.0.0.1:11434/ is unreachable or does not answer with 200.
    """
    model_name = llm.model if hasattr(llm, 'model') else llm.model_name
    print(f'\nTesting model: {model_name}')

    # Disable vision for text-only models. Both attribute spellings are checked
    # because the wrapper classes expose the name as `model` and/or `model_name`.
    models_without_vision = ('deepseek-chat', 'deepseek-reasoner')
    use_vision = not any(
        getattr(llm, attr, None) in models_without_vision for attr in ('model', 'model_name')
    )

    # require ollama run
    local_models = ['qwen2.5:latest']
    if model_name in local_models:
        ollama_hint = 'Ollama is not running - start with `ollama start`'
        # Only the request lives inside the try; the status check is outside so its
        # own raise is not caught and re-wrapped by the handler below.
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get('http://127.0.0.1:11434/')
        except Exception as exc:
            # Chain the original error so the real cause stays in the traceback.
            raise Exception(ollama_hint) from exc
        if response.status_code != 200:
            raise Exception(ollama_hint)

    agent = Agent(
        task="Search Google for 'elon musk' then click on the first result and scroll down.",
        llm=llm,
        browser_context=context,
        max_failures=2,
        use_vision=use_vision,
    )
    history: AgentHistoryList = await agent.run(max_steps=2)
    done = history.is_done()
    successful = history.is_successful()
    action_names = history.action_names()
    print(f'Actions performed: {action_names}')
    errors = '\n'.join(e for e in history.errors() if e is not None)

    # Any of these actions shows the model produced a usable navigation step.
    accepted_actions = ('search_google', 'go_to_url', 'open_tab')
    passed = any(name in action_names for name in accepted_actions)
    print(f'Model {model_name}: {"✅ PASSED - " if passed else "❌ FAILED - "} Done: {done} Successful: {successful}')

    assert passed, f'Model {model_name} not working\nActions performed: {action_names}\nErrors: {errors}'
|
||||
66
browser-use/tests/test_qwen.py
Normal file
66
browser-use/tests/test_qwen.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
import asyncio
|
||||
|
||||
import pytest
|
||||
from langchain_ollama import ChatOllama
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """Build the local Ollama qwen2.5 chat model (context window set to 128000 tokens)."""
    # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    # NOTE: Make sure to run ollama server with `ollama start`
    ollama_settings = {'model': 'qwen2.5:latest', 'num_ctx': 128000}
    return ChatOllama(**ollama_settings)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Provide a single event loop for the whole test session and close it at teardown."""
    policy = asyncio.get_event_loop_policy()
    session_loop = policy.new_event_loop()
    yield session_loop
    session_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield a headless Browser shared by the session, then close it."""
    headless_config = BrowserConfig(headless=True)
    shared_browser = Browser(config=headless_config)
    yield shared_browser
    await shared_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a new browser context for one test.

    The context is used as an async context manager, so it is exited when the test finishes.
    """
    pending_context = await browser.new_context()
    async with pending_context as active_context:
        yield active_context
|
||||
|
||||
|
||||
# pytest tests/test_qwen.py -v -k "test_qwen_url" --capture=no
|
||||
# @pytest.mark.asyncio
|
||||
async def test_qwen_url(llm, context):
    """Check that the agent performs a navigation action (go_to_url or open_tab) for amazon.com."""
    # NOTE(review): `context` is requested but not passed to the Agent - confirm
    # whether `browser_context=context` was intended.
    agent = Agent(task='go_to_url amazon.com', llm=llm)

    history: AgentHistoryList = await agent.run(max_steps=3)

    # Verify sequence of actions: record one 'navigate' per navigation-type action.
    navigation_actions = ['go_to_url', 'open_tab']
    action_sequence = [
        'navigate'
        for action in history.model_actions()
        if list(action.keys())[0] in navigation_actions
    ]

    assert 'navigate' in action_sequence  # Navigated to Amazon
|
||||
45
browser-use/tests/test_react_dropdown.py
Normal file
45
browser-use/tests/test_react_dropdown.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import asyncio
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, AgentHistoryList
|
||||
|
||||
# The model and the agent are created at import time and shared by test_dropdown below.
llm = ChatOpenAI(model='gpt-4o')
# browser = Browser(config=BrowserConfig(headless=False))

agent = Agent(
    task='go to https://codepen.io/shyam-king/pen/ByBJoOv and select "Tiger" dropdown and read the text given in "Selected Animal" box (it can be empty as well)',
    llm=llm,
    # Visible (non-headless) browser with security features disabled.
    browser_context=BrowserContext(
        browser=Browser(
            config=BrowserConfig(headless=False, disable_security=True),
        ),
    ),
)
|
||||
|
||||
|
||||
async def test_dropdown():
    """Run the module-level agent for up to 10 steps and require a non-None final result."""
    run_history: AgentHistoryList = await agent.run(10)
    # await controller.browser.close(force=True)

    final_answer = run_history.final_result()
    assert final_answer is not None
    print('result: ', final_answer)
    # await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running the dropdown check directly as a script, outside pytest.
    dropdown_run = test_dropdown()
    asyncio.run(dropdown_run)
|
||||
83
browser-use/tests/test_save_conversation.py
Normal file
83
browser-use/tests/test_save_conversation.py
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, AgentHistoryList
|
||||
|
||||
# Chat model shared by every test in this file; constructed once at import time.
llm = ChatOpenAI(model='gpt-4o')
|
||||
|
||||
|
||||
async def test_save_conversation_contains_slash():
    """Check conversation saving when the path has a directory and a file prefix.

    With ``save_conversation_path='logs/conversation'`` the run must create the
    ``./logs`` directory and the file ``./logs/conversation_2.txt``.
    """
    # Start from a clean slate so leftovers from earlier runs cannot satisfy the asserts.
    if os.path.exists('./logs'):
        shutil.rmtree('./logs')

    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    try:
        agent = Agent(
            task=('go to google.com and search for text "hi there"'),
            llm=llm,
            browser_context=BrowserContext(browser=browser),
            save_conversation_path='logs/conversation',
        )
        history: AgentHistoryList = await agent.run(20)

        result = history.final_result()
        assert result is not None

        assert os.path.exists('./logs'), 'logs directory was not created'
        assert os.path.exists('./logs/conversation_2.txt'), 'logs file was not created'
    finally:
        # Close the browser even when the run or an assertion fails; previously it
        # was never closed and the non-headless window was left open.
        await browser.close()
|
||||
|
||||
|
||||
async def test_save_conversation_not_contains_slash():
    """Check conversation saving when the path is a bare name without a slash.

    With ``save_conversation_path='logs'`` the run must create the ``./logs``
    directory and the file ``./logs/_2.txt``.
    """
    # Start from a clean slate so leftovers from earlier runs cannot satisfy the asserts.
    if os.path.exists('./logs'):
        shutil.rmtree('./logs')

    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    try:
        agent = Agent(
            task=('go to google.com and search for text "hi there"'),
            llm=llm,
            browser_context=BrowserContext(browser=browser),
            save_conversation_path='logs',
        )
        history: AgentHistoryList = await agent.run(20)

        result = history.final_result()
        assert result is not None

        assert os.path.exists('./logs'), 'logs directory was not created'
        assert os.path.exists('./logs/_2.txt'), 'logs file was not created'
    finally:
        # Close the browser even when the run or an assertion fails; previously it
        # was never closed and the non-headless window was left open.
        await browser.close()
|
||||
|
||||
|
||||
async def test_save_conversation_deep_directory():
    """Check conversation saving into a nested directory that does not exist yet.

    With ``save_conversation_path='logs/deep/directory/conversation'`` the run must
    create ``./logs/deep/directory`` and the file ``conversation_2.txt`` inside it.
    """
    # Start from a clean slate so leftovers from earlier runs cannot satisfy the asserts.
    if os.path.exists('./logs'):
        shutil.rmtree('./logs')

    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    try:
        agent = Agent(
            task=('go to google.com and search for text "hi there"'),
            llm=llm,
            browser_context=BrowserContext(browser=browser),
            save_conversation_path='logs/deep/directory/conversation',
        )
        history: AgentHistoryList = await agent.run(20)

        result = history.final_result()
        assert result is not None

        assert os.path.exists('./logs/deep/directory'), 'logs directory was not created'
        assert os.path.exists('./logs/deep/directory/conversation_2.txt'), 'logs file was not created'
    finally:
        # Close the browser even when the run or an assertion fails; previously it
        # was never closed and the non-headless window was left open.
        await browser.close()
|
||||
198
browser-use/tests/test_self_registered_actions.py
Normal file
198
browser-use/tests/test_self_registered_actions.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import BaseModel, SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Provide a single event loop for the whole test session and close it at teardown."""
    policy = asyncio.get_event_loop_policy()
    session_loop = policy.new_event_loop()
    yield session_loop
    session_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield a headless Browser shared by the session, then close it."""
    headless_config = BrowserConfig(headless=True)
    shared_browser = Browser(config=headless_config)
    yield shared_browser
    await shared_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a new browser context for one test.

    The context is used as an async context manager, so it is exited when the test finishes.
    """
    pending_context = await browser.new_context()
    async with pending_context as active_context:
        yield active_context
|
||||
|
||||
|
||||
@pytest.fixture
async def controller():
    """Yield a Controller on which a set of custom actions has been registered.

    Three actions take plain arguments, two take a single Pydantic model via
    ``param_model``, and one takes two Pydantic models as separate arguments.
    """
    ctrl = Controller()

    # --- Pydantic models used as action parameters ---
    class SimpleModel(BaseModel):
        name: str
        age: int

    class Address(BaseModel):
        street: str
        city: str

    class NestedModel(BaseModel):
        user: SimpleModel
        address: Address

    # --- Actions with plain (non-Pydantic) arguments ---
    @ctrl.action('Print a message')
    def print_message(message: str):
        print(f'Message: {message}')
        return f'Printed message: {message}'

    @ctrl.action('Add two numbers')
    def add_numbers(a: int, b: int):
        total = a + b
        return f'The sum is {total}'

    @ctrl.action('Concatenate strings')
    def concatenate_strings(str1: str, str2: str):
        joined = str1 + str2
        return f'Concatenated string: {joined}'

    # --- Actions with Pydantic model arguments ---
    @ctrl.action('Process simple model', param_model=SimpleModel)
    def process_simple_model(model: SimpleModel):
        return f'Processed {model.name}, age {model.age}'

    @ctrl.action('Process nested model', param_model=NestedModel)
    def process_nested_model(model: NestedModel):
        person = f'{model.user.name}, age {model.user.age}'
        location = f'{model.address.street}, {model.address.city}'
        return f'Processed user {person} at address {location}'

    @ctrl.action('Process multiple models')
    def process_multiple_models(model1: SimpleModel, model2: Address):
        return f'Processed {model1.name} living at {model2.street}, {model2.city}'

    yield ctrl
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """Build the Azure-hosted gpt-4o chat model used by the tests in this file.

    The endpoint and key are read from AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_KEY,
    each falling back to an empty string when unset.
    """
    # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
|
||||
|
||||
# @pytest.mark.skip(reason="Skipping test for now")
|
||||
@pytest.mark.asyncio
async def test_self_registered_actions_no_pydantic(llm, controller):
    """Check that the three plain-argument custom actions are all invoked by the agent."""
    task = "First, print the message 'Hello, World!'. Then, add 10 and 20. Next, concatenate 'foo' and 'bar'."
    agent = Agent(task=task, llm=llm, controller=controller)
    history: AgentHistoryList = await agent.run(max_steps=10)

    # Check that custom actions were executed
    action_names = history.action_names()
    for expected_action in ('print_message', 'add_numbers', 'concatenate_strings'):
        assert expected_action in action_names
|
||||
|
||||
|
||||
# @pytest.mark.skip(reason="Skipping test for now")
|
||||
@pytest.mark.asyncio
async def test_mixed_arguments_actions(llm, controller):
    """Check that an async action registered inside the test is invoked and its result recorded."""

    # Define another action during the test (also covers async action functions).
    @controller.action('Calculate the area of a rectangle')
    async def calculate_area(length: float, width: float):
        area = length * width
        return f'The area is {area}'

    agent = Agent(
        task='Calculate the area of a rectangle with length 5.5 and width 3.2.',
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=5)

    # Check that the action was executed
    performed = history.action_names()
    assert 'calculate_area' in performed

    # check result
    correct = 'The area is 17.6'
    found = any(correct in content for content in history.extracted_content())
    if not found:
        pytest.fail(f'{correct} not found in extracted content')
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pydantic_simple_model(llm, controller):
    """Check that the action taking a flat Pydantic model is invoked and reports its input."""
    agent = Agent(
        task="Process a simple model with name 'Alice' and age 30.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=5)

    # Check that the action was executed
    performed = history.action_names()
    assert 'process_simple_model' in performed

    correct = 'Processed Alice, age 30'
    found = any(correct in content for content in history.extracted_content())
    if not found:
        pytest.fail(f'{correct} not found in extracted content')
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_pydantic_nested_model(llm, controller):
    """Check that the action taking a nested Pydantic model is invoked and reports its input."""
    agent = Agent(
        task="Process a nested model with user name 'Bob', age 25, living at '123 Maple St', 'Springfield'.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=5)

    # Check that the action was executed
    performed = history.action_names()
    assert 'process_nested_model' in performed

    correct = 'Processed user Bob, age 25 at address 123 Maple St, Springfield'
    found = any(correct in content for content in history.extracted_content())
    if not found:
        pytest.fail(f'{correct} not found in extracted content')
|
||||
|
||||
|
||||
# run this file with:
|
||||
# pytest tests/test_self_registered_actions.py --capture=no
|
||||
91
browser-use/tests/test_sensitive_data.py
Normal file
91
browser-use/tests/test_sensitive_data.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
import pytest
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
|
||||
from browser_use.agent.views import MessageManagerState
|
||||
from browser_use.controller.registry.service import Registry
|
||||
|
||||
|
||||
class SensitiveParams(BaseModel):
    """Parameter model with a single text field, used as input in the sensitive-data tests."""

    text: str = Field(
        description='Text with sensitive data placeholders',
    )
|
||||
|
||||
|
||||
@pytest.fixture
def registry():
    """Return a new Registry built with default arguments."""
    fresh_registry = Registry()
    return fresh_registry
|
||||
|
||||
|
||||
@pytest.fixture
def message_manager():
    """Return a MessageManager with a fixed task and system message and default settings/state."""
    system_prompt = SystemMessage(content='System message')
    return MessageManager(
        task='Test task',
        system_message=system_prompt,
        settings=MessageManagerSettings(),
        state=MessageManagerState(),
    )
|
||||
|
||||
|
||||
def test_replace_sensitive_data_with_missing_keys(registry):
    """Check that _replace_sensitive_data substitutes known keys and keeps the tag for the rest."""
    # A simple Pydantic model whose text carries two <secret> placeholders.
    params = SensitiveParams(text='Please enter <secret>username</secret> and <secret>password</secret>')

    # Each scenario: (sensitive_data passed in, fragments that must appear in the result text).
    scenarios = [
        # Case 1: All keys present - both placeholders are replaced.
        ({'username': 'user123', 'password': 'pass456'}, ('user123', 'pass456')),
        # Case 2: One key missing - username replaced, password kept as tag.
        ({'username': 'user123'}, ('user123', '<secret>password</secret>')),
        # Case 3: Multiple keys missing - both tags are preserved.
        ({}, ('<secret>username</secret>', '<secret>password</secret>')),
        # Case 4: One key empty - an empty value is treated the same as a missing key.
        ({'username': 'user123', 'password': ''}, ('user123', '<secret>password</secret>')),
    ]

    for sensitive_data, expected_fragments in scenarios:
        result = registry._replace_sensitive_data(params, sensitive_data)
        for fragment in expected_fragments:
            assert fragment in result.text
|
||||
|
||||
|
||||
def test_filter_sensitive_data(message_manager):
    """Check that _filter_sensitive_data swaps configured secret values for <secret> tags."""
    username_tag = '<secret>username</secret>'
    password_tag = '<secret>password</secret>'

    # Set up a message with sensitive information
    message = HumanMessage(content='My username is admin and password is secret123')

    # Case 1: No sensitive data provided - content is returned untouched.
    message_manager.settings.sensitive_data = None
    untouched = message_manager._filter_sensitive_data(message)
    assert untouched.content == 'My username is admin and password is secret123'

    # Case 2: All sensitive data is properly replaced
    message_manager.settings.sensitive_data = {'username': 'admin', 'password': 'secret123'}
    filtered = message_manager._filter_sensitive_data(message)
    assert username_tag in filtered.content
    assert password_tag in filtered.content

    # Case 3: Make sure it works with nested content
    nested_message = HumanMessage(content=[{'type': 'text', 'text': 'My username is admin and password is secret123'}])
    filtered_nested = message_manager._filter_sensitive_data(nested_message)
    nested_text = filtered_nested.content[0]['text']
    assert username_tag in nested_text
    assert password_tag in nested_text

    # Case 4: Test with empty values
    message_manager.settings.sensitive_data = {'username': 'admin', 'password': ''}
    partially_filtered = message_manager._filter_sensitive_data(message)
    assert username_tag in partially_filtered.content
    # Only username should be replaced since password is empty
|
||||
344
browser-use/tests/test_service.py
Normal file
344
browser-use/tests/test_service.py
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
from unittest.mock import AsyncMock, MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
from langchain_core.language_models.chat_models import BaseChatModel
|
||||
from langchain_core.messages import HumanMessage
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.browser.views import BrowserState
|
||||
from browser_use.controller.registry.service import Registry
|
||||
from browser_use.controller.registry.views import ActionModel
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
# run with python -m pytest tests/test_service.py
|
||||
|
||||
|
||||
# run test with:
|
||||
# python -m pytest tests/test_service.py
|
||||
class TestAgent:
    """Unit tests for Agent in which the LLM, browser and controller are replaced by mocks."""

    @pytest.fixture
    def mock_controller(self):
        """Controller mock whose registry exposes exactly one action, 'test_action'."""
        action_entry = MagicMock(param_model=MagicMock())
        registry_mock = Mock(spec=Registry)
        registry_mock.registry = MagicMock()
        registry_mock.registry.actions = {'test_action': action_entry}  # type: ignore

        controller_mock = Mock(spec=Controller)
        controller_mock.registry = registry_mock
        return controller_mock

    @pytest.fixture
    def mock_llm(self):
        """Chat-model mock restricted to the BaseChatModel interface."""
        return Mock(spec=BaseChatModel)

    @pytest.fixture
    def mock_browser(self):
        """Browser mock restricted to the Browser interface."""
        return Mock(spec=Browser)

    @pytest.fixture
    def mock_browser_context(self):
        """Browser-context mock restricted to the BrowserContext interface."""
        return Mock(spec=BrowserContext)

    def test_convert_initial_actions(self, mock_controller, mock_llm, mock_browser, mock_browser_context):  # type: ignore
        """
        Verify that _convert_initial_actions turns dictionary-based actions
        into ActionModel instances.

        Checked here:
        1. One result is produced for the one initial action.
        2. The action's param_model is called once with the given parameters.
        3. ActionModel is called once, keyed by the action name, with the
           value returned by param_model.
        4. The returned element is the object ActionModel produced.
        """
        # Arrange
        agent = Agent(
            task='Test task',
            llm=mock_llm,
            controller=mock_controller,
            browser=mock_browser,
            browser_context=mock_browser_context,
        )
        initial_actions = [{'test_action': {'param1': 'value1', 'param2': 'value2'}}]

        # Replace the agent's ActionModel with a mock that returns a known instance.
        action_model_cls = MagicMock(spec=ActionModel)
        built_action = MagicMock()
        action_model_cls.return_value = built_action
        agent.ActionModel = action_model_cls  # type: ignore

        # Act
        converted = agent._convert_initial_actions(initial_actions)

        # Assert
        param_model = mock_controller.registry.registry.actions['test_action'].param_model  # type: ignore
        assert len(converted) == 1
        param_model.assert_called_once_with(param1='value1', param2='value2')
        action_model_cls.assert_called_once()
        assert isinstance(converted[0], MagicMock)
        assert converted[0] == built_action

        # Check that the ActionModel was called with the correct parameters
        call_kwargs = action_model_cls.call_args[1]
        assert 'test_action' in call_kwargs
        assert call_kwargs['test_action'] == param_model.return_value

    @pytest.mark.asyncio
    async def test_step_error_handling(self):
        """
        Verify the error handling of Agent.step.

        get_next_action is made to raise ValueError('Test error'); afterwards the
        agent must have counted one failure and stored a single ActionResult
        carrying that error.
        """
        # Mock the LLM
        llm_mock = MagicMock(spec=BaseChatModel)

        # MessageManager is patched out for the lifetime of the agent under test.
        with patch('browser_use.agent.service.MessageManager'):
            # Create an Agent instance with mocked dependencies
            agent = Agent(task='Test task', llm=llm_mock)

            # Mock the get_next_action method to raise an exception
            agent.get_next_action = AsyncMock(side_effect=ValueError('Test error'))

            # Mock the browser_context so get_state returns a minimal BrowserState
            fake_state = BrowserState(
                url='https://example.com',
                title='Example',
                element_tree=MagicMock(),  # Mocked element tree
                tabs=[],
                selector_map={},
                screenshot='',
            )
            agent.browser_context = AsyncMock()
            agent.browser_context.get_state = AsyncMock(return_value=fake_state)

            # Mock the controller
            agent.controller = AsyncMock()

            # Call the step method
            await agent.step()

            # Assert that the error was handled and recorded
            assert agent.consecutive_failures == 1
            assert len(agent._last_result) == 1
            recorded = agent._last_result[0]
            assert isinstance(recorded, ActionResult)
            assert 'Test error' in recorded.error
            assert recorded.include_in_memory is True
|
||||
|
||||
|
||||
class TestRegistry:
    """Tests for Registry action registration and execute_action dispatch."""

    @pytest.fixture
    def registry_with_excludes(self):
        """Provide a Registry constructed with 'excluded_action' in exclude_actions."""
        excluded_names = ['excluded_action']
        return Registry(exclude_actions=excluded_names)

    def test_action_decorator_with_excluded_action(self, registry_with_excludes):
        """
        The action decorator must leave an excluded function unregistered and
        hand it back as-is, while still registering a non-excluded function.
        """

        # Two no-op candidates; their names are what the registry is keyed on.
        def excluded_action():
            pass

        def included_action():
            pass

        registered = registry_with_excludes.registry.actions

        # Excluded name: the decorator returns the function and registers nothing.
        decorated_func = registry_with_excludes.action(description='This should be excluded')(excluded_action)
        assert decorated_func == excluded_action
        assert 'excluded_action' not in registered

        # Any other name is registered as usual.
        registry_with_excludes.action(description='This should be included')(included_action)
        assert 'included_action' in registry_with_excludes.registry.actions

    @pytest.mark.asyncio
    async def test_execute_action_with_and_without_browser_context(self):
        """
        Exercise execute_action for actions that do and do not take a browser.

        Covered expectations:
        1. An action that takes a browser runs when one is supplied.
        2. An action that takes no browser runs without one.
        3. The supplied browser is forwarded to the action function.
        4. The action function receives the validated parameters.
        5. A RuntimeError is raised when a browser is required but missing.
        """
        registry = Registry()

        # Parameter model shared by both actions.
        class TestActionModel(BaseModel):
            param1: str

        # Plain coroutines used as side effects of the AsyncMock action functions.
        async def run_with_browser(param1: str, browser):
            return f'Action executed with {param1} and browser'

        async def run_without_browser(param1: str):
            return f'Action executed with {param1}'

        # Insert the two actions straight into the registry's action table.
        actions = registry.registry.actions
        actions['test_action_with_browser'] = MagicMock(
            function=AsyncMock(side_effect=run_with_browser),
            param_model=TestActionModel,
            description='Test action with browser',
        )
        actions['test_action_without_browser'] = MagicMock(
            function=AsyncMock(side_effect=run_without_browser),
            param_model=TestActionModel,
            description='Test action without browser',
        )

        mock_browser = MagicMock()

        # Browser supplied.
        result_with_browser = await registry.execute_action(
            'test_action_with_browser', {'param1': 'test_value'}, browser=mock_browser
        )
        assert result_with_browser == 'Action executed with test_value and browser'

        # No browser needed.
        result_without_browser = await registry.execute_action('test_action_without_browser', {'param1': 'test_value'})
        assert result_without_browser == 'Action executed with test_value'

        # Browser required but omitted.
        with pytest.raises(RuntimeError, match='Action test_action_with_browser requires browser but none provided'):
            await registry.execute_action('test_action_with_browser', {'param1': 'test_value'})

        # Each action function must have been invoked exactly once, with these arguments.
        registry.registry.actions['test_action_with_browser'].function.assert_called_once_with(
            param1='test_value', browser=mock_browser
        )
        registry.registry.actions['test_action_without_browser'].function.assert_called_once_with(param1='test_value')
|
||||
|
||||
|
||||
class TestAgentRetry:
    """Tests for how Agent.step behaves when get_next_action yields no action."""

    @pytest.fixture
    def mock_llm(self):
        """Provide an AsyncMock standing in for the language model."""
        return AsyncMock()

    @pytest.fixture
    def mock_controller(self):
        """Provide a Mock controller whose registry.registry.actions is an empty dict."""
        controller = Mock()
        controller.registry = Mock()
        controller.registry.registry = Mock()
        controller.registry.registry.actions = {}
        return controller

    @pytest.fixture
    def mock_browser_context(self):
        """Provide a Mock browser context whose get_state returns a fixed BrowserState."""
        browser_context = Mock()
        browser_context.get_state = AsyncMock(
            return_value=BrowserState(
                url='https://parabank.parasoft.com/parabank/index.htm',
                title='ParaBank',
                element_tree=MagicMock(),
                tabs=[],
                selector_map={},
                screenshot='',
            )
        )
        return browser_context

    @pytest.fixture
    def mock_action_model(self):
        """Provide a Mock constrained to the ActionModel interface."""
        return Mock(spec=ActionModel)

    @staticmethod
    def _make_agent(llm, controller, browser_context, action_model):
        """Build the Agent shared by both tests and inject the mocked ActionModel."""
        agent = Agent(
            task='Test task',
            llm=llm,
            controller=controller,
            browser=Mock(),
            browser_context=browser_context,
        )
        agent.ActionModel = action_model  # Inject the mock ActionModel
        return agent

    @pytest.mark.asyncio
    async def test_step_empty_action_retry(self, mock_llm, mock_controller, mock_browser_context, mock_action_model):
        """
        Test that the step method retries and handles empty actions correctly.
        """
        # Arrange
        agent = self._make_agent(mock_llm, mock_controller, mock_browser_context, mock_action_model)

        # get_next_action returns an empty action first, then a valid one.
        empty_model_output = MagicMock()
        empty_model_output.action = []  # Empty action
        valid_model_output = MagicMock()
        valid_action = MagicMock()
        valid_model_output.action = [valid_action]

        mock_llm.return_value.invoke.side_effect = [empty_model_output, valid_model_output]
        agent.get_next_action = mock_llm.return_value.invoke

        # Act
        await agent.step()

        # Assert
        # get_next_action is the same mock object as the LLM's invoke, so both counts must be 2
        # (initial call + retry).
        assert agent.get_next_action.call_count == 2
        assert mock_llm.return_value.invoke.call_count == 2

        # A recorded call unpacks as (args, kwargs). The messages are the first
        # positional argument, so take them from args rather than from kwargs.
        retry_args, _retry_kwargs = mock_llm.return_value.invoke.call_args_list[1]
        retry_messages = retry_args[0]
        assert len(retry_messages) == 2  # input_messages + clarification message
        assert isinstance(retry_messages[1], HumanMessage)
        assert 'You forgot to return an action' in retry_messages[1].content

        # Check that _last_result contains the valid action
        assert len(agent._last_result) == 1
        assert agent._last_result[0].action == valid_action

    @pytest.mark.asyncio
    async def test_step_empty_action_retry_and_fail(self, mock_llm, mock_controller, mock_browser_context, mock_action_model):
        """
        Test that the step method handles the case where get_next_action returns
        empty actions twice, and inserts a safe noop action.
        """
        # Arrange
        agent = self._make_agent(mock_llm, mock_controller, mock_browser_context, mock_action_model)

        # get_next_action returns an empty action on every call.
        empty_model_output = MagicMock()
        empty_model_output.action = []  # Empty action
        mock_llm.return_value.invoke.return_value = empty_model_output
        agent.get_next_action = mock_llm.return_value.invoke

        # Instantiating the mocked ActionModel yields this sentinel instance.
        mock_action_instance = MagicMock()
        mock_action_model.return_value = mock_action_instance

        # Act
        await agent.step()

        # Assert
        # Same mock object under two names; both counts must be 2.
        assert agent.get_next_action.call_count == 2
        assert mock_llm.return_value.invoke.call_count == 2

        # Check that ActionModel was instantiated with the noop action
        mock_action_model.assert_called_once()
        call_kwargs = mock_action_model.call_args.kwargs
        assert 'done' in call_kwargs
        assert call_kwargs['done'] == {'success': False, 'text': 'No action returned, safe exit.'}

        # Check that _last_result contains the noop action
        assert len(agent._last_result) == 1
        assert agent._last_result[0].action == mock_action_instance
|
||||
115
browser-use/tests/test_stress.py
Normal file
115
browser-use/tests/test_stress.py
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
import asyncio
|
||||
import os
|
||||
import random
|
||||
import string
|
||||
import time
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
    """Yield a newly created event loop for the whole session and close it afterwards."""
    policy = asyncio.get_event_loop_policy()
    session_loop = policy.new_event_loop()
    yield session_loop
    session_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
    """Yield one headless Browser for the session and close it on teardown."""
    headless_config = BrowserConfig(headless=True)
    shared_browser = Browser(config=headless_config)
    yield shared_browser
    await shared_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
    """Yield a fresh browser context per test, entered and exited as an async context manager."""
    new_context = await browser.new_context()
    async with new_context as active_context:
        yield active_context
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
    """
    Build the AzureChatOpenAI gpt-4o model used by the stress tests.

    The endpoint and API key are read from AZURE_OPENAI_ENDPOINT and
    AZURE_OPENAI_KEY; each falls back to an empty string when unset.
    """
    endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
    secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
    return AzureChatOpenAI(
        api_version='2024-10-21',
        model='gpt-4o',
        azure_endpoint=endpoint,
        api_key=secret_key,
    )
|
||||
|
||||
|
||||
def generate_random_text(length: int) -> str:
    """Return `length` characters drawn at random from ASCII letters, digits and the space character."""
    alphabet = string.ascii_letters + string.digits + ' '
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)
|
||||
|
||||
|
||||
@pytest.fixture
async def controller():
    """
    Yield a Controller with one extra action registered.

    The action, get_very_special_text, returns a 10000-character random string
    that is generated once when the fixture is set up.
    """
    ctrl = Controller()
    payload = generate_random_text(10000)

    # The function name is kept as-is: the stress test looks for it in the action history.
    @ctrl.action('call this magical function to get very special text')
    def get_very_special_text():
        return payload

    yield ctrl
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_token_limit_with_multiple_extractions(llm, controller, context):
    """Run an agent that must call the large-text action five times under a 2000-token input cap."""
    agent = Agent(
        task='Call the magical function to get very special text 5 times',
        llm=llm,
        controller=controller,
        browser_context=context,
        max_input_tokens=2000,
        save_conversation_path='tmp/stress_test/test_token_limit_with_multiple_extractions.json',
    )

    history = await agent.run(max_steps=5)

    # The special-text action must appear exactly five times in the action history.
    special_calls = [name for name in history.action_names() if name == 'get_very_special_text']
    assert len(special_calls) == 5

    # The message history must hold more than three messages.
    # NOTE(review): the previous comment said "max 3 messages" while the assertion
    # requires more than 3 -- confirm which one reflects the intended behaviour.
    assert len(agent.message_manager.history.messages) > 3
|
||||
|
||||
|
||||
@pytest.mark.slow
@pytest.mark.parametrize('max_tokens', [4000])  # 8000 20000
@pytest.mark.asyncio
async def test_open_3_tabs_and_extract_content(llm, controller, context, max_tokens):
    """
    Stress test: open 3 tabs with the same URL and extract content from each.

    The run is limited to 7 steps and to `max_tokens` input tokens. It passes
    when the history reports no errors and the context shows at least 3 tabs.
    """
    agent = Agent(
        task='Open 3 tabs with https://en.wikipedia.org/wiki/Internet and extract the content from each.',
        llm=llm,
        controller=controller,
        browser_context=context,
        max_input_tokens=max_tokens,
        save_conversation_path='tmp/stress_test/test_open_3_tabs_and_extract_content.json',
    )

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments made while the agent is running.
    start_time = time.perf_counter()
    history = await agent.run(max_steps=7)
    total_time = time.perf_counter() - start_time

    print(f'Total time: {total_time:.2f} seconds')

    # Check for errors
    errors = history.errors()
    assert len(errors) == 0, 'Errors occurred during the test'

    # check if 3 tabs were opened
    assert len(context.current_state.tabs) >= 3, '3 tabs were not opened'
|
||||
575
browser-use/tests/test_tab_management.py
Normal file
575
browser-use/tests/test_tab_management.py
Normal file
|
|
@ -0,0 +1,575 @@
|
|||
import asyncio
|
||||
import logging
|
||||
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
from pytest_httpserver import HTTPServer
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from browser_use.agent.views import ActionModel
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
from browser_use.controller.service import Controller
|
||||
from browser_use.controller.views import (
|
||||
CloseTabAction,
|
||||
GoToUrlAction,
|
||||
OpenTabAction,
|
||||
SwitchTabAction,
|
||||
)
|
||||
|
||||
# Dedicated logger for the tab tests, set to emit DEBUG-level records.
logger = logging.getLogger('tab_tests')
logger.setLevel(logging.DEBUG)
|
||||
|
||||
|
||||
class TestTabManagement:
|
||||
"""Tests for the tab management system with separate agent_current_page and human_current_page references."""
|
||||
|
||||
@pytest.fixture(scope='module')
def event_loop(self):
    """Yield a newly created event loop for the module's async tests and close it afterwards."""
    policy = asyncio.get_event_loop_policy()
    module_loop = policy.new_event_loop()
    yield module_loop
    module_loop.close()
|
||||
|
||||
@pytest.fixture(scope='module')
def http_server(self):
    """
    Start a local HTTP server that serves four static HTML test pages.

    Routes /page1 .. /page4 each answer with a minimal HTML document whose
    title and heading read "Test Page N". The server is yielded to the tests
    and stopped afterwards.
    """
    server = HTTPServer()
    server.start()

    # Once started, the server must be stopped even if registering a route
    # fails; otherwise it would stay up for the rest of the run.
    try:
        for page_number in range(1, 5):
            label = f'Test Page {page_number}'
            server.expect_request(f'/page{page_number}').respond_with_data(
                f'<html><head><title>{label}</title></head><body><h1>{label}</h1></body></html>',
                content_type='text/html',
            )

        yield server
    finally:
        server.stop()
|
||||
|
||||
@pytest.fixture(scope='module')
async def browser(self, event_loop):
    """Yield one headless Browser for the module and close it on teardown."""
    headless_config = BrowserConfig(headless=True)
    module_browser = Browser(config=headless_config)
    yield module_browser
    await module_browser.close()
|
||||
|
||||
@pytest.fixture
async def browser_context(self, browser, http_server):
    """
    Yield a BrowserContext whose first tab shows /page1 of the test server.

    Any pages already present in the session are closed first. Before the
    context is handed to the test, both page references are checked to be
    set and the agent page is checked to point at the test server. The
    context is closed on teardown.
    """
    context = BrowserContext(browser=browser)

    # Start the session and close whatever pages it already has.
    session = await context.get_session()
    for existing_page in session.context.pages:
        await existing_page.close()

    # Open the first tab on the test server and give it time to initialize.
    server_address = f'{http_server.host}:{http_server.port}'
    await context.create_new_tab(f'http://{server_address}/page1')
    await asyncio.sleep(1)

    # Sanity checks on the starting state.
    assert context.agent_current_page is not None
    assert context.human_current_page is not None
    assert server_address in context.agent_current_page.url

    yield context
    await context.close()
|
||||
|
||||
@pytest.fixture
def controller(self):
    """Provide a default-constructed Controller for each test."""
    fresh_controller = Controller()
    return fresh_controller
|
||||
|
||||
@pytest.fixture
def base_url(self, http_server):
    """Provide the http://host:port prefix of the test HTTP server."""
    host, port = http_server.host, http_server.port
    return f'http://{host}:{port}'
|
||||
|
||||
# Helper methods
|
||||
|
||||
async def _execute_action(self, controller, browser_context, action_data):
    """
    Run one action through controller.act and return its result.

    action_data is a mapping whose first key names the action and whose
    value is that action's parameter object. A throwaway ActionModel
    subclass is created to carry it.
    """
    # The first key of the mapping is the action name.
    field_name = list(action_data.keys())[0]
    field_value = action_data[field_name]

    # One-off ActionModel subclass for this call.
    class DynamicActionModel(ActionModel):
        pass

    # Attach "<value type> | None" to the subclass under the action's name.
    # NOTE(review): this sets a plain class attribute after class creation;
    # confirm that ActionModel actually treats it as a field.
    optional_type = type(field_value) | None
    setattr(DynamicActionModel, field_name, optional_type)

    outcome = await controller.act(DynamicActionModel(**action_data), browser_context)

    # Short pause so the browser can process the action before the caller inspects it.
    await asyncio.sleep(0.5)

    return outcome
|
||||
|
||||
async def _ensure_synchronized_state(self, browser_context, base_url):
    """
    Make sure both page references of browser_context point at open pages.

    A missing or stale agent page is replaced by the session's first page,
    or by a new /page1 tab when the session has no pages. A missing or
    stale human page is then set to the agent page.
    """
    session = await browser_context.get_session()

    agent_page = browser_context.agent_current_page
    if not (agent_page and agent_page in session.context.pages):
        if not session.context.pages:
            # Nothing open at all: create a tab on the test server and let it settle.
            await browser_context.create_new_tab(f'{base_url}/page1')
            await asyncio.sleep(1)
        else:
            browser_context.agent_current_page = session.context.pages[0]

    human_page = browser_context.human_current_page
    if not (human_page and human_page in session.context.pages):
        browser_context.human_current_page = browser_context.agent_current_page
|
||||
|
||||
async def _simulate_user_tab_change(self, page, browser_context):
    """
    Imitate a user bringing `page` to the foreground.

    The page is brought to front, a focus event is dispatched on its body,
    and the first window property whose name starts with
    'onVisibilityChange' is called directly. Both page references are
    logged before and after.
    """
    logger.debug(
        f'BEFORE: agent_tab={browser_context.agent_current_page.url if browser_context.agent_current_page else "None"}, '
        f'human_current_page={browser_context.human_current_page.url if browser_context.human_current_page else "None"}'
    )
    logger.debug(f'Simulating user changing to -> {page.url}')

    # Step 1: the physical action a user would take.
    await page.bring_to_front()

    # Step 2: dispatch a focus event from outside the page.
    await page.dispatch_event('body', 'focus')

    # Earlier attempts dispatched synthetic window focus, pointermove,
    # deviceorientation and visibilitychange events through page.evaluate;
    # they are disabled in favour of the direct listener call below.

    # Step 3: workaround -- call the visibility listener directly, because
    # playwright interferes with foreground tab detection.
    # TODO: fix this properly by triggering the right events and detecting them in playwright
    # The script text is kept exactly as it appears in the original.
    await page.evaluate("""() => {
const listener = Object.keys(window).filter(k => k.startsWith('onVisibilityChange'))[0]
if (listener) {
window[listener]({ bubbles: true, cancelable: false })
}
}""")

    # Let the event handlers run before logging the resulting state.
    await asyncio.sleep(0.5)

    logger.debug(
        f'AFTER: agent_tab URL={browser_context.agent_current_page.url if browser_context.agent_current_page else "None"}, '
        f'human_current_page URL={browser_context.human_current_page.url if browser_context.human_current_page else "None"}'
    )
|
||||
|
||||
# Tab management tests
|
||||
|
||||
@pytest.mark.asyncio
async def test_open_tab_updates_both_references(self, browser_context, base_url):
    """Opening a tab must add one page and move both page references to it."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Snapshot of the state before the new tab is opened.
    session = await browser_context.get_session()
    tabs_before = len(session.context.pages)
    agent_tab_before = browser_context.agent_current_page

    # Open the tab directly on the BrowserContext and let events settle.
    page2_url = f'{base_url}/page2'
    await browser_context.create_new_tab(page2_url)
    await asyncio.sleep(1)

    # Exactly one page was added.
    session = await browser_context.get_session()
    assert len(session.context.pages) == tabs_before + 1

    # Both references are set, equal to each other, different from the old tab, and on /page2.
    assert browser_context.human_current_page is not None
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert agent_tab_before != browser_context.agent_current_page
    assert page2_url in browser_context.agent_current_page.url
|
||||
|
||||
@pytest.mark.asyncio
async def test_switch_tab_updates_both_references(self, browser_context, base_url):
    """Switching to tab 0 must move both page references to the first tab."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Add a second tab; it becomes the active one.
    page2_url = f'{base_url}/page2'
    await browser_context.create_new_tab(page2_url)
    await asyncio.sleep(1)
    assert page2_url in browser_context.agent_current_page.url

    # Go back to the first tab.
    session = await browser_context.get_session()
    first_tab = session.context.pages[0]
    await browser_context.switch_to_tab(0)
    await asyncio.sleep(0.5)

    # Both references are set, equal, and point at the first tab showing /page1.
    assert browser_context.human_current_page is not None
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert browser_context.agent_current_page == first_tab
    assert f'{base_url}/page1' in browser_context.agent_current_page.url

    # The page behind the reference is usable: its title can be read.
    current_page = await browser_context.get_agent_current_page()
    page_title = await current_page.title()
    assert 'Test Page 1' in page_title
|
||||
|
||||
@pytest.mark.asyncio
async def test_close_tab_handles_references_correctly(self, browser_context, base_url):
    """Closing the active tab must move both page references back to the remaining tab."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Remember the starting tab, then open a second one, which becomes active.
    starting_tab = browser_context.agent_current_page
    page2_url = f'{base_url}/page2'
    await browser_context.create_new_tab(page2_url)
    await asyncio.sleep(1)
    assert page2_url in browser_context.agent_current_page.url

    # Close the active (second) tab.
    await browser_context.close_current_tab()
    await asyncio.sleep(0.5)

    # Both references are set, equal, and back on the still-open starting tab at /page1.
    assert browser_context.human_current_page is not None
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert browser_context.agent_current_page == starting_tab
    assert not browser_context.human_current_page.is_closed()
    assert f'{base_url}/page1' in browser_context.human_current_page.url
|
||||
|
||||
@pytest.mark.asyncio
async def test_user_changes_tab(self, browser_context, base_url):
    """A simulated user tab change must move human_current_page but leave agent_current_page alone."""
    await self._ensure_synchronized_state(browser_context, base_url)

    page1_url = f'{base_url}/page1'
    page2_url = f'{base_url}/page2'

    # Open a second tab; it becomes the agent's active tab.
    await browser_context.create_new_tab(page2_url)
    await asyncio.sleep(1)
    assert page2_url in browser_context.agent_current_page.url

    # Move the agent back to the first tab and simulate the user doing the same.
    session = await browser_context.get_session()
    first_tab = session.context.pages[0]
    await browser_context.switch_to_tab(0)
    await self._simulate_user_tab_change(first_tab, browser_context)
    await asyncio.sleep(0.5)

    # The agent's tab at this point is the reference for the final comparison.
    agent_tab = browser_context.agent_current_page
    assert page1_url in agent_tab.url

    # The user will switch to the second tab.
    session = await browser_context.get_session()
    user_tab = session.context.pages[1]

    # The simulation depends on a visibility listener being present on that page.
    listeners = await user_tab.evaluate("() => Object.keys(window).filter(k => k.startsWith('onVisibilityChange'))")
    logger.debug(f'Tab visibility listeners: {listeners}')
    assert len(listeners) > 0, 'No visibility listeners found on the page'

    await self._simulate_user_tab_change(user_tab, browser_context)

    # The agent stays on /page1 while the human reference now shows /page2.
    assert browser_context.agent_current_page == agent_tab
    assert browser_context.human_current_page != browser_context.agent_current_page
    assert page1_url in browser_context.agent_current_page.url
    assert page2_url in browser_context.human_current_page.url
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_agent_current_page(self, browser_context, base_url):
    """get_agent_current_page must return the agent's page even after the user moves to another tab."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Second tab on a different URL.
    await browser_context.create_new_tab(f'{base_url}/page2')
    await asyncio.sleep(1)

    # The agent returns to the first tab.
    await browser_context.switch_to_tab(0)
    await asyncio.sleep(0.5)

    # The user moves to the second tab.
    session = await browser_context.get_session()
    second_tab = session.context.pages[1]
    await self._simulate_user_tab_change(second_tab, browser_context)

    # The accessor returns the agent's reference, which is still on /page1.
    agent_page = await browser_context.get_agent_current_page()
    assert agent_page == browser_context.agent_current_page
    assert f'{base_url}/page1' in agent_page.url

    # The returned page is usable: its title can be read.
    page_title = await agent_page.title()
    assert 'Test Page 1' in page_title
|
||||
|
||||
@pytest.mark.asyncio
async def test_browser_operations_use_agent_current_page(self, browser_context, base_url):
    """Navigating the agent's page must not change the page the human reference points at."""
    await self._ensure_synchronized_state(browser_context, base_url)

    page2_url = f'{base_url}/page2'
    page3_url = f'{base_url}/page3'

    # Second tab on a different URL.
    await browser_context.create_new_tab(page2_url)
    await asyncio.sleep(1)

    # The agent returns to the first tab.
    await browser_context.switch_to_tab(0)
    await asyncio.sleep(0.5)

    # The user moves to the second tab.
    session = await browser_context.get_session()
    second_tab = session.context.pages[1]
    await self._simulate_user_tab_change(second_tab, browser_context)

    # Precondition: the references differ (human on /page2, agent on /page1).
    assert browser_context.human_current_page != browser_context.agent_current_page
    assert page2_url in browser_context.human_current_page.url
    assert f'{base_url}/page1' in browser_context.agent_current_page.url

    # Navigate the page returned for the agent.
    agent_page = await browser_context.get_agent_current_page()
    await agent_page.goto(page3_url)
    await asyncio.sleep(0.5)

    # The agent's page moved to /page3; the human's page is still /page2.
    assert page3_url in browser_context.agent_current_page.url
    assert page2_url in browser_context.human_current_page.url
|
||||
|
||||
@pytest.mark.asyncio
async def test_tab_reference_recovery(self, browser_context, base_url):
    """get_agent_current_page must return a usable page after agent_current_page was cleared."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Have more than one tab open.
    await browser_context.create_new_tab(f'{base_url}/page2')
    await asyncio.sleep(1)

    # Invalidate the agent reference on purpose.
    browser_context.agent_current_page = None

    # The accessor is expected to restore it.
    recovered_page = await browser_context.get_agent_current_page()
    assert recovered_page is not None
    assert not recovered_page.is_closed()

    # The recovered page is usable: it reports a non-empty title.
    page_title = await recovered_page.title()
    assert page_title, 'Page should have a title'

    # Both references are populated again.
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page is not None
|
||||
|
||||
@pytest.mark.asyncio
async def test_reconcile_tab_state_handles_both_invalid(self, browser_context, base_url):
    """_reconcile_tab_state must restore both page references when both were cleared."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Clear both references.
    browser_context.agent_current_page = None
    browser_context.human_current_page = None

    await browser_context._reconcile_tab_state()

    # Both are restored ...
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page is not None
    # ... to the same tab ...
    assert browser_context.agent_current_page == browser_context.human_current_page
    # ... which is still open.
    assert not browser_context.agent_current_page.is_closed()
|
||||
|
||||
@pytest.mark.asyncio
async def test_race_condition_resilience(self, browser_context, base_url):
    """Rapid consecutive tab switches must leave both page references consistent and usable."""
    await self._ensure_synchronized_state(browser_context, base_url)

    # Open two more tabs so that at least three exist.
    for extra_path in ('page2', 'page3'):
        await browser_context.create_new_tab(f'{base_url}/{extra_path}')
        await asyncio.sleep(0.5)

    session = await browser_context.get_session()
    assert len(session.context.pages) >= 3

    # Five quick switches cycling through tab indices 0, 1, 2, 0, 1.
    for switch_number in range(5):
        await browser_context.switch_to_tab(switch_number % 3)
        await asyncio.sleep(0.1)  # deliberately short gap between switches

    # Afterwards both references are set, equal, and point at an open page.
    assert browser_context.human_current_page is not None
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert not browser_context.human_current_page.is_closed()

    # The final tab can still navigate.
    page4_url = f'{base_url}/page4'
    final_page = await browser_context.get_agent_current_page()
    await final_page.goto(page4_url)
    assert page4_url in final_page.url
|
||||
|
||||
@pytest.mark.asyncio
async def test_tab_management_using_controller_actions(self, browser_context, controller, base_url):
    """Drive tab management through Controller actions instead of BrowserContext methods.

    The scenario opens two extra tabs, switches the agent back to the first
    one, simulates the human moving to the second, navigates the agent's tab,
    and finally closes the third and second tabs. After each step the
    human/agent page references are checked.
    """
    # Begin from a synchronized state (helper defined elsewhere on this test class).
    await self._ensure_synchronized_state(browser_context, base_url)

    # Close tabs until exactly one is left so that tab counts below are predictable.
    session = await browser_context.get_session()
    while len(session.context.pages) > 1:
        await browser_context.close_current_tab()
        await asyncio.sleep(0.5)

    # Remember the first tab; fall back to index 0 when the page exposes no page_id.
    initial_tab = browser_context.agent_current_page
    initial_tab_id = getattr(initial_tab, 'page_id', 0)

    # One single-field action model per Controller action used in this test.
    class OpenTabActionModel(ActionModel):
        open_tab: OpenTabAction | None = None

    class SwitchTabActionModel(ActionModel):
        switch_tab: SwitchTabAction | None = None

    class GoToUrlActionModel(ActionModel):
        go_to_url: GoToUrlAction | None = None

    class CloseTabActionModel(ActionModel):
        close_tab: CloseTabAction | None = None

    # --- Open the second tab -------------------------------------------------
    open_second = OpenTabActionModel(open_tab=OpenTabAction(url=f'{base_url}/page2'))
    await controller.act(open_second, browser_context)
    await asyncio.sleep(1)  # Wait for the tab to fully initialize

    # The new tab must be current for both the agent and the human.
    second_tab = browser_context.agent_current_page
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert f'{base_url}/page2' in browser_context.agent_current_page.url
    second_tab_id = getattr(second_tab, 'page_id', 1)

    # --- Open the third tab --------------------------------------------------
    open_third = OpenTabActionModel(open_tab=OpenTabAction(url=f'{base_url}/page3'))
    await controller.act(open_third, browser_context)
    await asyncio.sleep(1)  # Wait for the tab to fully initialize

    third_tab = browser_context.agent_current_page
    assert browser_context.human_current_page == browser_context.agent_current_page
    assert f'{base_url}/page3' in browser_context.agent_current_page.url
    third_tab_id = getattr(third_tab, 'page_id', 2)

    # --- Agent switches back to the first tab --------------------------------
    back_to_first = SwitchTabActionModel(switch_tab=SwitchTabAction(page_id=initial_tab_id))
    await controller.act(back_to_first, browser_context)
    await asyncio.sleep(0.5)

    assert browser_context.agent_current_page == initial_tab
    assert f'{base_url}/page1' in browser_context.agent_current_page.url
    assert browser_context.human_current_page == browser_context.agent_current_page

    # --- Human moves to the second tab ---------------------------------------
    await self._simulate_user_tab_change(second_tab, browser_context)
    await asyncio.sleep(0.5)

    # Human and agent must now be on different tabs.
    assert browser_context.human_current_page == second_tab
    assert browser_context.agent_current_page == initial_tab
    assert browser_context.human_current_page != browser_context.agent_current_page
    assert f'{base_url}/page2' in browser_context.human_current_page.url
    assert f'{base_url}/page1' in browser_context.agent_current_page.url

    # --- Agent navigates its own tab -----------------------------------------
    navigate = GoToUrlActionModel(go_to_url=GoToUrlAction(url=f'{base_url}/page4'))
    await controller.act(navigate, browser_context)
    await asyncio.sleep(0.5)

    # Re-fetch the agent page and confirm the navigation landed.
    agent_page = await browser_context.get_agent_current_page()
    assert agent_page is not None
    assert f'{base_url}/page4' in agent_page.url

    # The human's tab must not have been affected by the agent's navigation.
    assert f'{base_url}/page2' in browser_context.human_current_page.url

    # --- Close the third tab -------------------------------------------------
    close_third = CloseTabActionModel(close_tab=CloseTabAction(page_id=third_tab_id))
    await controller.act(close_third, browser_context)
    await asyncio.sleep(1.0)  # Extended wait to ensure tab cleanup

    session = await browser_context.get_session()
    assert len(session.context.pages) == 2

    # --- Close the second tab (the human's current tab) ----------------------
    close_second = CloseTabActionModel(close_tab=CloseTabAction(page_id=second_tab_id))
    await controller.act(close_second, browser_context)
    await asyncio.sleep(1.0)  # Extended wait to ensure tab cleanup

    session = await browser_context.get_session()
    assert len(session.context.pages) == 1

    # After reconciliation both references must point at the single remaining tab.
    await browser_context._reconcile_tab_state()
    assert browser_context.human_current_page is not None
    assert browser_context.agent_current_page is not None
    assert browser_context.human_current_page == browser_context.agent_current_page

    # The remaining tab must still be on the test server.
    final_page = await browser_context.get_current_page()
    assert f'{base_url}' in final_page.url
|
||||
91
browser-use/tests/test_url_allowlist_security.py
Normal file
91
browser-use/tests/test_url_allowlist_security.py
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
|
||||
|
||||
class TestUrlAllowlistSecurity:
    """Tests for URL allowlist security bypass prevention and URL allowlist glob pattern matching."""

    @staticmethod
    def _context_for(*allowed_domains):
        """Return a BrowserContext with no browser attached, limited to *allowed_domains*."""
        config = BrowserContextConfig(allowed_domains=list(allowed_domains))
        return BrowserContext(browser=None, config=config)

    def test_authentication_bypass_prevention(self):
        """The allowlist must not be fooled by an allowed domain placed in the URL's userinfo part."""
        context = self._context_for('example.com')

        # Each URL mentions "example.com" only before the '@'; the real host is not allowed.
        spoofed_urls = (
            'https://example.com:password@malicious.com',
            'https://example.com@malicious.com',
            'https://example.com%20@malicious.com',
            'https://example.com%3A@malicious.com',
        )
        for url in spoofed_urls:
            assert context._is_url_allowed(url) is False

        # Genuine credentials in front of an allowed host remain acceptable.
        assert context._is_url_allowed('https://user:password@example.com') is True

    def test_glob_pattern_matching(self):
        """Glob patterns in allowed_domains must match the expected hosts and nothing else."""
        # '*.example.com' is expected to cover subdomains as well as the bare domain.
        glob_context = self._context_for('*.example.com')

        for url in (
            'https://sub.example.com',
            'https://deep.sub.example.com',
            'https://example.com',
        ):
            assert glob_context._is_url_allowed(url) is True

        for url in ('https://notexample.com', 'https://example.org'):
            assert glob_context._is_url_allowed(url) is False

        # Leading-star and trailing-star patterns.
        stars_context = self._context_for('*google.com', 'wiki*')

        for url in (
            # hosts ending with google.com
            'https://google.com',
            'https://www.google.com',
            'https://anygoogle.com',
            # hosts starting with wiki
            'https://wiki.org',
            'https://wikipedia.org',
        ):
            assert stars_context._is_url_allowed(url) is True

        # A host matching neither pattern is rejected.
        assert stars_context._is_url_allowed('https://example.com') is False

        # Browser-internal URLs are accepted regardless of the allowlist.
        for url in ('chrome://settings', 'about:blank'):
            assert stars_context._is_url_allowed(url) is True

        # Credential-style bypass attempts must fail for glob patterns too.
        for url in (
            'https://allowed.example.com:password@notallowed.com',
            'https://subdomain.example.com@evil.com',
            'https://sub.example.com%20@malicious.org',
        ):
            assert glob_context._is_url_allowed(url) is False
        assert stars_context._is_url_allowed('https://anygoogle.com@evil.org') is False

    def test_glob_pattern_edge_cases(self):
        """Edge cases: pattern text in the middle of a host, and an over-broad double wildcard."""
        stars_context = self._context_for('*google.com', 'wiki*')

        for url in (
            # 'wiki*' must not match hosts that merely contain 'wiki' somewhere inside
            'https://notawiki.com',
            'https://havewikipages.org',
            'https://my-wiki-site.com',
            # '*google.com' must not match hosts with 'google' in the middle
            'https://mygoogle.company.com',
        ):
            assert stars_context._is_url_allowed(url) is False

        # A deliberately risky pattern with wildcards on both sides of 'google'.
        risky_context = self._context_for('*.google.*')

        # Legitimate Google hosts match ...
        assert risky_context._is_url_allowed('https://www.google.com') is True
        assert risky_context._is_url_allowed('https://mail.google.co.uk') is True

        # ... but so does a hostile host that only uses 'google' as a subdomain label,
        # which is why such patterns are risky.
        assert risky_context._is_url_allowed('https://www.google.evil.com') is True
|
||||
64
browser-use/tests/test_vision.py
Normal file
64
browser-use/tests/test_vision.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
import pytest
|
||||
|
||||
# Make the repository root (parent of this file's directory) importable *before*
# the first project import, so the path tweak can actually help that import resolve.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
import asyncio
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, AgentHistoryList, Controller
|
||||
|
||||
# Module-level collaborators shared by the actions and the test below.
# NOTE(review): both are created at import time — presumably ChatOpenAI needs
# OPENAI_API_KEY to be set already (see module docstring); confirm.
llm = ChatOpenAI(model='gpt-4o')
controller = Controller()
|
||||
|
||||
# use this test to ask the model questions about the page like
|
||||
# which color do you see for bbox labels, list all with their label
|
||||
# what are the smallest bboxes with labels
|
||||
|
||||
|
||||
@controller.registry.action(description='explain what you see on the screen and ask user for input')
async def explain_screen(text: str) -> str:
    """Pretty-print *text* and return the line the user types in response.

    NOTE: ``input()`` blocks until the user answers; this is only meant for
    interactive local runs.
    """
    pprint(text)
    return input('\nuser input next question: \n')
|
||||
|
||||
|
||||
@controller.registry.action(description='done')
async def done(text: str) -> str:
    """Ignore *text* and always answer with an instruction to call explain_screen.

    NOTE(review): presumably registered to replace the controller's built-in
    'done' action so the session keeps going — confirm against Controller.
    """
    next_instruction = 'call explain_screen'
    return next_instruction
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
def event_loop():
    """Yield a brand-new event loop for each test and close it afterwards."""
    policy = asyncio.get_event_loop_policy()
    fresh_loop = policy.new_event_loop()
    yield fresh_loop
    # Teardown: runs once the test using the fixture is finished.
    fresh_loop.close()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason='this is for local testing only')
async def test_vision():
    """Run an interactive agent session in a visible browser (manual, local use only).

    The agent is told to keep calling ``explain_screen`` so a person at the
    terminal can ask it questions about the page.
    """
    task_prompt = 'call explain_screen all the time the user asks you questions e.g. about the page like bbox which you see are labels - your task is to explain it and get the next question'
    # headless=False: a person needs to see the page the agent is describing.
    visible_browser = Browser(config=BrowserConfig(disable_security=True, headless=False))
    agent = Agent(
        task=task_prompt,
        llm=llm,
        controller=controller,
        browser=visible_browser,
    )

    try:
        history: AgentHistoryList = await agent.run(20)
    finally:
        # Always shut the browser down, even if the run raised.
        await agent.browser.close()
|
||||
68
browser-use/tests/test_wait_for_element.py
Normal file
68
browser-use/tests/test_wait_for_element.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Put the parent of this file's directory at the front of sys.path (once),
# so the imports below can resolve from there.
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)
|
||||
|
||||
import pytest
|
||||
from dotenv import load_dotenv
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
# Project imports (top-level package API)
|
||||
from browser_use import Agent, Controller
|
||||
|
||||
# Project imports (browser submodules)
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
# Read variables from a .env file (if any) before the model client is created.
load_dotenv()

# Module-level language model and controller shared by the test below.
llm = ChatOpenAI(model='gpt-4o')
controller = Controller()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason='this is for local testing only')
async def test_wait_for_element():
    """Test 'Wait for element' action.

    Opens https://pypi.org/ in a visible browser, lets the agent run for up to
    three steps, and checks that a ``wait_for_element`` action was executed and
    that the ``#search`` element is present and visible afterwards.

    The browser is always closed on exit, whether the run and its assertions
    succeed or fail, so repeated local runs do not leave browser windows behind.
    """
    initial_actions = [
        {'open_tab': {'url': 'https://pypi.org/'}},
        # Uncomment the line below to include the wait action in initial actions.
        # {'wait_for_element': {'selector': '#search', 'timeout': 30}},
    ]

    # Keep a handle on the browser so it can be closed in the finally block.
    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    try:
        # Set up the browser context.
        context = BrowserContext(browser=browser)

        # Create the agent with the task.
        agent = Agent(
            task="Wait for element '#search' to be visible with a timeout of 30 seconds.",
            llm=llm,
            browser_context=context,
            initial_actions=initial_actions,
            controller=controller,
        )

        # Run the agent for a few steps to trigger navigation and then the wait action.
        history = await agent.run(max_steps=3)
        action_names = history.action_names()

        # Ensure that the wait_for_element action was executed.
        assert 'wait_for_element' in action_names, 'Expected wait_for_element action to be executed.'

        # Verify that the #search element is visible by querying the page.
        page = await context.get_current_page()
        search_handle = await page.query_selector('#search')
        assert search_handle is not None, 'Expected to find a #search element on the page.'
        is_visible = await search_handle.is_visible()
        assert is_visible, 'Expected the #search element to be visible.'
    finally:
        # Release the browser even when the run or an assertion above fails.
        await browser.close()
|
||||
|
||||
|
||||
# Allow running this file directly as a script, outside pytest.
if __name__ == '__main__':
    wait_test = test_wait_for_element()
    asyncio.run(wait_test)
|
||||
Loading…
Add table
Add a link
Reference in a new issue