[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
220
browser-use/tests/test_agent_actions.py
Normal file
220
browser-use/tests/test_agent_actions.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
import asyncio
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import BaseModel, SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.views import BrowserState
|
||||
|
||||
|
||||
@pytest.fixture
def llm():
	"""Build the chat model handed to every agent in this module.

	The Azure endpoint and key are read from the ``AZURE_OPENAI_ENDPOINT`` and
	``AZURE_OPENAI_KEY`` environment variables; each falls back to an empty
	string when unset.
	"""
	# Alternative backends that have been used here:
	#   ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
	#   ChatOpenAI(model='gpt-4o-mini')
	endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', '')
	secret_key = SecretStr(os.getenv('AZURE_OPENAI_KEY', ''))
	return AzureChatOpenAI(
		model='gpt-4o',
		api_version='2024-10-21',
		azure_endpoint=endpoint,
		api_key=secret_key,
	)
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
def event_loop():
	"""Provide a single new event loop for the whole test session.

	The loop is created from the default event loop policy and closed once
	the session-scoped fixture is torn down.
	"""
	policy = asyncio.get_event_loop_policy()
	session_loop = policy.new_event_loop()
	yield session_loop
	session_loop.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
async def browser(event_loop):
	"""Yield one headless ``Browser`` for the session and close it at teardown."""
	headless_config = BrowserConfig(headless=True)
	shared_browser = Browser(config=headless_config)
	yield shared_browser
	await shared_browser.close()
|
||||
|
||||
|
||||
@pytest.fixture
async def context(browser):
	"""Yield a new browser context opened on the shared ``browser``."""
	new_context = await browser.new_context()
	# Leaving the ``async with`` block (``__aexit__``) performs the clean-up.
	async with new_context as ctx:
		yield ctx
|
||||
|
||||
|
||||
# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
|
||||
# @pytest.mark.asyncio
|
||||
@pytest.mark.skip(reason='Kinda expensive to run')
async def test_ecommerce_interaction(llm, context):
	"""Run a multi-step shopping task and check the kinds of actions taken.

	Each recorded model action is mapped to a coarse label (navigate, input,
	click) and typed text is graded against the search term 'laptop'. The test
	passes when all three labels occur and at least one typed text contains
	'laptop'.
	"""
	agent = Agent(
		task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
		llm=llm,
		browser_context=context,
		save_conversation_path='tmp/test_ecommerce_interaction/conversation',
	)

	history: AgentHistoryList = await agent.run(max_steps=20)

	# Translate the raw action history into coarse labels.
	steps = []
	for entry in history.model_actions():
		kind = list(entry.keys())[0]

		if kind == 'click_element':
			steps.append('click')
			continue
		if kind in ('go_to_url', 'open_tab'):
			steps.append('navigate')
			continue
		if kind != 'input_text':
			# Any other action type is irrelevant for this check.
			continue

		steps.append('input')
		# Grade what was typed against the expected search term.
		typed = entry['input_text']['text'].lower()  # type: ignore
		if typed == 'laptop':
			grade = 'input_exact_correct'
		elif 'laptop' in typed:
			grade = 'correct_in_input'
		else:
			grade = 'incorrect_input'
		steps.append(grade)

	# Navigated to Amazon, entered a search term, clicked search/filter.
	for required in ('navigate', 'input', 'click'):
		assert required in steps
	# The search term was typed either exactly or as part of a longer text.
	assert {'input_exact_correct', 'correct_in_input'} & set(steps)
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_error_recovery(llm, context):
	"""Test agent's ability to recover from errors.

	The agent is asked to visit a site that does not exist and then fall back
	to google.com. The test passes when at least one navigation action
	(``go_to_url`` or ``open_tab``) targets google.com.
	"""
	agent = Agent(
		task='Navigate to nonexistent-site.com and then recover by going to google.com ',
		llm=llm,
		browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	actions_names = history.action_names()
	actions = history.model_actions()
	assert 'go_to_url' in actions_names or 'open_tab' in actions_names, f'{actions_names} does not contain go_to_url or open_tab'

	# Collect every navigation target. The first navigation is expected to be
	# the nonexistent site, so only checking the first `go_to_url` would fail a
	# correctly behaving agent; the recovery can also happen via `open_tab`.
	visited_urls = []
	for action in actions:
		for nav_name in ('go_to_url', 'open_tab'):
			if nav_name in action:
				assert 'url' in action[nav_name], f'url is not in {nav_name}'
				visited_urls.append(action[nav_name]['url'])

	# Ignore a trailing slash so 'https://www.google.com/' also counts.
	assert any(url.rstrip('/').endswith('google.com') for url in visited_urls), (
		f'no navigation url ends with google.com: {visited_urls}'
	)
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_find_contact_email(llm, context):
	"""Check that the agent extracts the contact email from browser-use.com.

	Fails when none of the extracted content entries contains the expected
	address.
	"""
	agent = Agent(
		task='Go to https://browser-use.com/ and find out the contact email',
		llm=llm,
		browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	# The expected address must appear in at least one extracted snippet.
	email = 'info@browser-use.com'
	extracted_content = history.extracted_content()
	found = any(email in snippet for snippet in extracted_content)
	if not found:
		pytest.fail(f'{extracted_content} does not contain {email}')
|
||||
|
||||
|
||||
# @pytest.mark.asyncio
|
||||
async def test_agent_finds_installation_command(llm, context):
	"""Check that the agent finds the pip install command for browser-use.

	Fails when none of the extracted content entries contains the expected
	command.
	"""
	agent = Agent(
		task='Find the pip installation command for the browser-use repo',
		llm=llm,
		browser_context=context,
	)

	history: AgentHistoryList = await agent.run(max_steps=10)

	# The expected command must appear in at least one extracted snippet.
	install_command = 'pip install browser-use'
	extracted_content = history.extracted_content()
	found = any(install_command in snippet for snippet in extracted_content)
	if not found:
		pytest.fail(f'{extracted_content} does not contain {install_command}')
|
||||
|
||||
|
||||
class CaptchaTest(BaseModel):
	"""One parametrized captcha scenario for ``test_captcha_solver``."""

	# Human-readable label, used in the failure message.
	name: str
	# Page the agent is told to open.
	url: str
	# Text searched for in the page after the run to decide success.
	success_text: str
	# Optional extra instruction appended to the agent's task.
	additional_text: str | None = None
|
||||
|
||||
|
||||
# Run the captcha tests: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
	'captcha',
	[
		CaptchaTest(
			name='Text Captcha',
			url='https://2captcha.com/demo/text',
			success_text='Captcha is passed successfully!',
		),
		CaptchaTest(
			name='Basic Captcha',
			url='https://captcha.com/demos/features/captcha-demo.aspx',
			success_text='Correct!',
		),
		CaptchaTest(
			name='Rotate Captcha',
			url='https://2captcha.com/demo/rotatecaptcha',
			success_text='Captcha is passed successfully',
			additional_text='Use multiple clicks at once. click done when image is exact correct position.',
		),
		CaptchaTest(
			name='MT Captcha',
			url='https://2captcha.com/demo/mtcaptcha',
			success_text='Verified Successfully',
			additional_text='Stop when you solved it successfully.',
		),
	],
)
async def test_captcha_solver(llm, context, captcha: CaptchaTest):
	"""Test agent's ability to solve different types of captchas.

	The agent is sent to ``captcha.url``; afterwards the text of the current
	page state is searched for ``captcha.success_text``.
	"""
	# Append the extra hint only when one is defined. Interpolating a missing
	# hint directly would put the literal word "None" into the agent's prompt.
	task = f'Go to {captcha.url} and solve the captcha.'
	if captcha.additional_text:
		task = f'{task} {captcha.additional_text}'

	agent = Agent(
		task=task,
		llm=llm,
		browser_context=context,
	)

	# The run's history is not needed; success is judged from the final page.
	await agent.run(max_steps=7)

	state: BrowserState = await context.get_state()

	all_text = state.element_tree.get_all_text_till_next_clickable_element()
	# Normalise to a string: empty/None becomes '', anything else is str()-ed.
	all_text = str(all_text) if all_text else ''

	solved = captcha.success_text in all_text
	assert solved, f'Failed to solve {captcha.name}'
|
||||
|
||||
# python -m pytest tests/test_agent_actions.py -v --capture=no
|
||||
|
||||
# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
|
||||
Loading…
Add table
Add a link
Reference in a new issue