[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/tests/test_agent_actions.py
+++ b/browser-use/tests/test_agent_actions.py
@ -0,0 +1,220 @@
+import asyncio
+import os
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import BaseModel, SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.views import BrowserState
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+
+	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+	# return ChatOpenAI(model='gpt-4o-mini')
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+		# Clean up automatically happens with __aexit__
+
+
+# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
+# @pytest.mark.asyncio
+@pytest.mark.skip(reason='Kinda expensive to run')
+async def test_ecommerce_interaction(llm, context):
+	"""Test complex ecommerce interaction sequence"""
+	agent = Agent(
+		task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
+		llm=llm,
+		browser_context=context,
+		save_conversation_path='tmp/test_ecommerce_interaction/conversation',
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=20)
+
+	# Verify sequence of actions
+	action_sequence = []
+	for action in history.model_actions():
+		action_name = list(action.keys())[0]
+		if action_name in ['go_to_url', 'open_tab']:
+			action_sequence.append('navigate')
+		elif action_name == 'input_text':
+			action_sequence.append('input')
+			# Check that the input is 'laptop'
+			inp = action['input_text']['text'].lower()  # type: ignore
+			if inp == 'laptop':
+				action_sequence.append('input_exact_correct')
+			elif 'laptop' in inp:
+				action_sequence.append('correct_in_input')
+			else:
+				action_sequence.append('incorrect_input')
+		elif action_name == 'click_element':
+			action_sequence.append('click')
+
+	# Verify essential steps were performed
+	assert 'navigate' in action_sequence  # Navigated to Amazon
+	assert 'input' in action_sequence  # Entered search term
+	assert 'click' in action_sequence  # Clicked search/filter
+	assert 'input_exact_correct' in action_sequence or 'correct_in_input' in action_sequence
+
+
+# @pytest.mark.asyncio
+async def test_error_recovery(llm, context):
+	"""Test agent's ability to recover from errors"""
+	agent = Agent(
+		task='Navigate to nonexistent-site.com and then recover by going to google.com ',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	actions_names = history.action_names()
+	actions = history.model_actions()
+	assert 'go_to_url' in actions_names or 'open_tab' in actions_names, f'{actions_names} does not contain go_to_url or open_tab'
+	for action in actions:
+		if 'go_to_url' in action:
+			assert 'url' in action['go_to_url'], 'url is not in go_to_url'
+			assert action['go_to_url']['url'].endswith('google.com'), 'url does not end with google.com'
+			break
+
+
+# @pytest.mark.asyncio
+async def test_find_contact_email(llm, context):
+	"""Test agent's ability to find contact email on a website"""
+	agent = Agent(
+		task='Go to https://browser-use.com/ and find out the contact email',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	# Verify the agent found the contact email
+	extracted_content = history.extracted_content()
+	email = 'info@browser-use.com'
+	for content in extracted_content:
+		if email in content:
+			break
+	else:
+		pytest.fail(f'{extracted_content} does not contain {email}')
+
+
+# @pytest.mark.asyncio
+async def test_agent_finds_installation_command(llm, context):
+	"""Test agent's ability to find the pip installation command for browser-use on the web"""
+	agent = Agent(
+		task='Find the pip installation command for the browser-use repo',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	# Verify the agent found the correct installation command
+	extracted_content = history.extracted_content()
+	install_command = 'pip install browser-use'
+	for content in extracted_content:
+		if install_command in content:
+			break
+	else:
+		pytest.fail(f'{extracted_content} does not contain {install_command}')
+
+
+class CaptchaTest(BaseModel):
+	name: str
+	url: str
+	success_text: str
+	additional_text: str | None = None
+
+
+# run 3 test: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
+# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+	'captcha',
+	[
+		CaptchaTest(
+			name='Text Captcha',
+			url='https://2captcha.com/demo/text',
+			success_text='Captcha is passed successfully!',
+		),
+		CaptchaTest(
+			name='Basic Captcha',
+			url='https://captcha.com/demos/features/captcha-demo.aspx',
+			success_text='Correct!',
+		),
+		CaptchaTest(
+			name='Rotate Captcha',
+			url='https://2captcha.com/demo/rotatecaptcha',
+			success_text='Captcha is passed successfully',
+			additional_text='Use multiple clicks at once. click done when image is exact correct position.',
+		),
+		CaptchaTest(
+			name='MT Captcha',
+			url='https://2captcha.com/demo/mtcaptcha',
+			success_text='Verified Successfully',
+			additional_text='Stop when you solved it successfully.',
+		),
+	],
+)
+async def test_captcha_solver(llm, context, captcha: CaptchaTest):
+	"""Test agent's ability to solve different types of captchas"""
+	agent = Agent(
+		task=f'Go to {captcha.url} and solve the captcha. {captcha.additional_text}',
+		llm=llm,
+		browser_context=context,
+	)
+	from browser_use.agent.views import AgentHistoryList
+
+	history: AgentHistoryList = await agent.run(max_steps=7)
+
+	state: BrowserState = await context.get_state()
+
+	all_text = state.element_tree.get_all_text_till_next_clickable_element()
+
+	if not all_text:
+		all_text = ''
+
+	if not isinstance(all_text, str):
+		all_text = str(all_text)
+
+	solved = captcha.success_text in all_text
+	assert solved, f'Failed to solve {captcha.name}'
+
+	# python -m pytest tests/test_agent_actions.py -v --capture=no
+
+	# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO