[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/tests/conftest.py
+++ b/browser-use/tests/conftest.py
@ -0,0 +1,59 @@
+"""
+Test configuration for browser-use.
+"""
+
+import logging
+import os
+import sys
+
+import pytest
+from langchain_openai import ChatOpenAI
+from pydantic import SecretStr
+
+# Ensure the project root is in the Python path
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+
+# Load environment variables
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Configure logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
+
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+
+@pytest.fixture(scope='session')
+def llm():
+	"""
+	Fixture to provide a ChatOpenAI instance or a mock for testing.
+	Uses a mock if OPENAI_API_KEY is not set.
+	"""
+	api_key = os.getenv('OPENAI_API_KEY')
+	logger.debug(f'API Key present: {bool(api_key)}')
+	logger.debug('Using actual ChatOpenAI model')
+	return ChatOpenAI(model='gpt-4o', api_key=SecretStr(api_key) if api_key else None)
+
+
+@pytest.fixture(scope='session')
+def browser():
+	"""
+	Fixture to provide a Browser instance for testing.
+	"""
+	logger.debug('Creating Browser instance for testing')
+	return Browser(config=BrowserConfig(headless=True, disable_security=True))
+
+
+@pytest.fixture(scope='function')
+async def browser_context(browser):
+	"""
+	Fixture to provide a BrowserContext instance for testing.
+	"""
+	logger.debug('Creating BrowserContext instance for testing')
+	context = BrowserContext(browser=browser)
+	yield context
+	await context.close()
--- a/browser-use/tests/test_action_filters.py
+++ b/browser-use/tests/test_action_filters.py
@ -0,0 +1,305 @@
+from unittest.mock import MagicMock
+
+import pytest
+from playwright.async_api import Page
+from pydantic import BaseModel
+
+from browser_use.controller.registry.service import Registry
+from browser_use.controller.registry.views import ActionRegistry, RegisteredAction
+
+
+class EmptyParamModel(BaseModel):
+	# No fields: passed as `param_model` for the RegisteredAction objects built in these tests.
+	pass
+
+
+class TestActionFilters:
+	def test_get_prompt_description_no_filters(self):
+		"""Test that system prompt only includes actions with no filters"""
+		registry = ActionRegistry()
+
+		# Add actions with and without filters
+		no_filter_action = RegisteredAction(
+			name='no_filter_action',
+			description='Action with no filters',
+			function=lambda: None,
+			param_model=EmptyParamModel,
+			domains=None,
+			page_filter=None,
+		)
+
+		page_filter_action = RegisteredAction(
+			name='page_filter_action',
+			description='Action with page filter',
+			function=lambda: None,
+			param_model=EmptyParamModel,
+			domains=None,
+			page_filter=lambda page: True,
+		)
+
+		domain_filter_action = RegisteredAction(
+			name='domain_filter_action',
+			description='Action with domain filter',
+			function=lambda: None,
+			param_model=EmptyParamModel,
+			domains=['example.com'],
+			page_filter=None,
+		)
+
+		registry.actions = {
+			'no_filter_action': no_filter_action,
+			'page_filter_action': page_filter_action,
+			'domain_filter_action': domain_filter_action,
+		}
+
+		# System prompt (no page) should only include actions with no filters
+		system_description = registry.get_prompt_description()
+		assert 'no_filter_action' in system_description
+		assert 'page_filter_action' not in system_description
+		assert 'domain_filter_action' not in system_description
+
+	def test_page_filter_matching(self):
+		"""Test that page filters work correctly"""
+		registry = ActionRegistry()
+
+		# Create a mock page
+		mock_page = MagicMock(spec=Page)
+		mock_page.url = 'https://example.com/page'
+
+		# Create actions with different page filters
+		matching_action = RegisteredAction(
+			name='matching_action',
+			description='Action with matching page filter',
+			function=lambda: None,
+			param_model=EmptyParamModel,
+			domains=None,
+			page_filter=lambda page: 'example.com' in page.url,
+		)
+
+		non_matching_action = RegisteredAction(
+			name='non_matching_action',
+			description='Action with non-matching page filter',
+			function=lambda: None,
+			param_model=EmptyParamModel,
+			domains=None,
+			page_filter=lambda page: 'other.com' in page.url,
+		)
+
+		registry.actions = {'matching_action': matching_action, 'non_matching_action': non_matching_action}
+
+		# Page-specific description should only include matching actions
+		page_description = registry.get_prompt_description(mock_page)
+		assert 'matching_action' in page_description
+		assert 'non_matching_action' not in page_description
+
+	def test_domain_filter_matching(self):
+		"""Test that domain filters work correctly with glob patterns"""
+		registry = ActionRegistry()
+
+		# Create actions with different domain patterns
+		actions = {
+			'exact_match': RegisteredAction(
+				name='exact_match',
+				description='Exact domain match',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['example.com'],
+				page_filter=None,
+			),
+			'subdomain_match': RegisteredAction(
+				name='subdomain_match',
+				description='Subdomain wildcard match',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['*.example.com'],
+				page_filter=None,
+			),
+			'prefix_match': RegisteredAction(
+				name='prefix_match',
+				description='Prefix wildcard match',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['example*'],
+				page_filter=None,
+			),
+			'non_matching': RegisteredAction(
+				name='non_matching',
+				description='Non-matching domain',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['other.com'],
+				page_filter=None,
+			),
+		}
+
+		registry.actions = actions
+
+		# Test exact domain match
+		mock_page = MagicMock(spec=Page)
+		mock_page.url = 'https://example.com/page'
+
+		exact_match_description = registry.get_prompt_description(mock_page)
+		assert 'exact_match' in exact_match_description
+		assert 'non_matching' not in exact_match_description
+
+		# Test subdomain match
+		mock_page.url = 'https://sub.example.com/page'
+		subdomain_match_description = registry.get_prompt_description(mock_page)
+		assert 'subdomain_match' in subdomain_match_description
+		assert 'exact_match' not in subdomain_match_description
+
+		# Test prefix match
+		mock_page.url = 'https://example123.org/page'
+		prefix_match_description = registry.get_prompt_description(mock_page)
+		assert 'prefix_match' in prefix_match_description
+
+	def test_domain_and_page_filter_together(self):
+		"""Test that actions can be filtered by both domain and page filter"""
+		registry = ActionRegistry()
+
+		# Create a mock page
+		mock_page = MagicMock(spec=Page)
+		mock_page.url = 'https://example.com/admin'
+
+		# Actions with different combinations of filters
+		actions = {
+			'domain_only': RegisteredAction(
+				name='domain_only',
+				description='Domain filter only',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['example.com'],
+				page_filter=None,
+			),
+			'page_only': RegisteredAction(
+				name='page_only',
+				description='Page filter only',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=None,
+				page_filter=lambda page: 'admin' in page.url,
+			),
+			'both_matching': RegisteredAction(
+				name='both_matching',
+				description='Both filters matching',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['example.com'],
+				page_filter=lambda page: 'admin' in page.url,
+			),
+			'both_one_fail': RegisteredAction(
+				name='both_one_fail',
+				description='One filter fails',
+				function=lambda: None,
+				param_model=EmptyParamModel,
+				domains=['other.com'],
+				page_filter=lambda page: 'admin' in page.url,
+			),
+		}
+
+		registry.actions = actions
+
+		# Check that only actions with matching filters are included
+		description = registry.get_prompt_description(mock_page)
+		assert 'domain_only' in description  # Domain matches
+		assert 'page_only' in description  # Page filter matches
+		assert 'both_matching' in description  # Both filters match
+		assert 'both_one_fail' not in description  # Domain filter fails
+
+		# Test with different URL where page filter fails
+		mock_page.url = 'https://example.com/dashboard'
+		description = registry.get_prompt_description(mock_page)
+		assert 'domain_only' in description  # Domain matches
+		assert 'page_only' not in description  # Page filter fails
+		assert 'both_matching' not in description  # Page filter fails
+		assert 'both_one_fail' not in description  # Domain filter fails
+
+	@pytest.mark.asyncio
+	async def test_registry_action_decorator(self):
+		"""Test the action decorator with filters"""
+		registry = Registry()
+
+		# Define actions with different filters
+		@registry.action(
+			description='No filter action',
+		)
+		def no_filter_action():
+			pass
+
+		@registry.action(description='Domain filter action', domains=['example.com'])
+		def domain_filter_action():
+			pass
+
+		@registry.action(description='Page filter action', page_filter=lambda page: 'admin' in page.url)
+		def page_filter_action():
+			pass
+
+		# Check that system prompt only includes the no_filter_action
+		system_description = registry.get_prompt_description()
+		assert 'No filter action' in system_description
+		assert 'Domain filter action' not in system_description
+		assert 'Page filter action' not in system_description
+
+		# Check that page-specific prompt includes the right actions
+		mock_page = MagicMock(spec=Page)
+		mock_page.url = 'https://example.com/admin'
+
+		page_description = registry.get_prompt_description(mock_page)
+		assert 'Domain filter action' in page_description
+		assert 'Page filter action' in page_description
+
+	@pytest.mark.asyncio
+	async def test_action_model_creation(self):
+		"""Test that action models are created correctly with filters"""
+		registry = Registry()
+
+		# Define actions with different filters
+		@registry.action(
+			description='No filter action',
+		)
+		def no_filter_action():
+			pass
+
+		@registry.action(description='Domain filter action', domains=['example.com'])
+		def domain_filter_action():
+			pass
+
+		@registry.action(description='Page filter action', page_filter=lambda page: 'admin' in page.url)
+		def page_filter_action():
+			pass
+
+		@registry.action(description='Both filters action', domains=['example.com'], page_filter=lambda page: 'admin' in page.url)
+		def both_filters_action():
+			pass
+
+		# Initial action model should only include no_filter_action
+		initial_model = registry.create_action_model()
+		assert 'no_filter_action' in initial_model.model_fields
+		assert 'domain_filter_action' not in initial_model.model_fields
+		assert 'page_filter_action' not in initial_model.model_fields
+		assert 'both_filters_action' not in initial_model.model_fields
+
+		# Action model with matching page should include all matching actions
+		mock_page = MagicMock(spec=Page)
+		mock_page.url = 'https://example.com/admin'
+
+		page_model = registry.create_action_model(page=mock_page)
+		assert 'no_filter_action' in page_model.model_fields
+		assert 'domain_filter_action' in page_model.model_fields
+		assert 'page_filter_action' in page_model.model_fields
+		assert 'both_filters_action' in page_model.model_fields
+
+		# Action model with non-matching domain should exclude domain-filtered actions
+		mock_page.url = 'https://other.com/admin'
+		non_matching_domain_model = registry.create_action_model(page=mock_page)
+		assert 'no_filter_action' in non_matching_domain_model.model_fields
+		assert 'domain_filter_action' not in non_matching_domain_model.model_fields
+		assert 'page_filter_action' in non_matching_domain_model.model_fields
+		assert 'both_filters_action' not in non_matching_domain_model.model_fields
+
+		# Action model with non-matching page filter should exclude page-filtered actions
+		mock_page.url = 'https://example.com/dashboard'
+		non_matching_page_model = registry.create_action_model(page=mock_page)
+		assert 'no_filter_action' in non_matching_page_model.model_fields
+		assert 'domain_filter_action' in non_matching_page_model.model_fields
+		assert 'page_filter_action' not in non_matching_page_model.model_fields
+		assert 'both_filters_action' not in non_matching_page_model.model_fields
--- a/browser-use/tests/test_agent_actions.py
+++ b/browser-use/tests/test_agent_actions.py
@ -0,0 +1,220 @@
+import asyncio
+import os
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import BaseModel, SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.views import BrowserState
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+
+	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+	# return ChatOpenAI(model='gpt-4o-mini')
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+		# Clean up automatically happens with __aexit__
+
+
+# pytest tests/test_agent_actions.py -v -k "test_ecommerce_interaction" --capture=no
+# @pytest.mark.asyncio
+@pytest.mark.skip(reason='Kinda expensive to run')
+async def test_ecommerce_interaction(llm, context):
+	"""Test complex ecommerce interaction sequence"""
+	agent = Agent(
+		task="Go to amazon.com, search for 'laptop', filter by 4+ stars, and find the price of the first result",
+		llm=llm,
+		browser_context=context,
+		save_conversation_path='tmp/test_ecommerce_interaction/conversation',
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=20)
+
+	# Verify sequence of actions
+	action_sequence = []
+	for action in history.model_actions():
+		action_name = list(action.keys())[0]
+		if action_name in ['go_to_url', 'open_tab']:
+			action_sequence.append('navigate')
+		elif action_name == 'input_text':
+			action_sequence.append('input')
+			# Check that the input is 'laptop'
+			inp = action['input_text']['text'].lower()  # type: ignore
+			if inp == 'laptop':
+				action_sequence.append('input_exact_correct')
+			elif 'laptop' in inp:
+				action_sequence.append('correct_in_input')
+			else:
+				action_sequence.append('incorrect_input')
+		elif action_name == 'click_element':
+			action_sequence.append('click')
+
+	# Verify essential steps were performed
+	assert 'navigate' in action_sequence  # Navigated to Amazon
+	assert 'input' in action_sequence  # Entered search term
+	assert 'click' in action_sequence  # Clicked search/filter
+	assert 'input_exact_correct' in action_sequence or 'correct_in_input' in action_sequence
+
+
+# @pytest.mark.asyncio
+async def test_error_recovery(llm, context):
+	"""Test agent's ability to recover from errors"""
+	agent = Agent(
+		task='Navigate to nonexistent-site.com and then recover by going to google.com ',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	actions_names = history.action_names()
+	actions = history.model_actions()
+	assert 'go_to_url' in actions_names or 'open_tab' in actions_names, f'{actions_names} does not contain go_to_url or open_tab'
+	for action in actions:
+		if 'go_to_url' in action:
+			assert 'url' in action['go_to_url'], 'url is not in go_to_url'
+			assert action['go_to_url']['url'].endswith('google.com'), 'url does not end with google.com'
+			break
+
+
+# @pytest.mark.asyncio
+async def test_find_contact_email(llm, context):
+	"""Test agent's ability to find contact email on a website"""
+	agent = Agent(
+		task='Go to https://browser-use.com/ and find out the contact email',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	# Verify the agent found the contact email
+	extracted_content = history.extracted_content()
+	email = 'info@browser-use.com'
+	for content in extracted_content:
+		if email in content:
+			break
+	else:
+		pytest.fail(f'{extracted_content} does not contain {email}')
+
+
+# @pytest.mark.asyncio
+async def test_agent_finds_installation_command(llm, context):
+	"""Test agent's ability to find the pip installation command for browser-use on the web"""
+	agent = Agent(
+		task='Find the pip installation command for the browser-use repo',
+		llm=llm,
+		browser_context=context,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=10)
+
+	# Verify the agent found the correct installation command
+	extracted_content = history.extracted_content()
+	install_command = 'pip install browser-use'
+	for content in extracted_content:
+		if install_command in content:
+			break
+	else:
+		pytest.fail(f'{extracted_content} does not contain {install_command}')
+
+
+class CaptchaTest(BaseModel):
+	# One parametrized captcha scenario for test_captcha_solver.
+	name: str  # label used in the assertion failure message
+	url: str  # page the agent is told to open
+	success_text: str  # text that must appear on the page for the captcha to count as solved
+	additional_text: str | None = None  # optional extra instructions for the agent task
+
+
+# run 3 test: python -m pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
+# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+	'captcha',
+	[
+		CaptchaTest(
+			name='Text Captcha',
+			url='https://2captcha.com/demo/text',
+			success_text='Captcha is passed successfully!',
+		),
+		CaptchaTest(
+			name='Basic Captcha',
+			url='https://captcha.com/demos/features/captcha-demo.aspx',
+			success_text='Correct!',
+		),
+		CaptchaTest(
+			name='Rotate Captcha',
+			url='https://2captcha.com/demo/rotatecaptcha',
+			success_text='Captcha is passed successfully',
+			additional_text='Use multiple clicks at once. click done when image is exact correct position.',
+		),
+		CaptchaTest(
+			name='MT Captcha',
+			url='https://2captcha.com/demo/mtcaptcha',
+			success_text='Verified Successfully',
+			additional_text='Stop when you solved it successfully.',
+		),
+	],
+)
+async def test_captcha_solver(llm, context, captcha: CaptchaTest):
+	"""Test agent's ability to solve different types of captchas"""
+	agent = Agent(
+		task=f'Go to {captcha.url} and solve the captcha. {captcha.additional_text}',
+		llm=llm,
+		browser_context=context,
+	)
+	from browser_use.agent.views import AgentHistoryList
+
+	history: AgentHistoryList = await agent.run(max_steps=7)
+
+	state: BrowserState = await context.get_state()
+
+	all_text = state.element_tree.get_all_text_till_next_clickable_element()
+
+	if not all_text:
+		all_text = ''
+
+	if not isinstance(all_text, str):
+		all_text = str(all_text)
+
+	solved = captcha.success_text in all_text
+	assert solved, f'Failed to solve {captcha.name}'
+
+	# python -m pytest tests/test_agent_actions.py -v --capture=no
+
+	# pytest tests/test_agent_actions.py -v -k "test_captcha_solver" --capture=no --log-cli-level=INFO
--- a/browser-use/tests/test_attach_chrome.py
+++ b/browser-use/tests/test_attach_chrome.py
@ -0,0 +1,69 @@
+import asyncio
+
+from playwright.async_api import async_playwright
+
+
+async def test_full_screen(start_fullscreen: bool, maximize: bool):
+	async with async_playwright() as p:
+		try:
+			print('Attempting to connect to Chrome...')
+			# run in terminal: /Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 --no-first-run
+			browser = await p.chromium.connect_over_cdp(
+				'http://localhost:9222',
+				timeout=20000,  # 20 second timeout for connection
+			)
+			print('Connected to Chrome successfully')
+
+			# Get the first context and page, or create new ones if needed
+			if len(browser.contexts) == 0:
+				context = await browser.new_context(ignore_https_errors=True)
+			else:
+				context = browser.contexts[0]
+
+			if len(context.pages) == 0:
+				page = await context.new_page()
+			else:
+				page = context.pages[0]
+
+			print('Attempting to navigate to Gmail...')
+			try:
+				# First try with a shorter timeout
+				await page.goto(
+					'https://mail.google.com',
+					wait_until='load',  # Changed from domcontentloaded
+					timeout=10000,
+				)
+			except Exception as e:
+				print(f'First navigation attempt failed: {e}')
+				print('Trying again with different settings...')
+				# If that fails, try again with different settings
+				await page.goto(
+					'https://mail.google.com',
+					wait_until='commit',  # Less strict wait condition
+					timeout=30000,
+				)
+
+			# Wait for the page to stabilize
+			await asyncio.sleep(2)
+
+			print(f'Current page title: {await page.title()}')
+
+			# Optional: wait for specific Gmail elements
+			try:
+				await page.wait_for_selector('div[role="main"]', timeout=5000)
+				print('Gmail interface detected')
+			except Exception as e:
+				print(f'Note: Gmail interface not detected: {e}')
+
+			await asyncio.sleep(30)
+		except Exception as e:
+			print(f'An error occurred: {e}')
+			import traceback
+
+			traceback.print_exc()
+		finally:
+			await browser.close()
+
+
+if __name__ == '__main__':
+	# Manual entry point; both flags are passed as False (the coroutine does not read them).
+	asyncio.run(test_full_screen(False, False))
--- a/browser-use/tests/test_browser.py
+++ b/browser-use/tests/test_browser.py
@ -0,0 +1,578 @@
+import asyncio
+import subprocess
+
+import psutil
+import pytest
+import requests
+
+from browser_use.browser.browser import Browser, BrowserConfig, ProxySettings
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+
+
+@pytest.mark.asyncio
+async def test_builtin_browser_launch(monkeypatch):
+	"""
+	Test that the standard browser is launched correctly:
+	When no remote (cdp or wss) or chrome instance is provided, the Browser class uses _setup_builtin_browser.
+	This test monkeypatches async_playwright to return dummy objects, and asserts that get_playwright_browser returns the expected DummyBrowser.
+	"""
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(headless=True, disable_security=False, extra_browser_args=['--test'])
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), 'Expected DummyBrowser from _setup_builtin_browser'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_cdp_browser_launch(monkeypatch):
+	"""
+	Test that when a CDP URL is provided in the configuration, the Browser uses _setup_cdp
+	and returns the expected DummyBrowser.
+	"""
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def connect_over_cdp(self, endpoint_url, timeout=20000):
+			assert endpoint_url == 'ws://dummy-cdp-url', 'The endpoint URL should match the configuration.'
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(cdp_url='ws://dummy-cdp-url')
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), 'Expected DummyBrowser from _setup_cdp'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_wss_browser_launch(monkeypatch):
+	"""
+	Test that when a WSS URL is provided in the configuration,
+	the Browser uses setup_wss and returns the expected DummyBrowser.
+	"""
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def connect(self, wss_url):
+			assert wss_url == 'ws://dummy-wss-url', 'WSS URL should match the configuration.'
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(wss_url='ws://dummy-wss-url')
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), 'Expected DummyBrowser from _setup_wss'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_user_provided_browser_launch(monkeypatch):
+	"""
+	Test that when a browser_binary_path is provided the Browser class uses
+	_setup_user_provided_browser branch and returns the expected DummyBrowser object
+	by reusing an existing Chrome instance.
+	"""
+
+	# Dummy response for requests.get when checking chrome debugging endpoint.
+	class DummyResponse:
+		status_code = 200
+
+	def dummy_get(url, timeout):
+		if url == 'http://localhost:9222/json/version':
+			return DummyResponse()
+		raise requests.ConnectionError('Connection failed')
+
+	monkeypatch.setattr(requests, 'get', dummy_get)
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def connect_over_cdp(self, endpoint_url, timeout=20000):
+			assert endpoint_url == 'http://localhost:9222', "Endpoint URL must be 'http://localhost:9222'"
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(browser_binary_path='dummy/chrome', extra_browser_args=['--dummy-arg'])
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), 'Expected DummyBrowser from _setup_user_provided_browser'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_user_provided_browser_launch_on_custom_chrome_remote_debugging_port(monkeypatch):
+	"""
+	Test that when a browser_binary_path and chrome_remote_debugging_port are provided, the Browser class uses
+	_setup_user_provided_browser branch and returns the expected DummyBrowser object
+	by launching a new Chrome instance with --remote-debugging-port=chrome_remote_debugging_port argument.
+	"""
+
+	# Custom remote debugging port
+	custom_chrome_remote_debugging_port = 9223
+
+	# Dummy response for requests.get when checking chrome debugging endpoint.
+	class DummyResponse:
+		status_code = 200
+
+	def dummy_get(url, timeout):
+		if url == f'http://localhost:{custom_chrome_remote_debugging_port}/json/version':
+			return DummyResponse()
+		raise requests.ConnectionError('Connection failed')
+
+	monkeypatch.setattr(requests, 'get', dummy_get)
+
+	class DummyProcess:
+		def __init__(self, *args, **kwargs):
+			pass
+
+	class DummySubProcess:
+		pid = 1234
+
+	async def dummy_create_subprocess_exec(browser_binary_path, *args, **kwargs):
+		assert f'--remote-debugging-port={custom_chrome_remote_debugging_port}' in args, (
+			f'Chrome must be started with with --remote-debugging-port={custom_chrome_remote_debugging_port} argument'
+		)
+
+		return DummySubProcess()
+
+	monkeypatch.setattr(asyncio, 'create_subprocess_exec', dummy_create_subprocess_exec)
+	monkeypatch.setattr(psutil, 'Process', DummyProcess)
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def connect_over_cdp(self, endpoint_url, timeout=20000):
+			assert endpoint_url == f'http://localhost:{custom_chrome_remote_debugging_port}', (
+				f"Endpoint URL must be 'http://localhost:{custom_chrome_remote_debugging_port}'"
+			)
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+
+	config = BrowserConfig(
+		browser_binary_path='dummy/chrome',
+		chrome_remote_debugging_port=custom_chrome_remote_debugging_port,
+		extra_browser_args=['--dummy-arg'],
+	)
+
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), (
+		f'Expected DummyBrowser with remote debugging port {custom_chrome_remote_debugging_port} from _setup_user_provided_browser'
+	)
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_builtin_browser_disable_security_args(monkeypatch):
+	"""
+	Test that the standard browser launch includes disable-security arguments when disable_security is True.
+	This verifies that _setup_builtin_browser correctly appends the security disabling arguments along with
+	the base arguments and any extra arguments provided.
+	"""
+	# These are the base arguments defined in _setup_builtin_browser.
+	base_args = [
+		'--no-sandbox',
+		'--disable-blink-features=AutomationControlled',
+		'--disable-infobars',
+		'--disable-background-timer-throttling',
+		'--disable-popup-blocking',
+		'--disable-backgrounding-occluded-windows',
+		'--disable-renderer-backgrounding',
+		'--disable-window-activation',
+		'--disable-focus-on-load',
+		'--no-first-run',
+		'--no-default-browser-check',
+		'--no-startup-window',
+		'--window-position=0,0',
+	]
+	# When disable_security is True, these arguments should be added.
+	disable_security_args = [
+		'--disable-web-security',
+		'--disable-site-isolation-trials',
+		'--disable-features=IsolateOrigins,site-per-process',
+	]
+	# Additional arbitrary argument for testing extra args
+	extra_args = ['--dummy-extra']
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
+			# Expected args is the base args plus disable security args and the extra args.
+			expected_args = base_args + disable_security_args + extra_args
+			assert headless is True, 'Expected headless to be True'
+			assert args == expected_args, f'Expected args {expected_args}, but got {args}'
+			assert proxy is None, 'Expected proxy to be None'
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(headless=True, disable_security=True, extra_browser_args=extra_args)
+	browser_obj = Browser(config=config)
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), (
+		'Expected DummyBrowser from _setup_builtin_browser with disable_security active'
+	)
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_new_context_creation():
+	"""
+	Test that the new_context method returns a BrowserContext with the correct attributes.
+	This verifies that the BrowserContext is initialized with the provided Browser instance and configuration.
+	"""
+	config = BrowserConfig()
+	browser_obj = Browser(config=config)
+	custom_context_config = BrowserContextConfig()
+	context = await browser_obj.new_context(custom_context_config)
+	assert isinstance(context, BrowserContext), 'Expected new_context to return an instance of BrowserContext'
+	assert context.browser is browser_obj, "Expected the context's browser attribute to be the Browser instance"
+	assert context.config == custom_context_config, "Expected the context's config attribute to be the provided config"
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_user_provided_browser_launch_failure(monkeypatch):
+	"""
+	Test that when a Chrome instance cannot be started or connected to,
+	the Browser._setup_user_provided_browser branch eventually raises a RuntimeError.
+	We simulate failure by:
+	  - Forcing requests.get to always raise a ConnectionError (so no existing instance is found).
+	  - Monkeypatching subprocess.Popen to do nothing.
+	  - Replacing asyncio.sleep to avoid delays.
+	  - Having the dummy playwright's connect_over_cdp method always raise an Exception.
+	"""
+
+	def dummy_get(url, timeout):
+		raise requests.ConnectionError('Simulated connection failure')
+
+	monkeypatch.setattr(requests, 'get', dummy_get)
+	monkeypatch.setattr(subprocess, 'Popen', lambda args, stdout, stderr: None)
+
+	async def fake_sleep(seconds):
+		return
+
+	monkeypatch.setattr(asyncio, 'sleep', fake_sleep)
+
+	class DummyChromium:
+		async def connect_over_cdp(self, endpoint_url, timeout=20000):
+			raise Exception('Connection failed simulation')
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(browser_binary_path='dummy/chrome', extra_browser_args=['--dummy-arg'])
+	browser_obj = Browser(config=config)
+	with pytest.raises(RuntimeError, match='To start chrome in Debug mode'):
+		await browser_obj.get_playwright_browser()
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_get_playwright_browser_caching(monkeypatch):
+	"""
+	Test that get_playwright_browser returns a cached browser instance.
+	On the first call, the browser is initialized; on subsequent calls,
+	the same instance is returned.
+	"""
+
+	class DummyBrowser:
+		pass
+
+	class DummyChromium:
+		async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	config = BrowserConfig(headless=True, disable_security=False, extra_browser_args=['--test'])
+	browser_obj = Browser(config=config)
+	first_browser = await browser_obj.get_playwright_browser()
+	second_browser = await browser_obj.get_playwright_browser()
+	assert first_browser is second_browser, 'Expected the browser to be cached and reused across calls.'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_close_error_handling(monkeypatch):
+	"""
+	Test that the close method properly handles exceptions thrown by
+	playwright_browser.close() and playwright.stop(), ensuring that the
+	browser's attributes are set to None even if errors occur.
+	"""
+
+	class DummyBrowserWithError:
+		async def close(self):
+			raise Exception('Close error simulation')
+
+	class DummyPlaywrightWithError:
+		async def stop(self):
+			raise Exception('Stop error simulation')
+
+	config = BrowserConfig()
+	browser_obj = Browser(config=config)
+	browser_obj.playwright_browser = DummyBrowserWithError()
+	browser_obj.playwright = DummyPlaywrightWithError()
+	await browser_obj.close()
+	assert browser_obj.playwright_browser is None, 'Expected playwright_browser to be None after close'
+	assert browser_obj.playwright is None, 'Expected playwright to be None after close'
+
+
+@pytest.mark.asyncio
+async def test_standard_browser_launch_with_proxy(monkeypatch):
+	"""
+	Test that when a proxy is provided in the BrowserConfig, the _setup_builtin_browser method
+	correctly passes the proxy parameter to the playwright.chromium.launch method.
+	This test sets up a dummy async_playwright context and verifies that the dummy proxy is received.
+	"""
+
+	class DummyBrowser:
+		pass
+
+	# Create a dummy proxy settings instance.
+	dummy_proxy = ProxySettings(server='http://dummy.proxy')
+
+	class DummyChromium:
+		async def launch(self, headless, args, proxy=None, handle_sigterm=False, handle_sigint=False):
+			# Assert that the proxy passed equals the dummy proxy provided in the configuration.
+			assert isinstance(proxy, dict) and proxy['server'] == 'http://dummy.proxy', (
+				f'Expected proxy {dummy_proxy} but got {proxy}'
+			)
+			# We can also verify some base parameters if needed (headless, args) but our focus is proxy.
+			return DummyBrowser()
+
+	class DummyPlaywright:
+		def __init__(self):
+			self.chromium = DummyChromium()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	# Monkeypatch async_playwright to return our dummy async playwright context.
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+	# Create a BrowserConfig with the dummy proxy.
+	config = BrowserConfig(headless=False, disable_security=False, proxy=dummy_proxy)
+	browser_obj = Browser(config=config)
+	# Call get_playwright_browser and verify that the returned browser is as expected.
+	result_browser = await browser_obj.get_playwright_browser()
+	assert isinstance(result_browser, DummyBrowser), 'Expected DummyBrowser from _setup_builtin_browser with proxy provided'
+	await browser_obj.close()
+
+
+@pytest.mark.asyncio
+async def test_browser_window_size(monkeypatch):
+	"""
+	Test that when window_width and window_height are provided in BrowserContextConfig,
+	they're properly converted to a dictionary when passed to Playwright.
+
+	Playwright is replaced by in-memory stubs. The actual check lives in
+	DummyBrowser.new_context, which asserts that the record_video_size keyword argument
+	is a dict with width 1280 and height 1100.
+	"""
+
+	# Minimal page stand-in.
+	# NOTE(review): presumably these are the page methods _initialize_session touches — confirm.
+	class DummyPage:
+		def __init__(self):
+			self.url = 'about:blank'
+
+		async def goto(self, url):
+			pass
+
+		async def wait_for_load_state(self, state):
+			pass
+
+		async def title(self):
+			return 'Test Page'
+
+		async def bring_to_front(self):
+			pass
+
+		async def evaluate(self, script):
+			return True
+
+		def is_closed(self):
+			return False
+
+	# Minimal browser-context stand-in holding a single page.
+	class DummyContext:
+		def __init__(self):
+			self.pages = [DummyPage()]
+			# tracing points back at the context, so start()/stop() below double as tracing stubs
+			self.tracing = self
+
+		async def new_page(self):
+			return DummyPage()
+
+		async def add_init_script(self, script):
+			pass
+
+		async def start(self):
+			pass
+
+		async def stop(self, path=None):
+			pass
+
+		def on(self, event, handler):
+			pass
+
+		async def close(self):
+			pass
+
+		async def grant_permissions(self, permissions, origin=None):
+			pass
+
+	# Browser stand-in whose new_context performs the assertions this test is about.
+	class DummyBrowser:
+		def __init__(self):
+			self.contexts = []
+
+		async def new_context(self, **kwargs):
+			# Assert that record_video_size is a dictionary with expected values
+			assert isinstance(kwargs['record_video_size'], dict), (
+				f'Expected record_video_size to be a dictionary, got {type(kwargs["record_video_size"])}'
+			)
+			assert kwargs['record_video_size']['width'] == 1280, (
+				f'Expected width to be 1280, got {kwargs["record_video_size"].get("width")}'
+			)
+			assert kwargs['record_video_size']['height'] == 1100, (
+				f'Expected height to be 1100, got {kwargs["record_video_size"].get("height")}'
+			)
+
+			context = DummyContext()
+			self.contexts.append(context)
+			return context
+
+		async def close(self):
+			pass
+
+	class DummyPlaywright:
+		def __init__(self):
+			# chromium is the playwright stub itself, so chromium.launch resolves to launch() below
+			self.chromium = self
+
+		async def launch(self, **kwargs):
+			return DummyBrowser()
+
+		async def stop(self):
+			pass
+
+	class DummyAsyncPlaywrightContext:
+		async def start(self):
+			return DummyPlaywright()
+
+	# Monkeypatch async_playwright to return our dummy async playwright context
+	monkeypatch.setattr('browser_use.browser.browser.async_playwright', lambda: DummyAsyncPlaywrightContext())
+
+	# Create browser with default config
+	browser_obj = Browser()
+
+	# Get browser instance (the returned value is not used again in this test)
+	playwright_browser = await browser_obj.get_playwright_browser()
+
+	# Create context config with specific window size
+	context_config = BrowserContextConfig(window_width=1280, window_height=1100)
+
+	# Create browser context - this will test if window dimensions are properly converted
+	browser_context = BrowserContext(browser=browser_obj, config=context_config)
+	await browser_context._initialize_session()
+
+	# Clean up
+	await browser_context.close()
+	await browser_obj.close()
--- a/browser-use/tests/test_browser_config_models.py
+++ b/browser-use/tests/test_browser_config_models.py
@ -0,0 +1,201 @@
+import os
+
+import pytest
+
+from browser_use.browser.browser import Browser, BrowserConfig, ProxySettings
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+
+
+@pytest.mark.asyncio
+async def test_proxy_settings_pydantic_model():
+	"""
+	Test that ProxySettings as a Pydantic model is correctly converted to a dictionary when used.
+	"""
+	# Create ProxySettings with Pydantic model
+	proxy_settings = ProxySettings(
+		server='http://example.proxy:8080', bypass='localhost', username='testuser', password='testpass'
+	)
+
+	# Verify the model has correct dict-like access
+	assert proxy_settings['server'] == 'http://example.proxy:8080'
+	assert proxy_settings.get('bypass') == 'localhost'
+	assert proxy_settings.get('nonexistent', 'default') == 'default'
+
+	# Verify model_dump works correctly
+	proxy_dict = proxy_settings.model_dump()
+	assert isinstance(proxy_dict, dict)
+	assert proxy_dict['server'] == 'http://example.proxy:8080'
+	assert proxy_dict['bypass'] == 'localhost'
+	assert proxy_dict['username'] == 'testuser'
+	assert proxy_dict['password'] == 'testpass'
+
+	# We don't launch the actual browser - we just verify the model itself works as expected
+
+
+@pytest.mark.asyncio
+async def test_window_size_config():
+	"""
+	Test that BrowserContextConfig correctly handles window_width and window_height properties.
+	"""
+	# Create config with specific window dimensions
+	config = BrowserContextConfig(window_width=1280, window_height=1100)
+
+	# Verify the properties are set correctly
+	assert config.window_width == 1280
+	assert config.window_height == 1100
+
+	# Verify model_dump works correctly
+	config_dict = config.model_dump()
+	assert isinstance(config_dict, dict)
+	assert config_dict['window_width'] == 1280
+	assert config_dict['window_height'] == 1100
+
+	# Create with different values
+	config2 = BrowserContextConfig(window_width=1920, window_height=1080)
+	assert config2.window_width == 1920
+	assert config2.window_height == 1080
+
+
+@pytest.mark.asyncio
+@pytest.mark.skipif(os.environ.get('CI') == 'true', reason='Skip browser test in CI')
+async def test_window_size_with_real_browser():
+	"""
+	Integration test that verifies our window size Pydantic model is correctly
+	passed to Playwright and the actual browser window is configured with these settings.
+	This test is skipped in CI environments.
+	"""
+	# Create browser config with headless mode
+	browser_config = BrowserConfig(
+		headless=True,  # Use headless for faster test
+	)
+
+	# Create context config with specific dimensions we can check
+	context_config = BrowserContextConfig(
+		window_width=1024,
+		window_height=768,
+		maximum_wait_page_load_time=2.0,  # Faster timeouts for test
+		minimum_wait_page_load_time=0.2,
+		no_viewport=True,  # Use actual window size instead of viewport
+	)
+
+	# Create browser and context
+	browser = Browser(config=browser_config)
+	try:
+		# Initialize browser
+		playwright_browser = await browser.get_playwright_browser()
+		assert playwright_browser is not None, 'Browser initialization failed'
+
+		# Create context
+		browser_context = BrowserContext(browser=browser, config=context_config)
+		try:
+			# Initialize session
+			await browser_context._initialize_session()
+
+			# Get the current page
+			page = await browser_context.get_current_page()
+			assert page is not None, 'Failed to get current page'
+
+			# Get the context configuration used for browser window size
+			video_size = await page.evaluate("""
+                () => {
+                    // This returns information about the context recording settings
+                    // which should match our configured video size (browser_window_size)
+                    try {
+                        const settings = window.getPlaywrightContextSettings ? 
+                            window.getPlaywrightContextSettings() : null;
+                        if (settings && settings.recordVideo) {
+                            return settings.recordVideo.size;
+                        }
+                    } catch (e) {}
+                    
+                    // Fallback to window dimensions
+                    return {
+                        width: window.innerWidth,
+                        height: window.innerHeight
+                    };
+                }
+            """)
+
+			# Let's also check the viewport size
+			viewport_size = await page.evaluate("""
+                () => {
+                    return {
+                        width: window.innerWidth,
+                        height: window.innerHeight
+                    }
+                }
+            """)
+
+			print(f'Window size config: width={context_config.window_width}, height={context_config.window_height}')
+			print(f'Browser viewport size: {viewport_size}')
+
+			# This is a lightweight test to verify that the page has a size (details may vary by browser)
+			assert viewport_size['width'] > 0, 'Expected viewport width to be positive'
+			assert viewport_size['height'] > 0, 'Expected viewport height to be positive'
+
+			# For browser context creation in record_video_size, this is what truly matters
+			# Verify that our window size was properly serialized to a dictionary
+			print(f'Content of context session: {browser_context.session.context}')
+			print('✅ Browser window size used in the test')
+		finally:
+			# Clean up context
+			await browser_context.close()
+	finally:
+		# Clean up browser
+		await browser.close()
+
+
+@pytest.mark.asyncio
+async def test_proxy_with_real_browser():
+	"""
+	Integration test that verifies our proxy Pydantic model is correctly
+	passed to Playwright without requiring a working proxy server.
+
+	This test:
+	1. Creates a ProxySettings Pydantic model
+	2. Passes it to BrowserConfig
+	3. Verifies browser initialization works (proving the model was correctly serialized)
+	4. We don't actually verify proxy functionality (would require a working proxy)
+	"""
+	# Create proxy settings with a fake proxy server
+	proxy_settings = ProxySettings(
+		server='http://non.existent.proxy:9999', bypass='localhost', username='testuser', password='testpass'
+	)
+
+	# Test model serialization
+	proxy_dict = proxy_settings.model_dump()
+	assert isinstance(proxy_dict, dict)
+	assert proxy_dict['server'] == 'http://non.existent.proxy:9999'
+
+	# Create browser config with proxy
+	browser_config = BrowserConfig(
+		headless=True,
+		proxy=proxy_settings,
+	)
+
+	# Create browser
+	browser = Browser(config=browser_config)
+	try:
+		# Initialize browser - this should succeed even with invalid proxy
+		# because we're just checking configuration, not actual proxy functionality
+		try:
+			playwright_browser = await browser.get_playwright_browser()
+			assert playwright_browser is not None, 'Browser initialization failed'
+
+			# Success - the browser was initialized with our proxy settings
+			# We won't try to make requests (which would fail with non-existent proxy)
+			print('✅ Browser initialized with proxy settings successfully')
+
+			# We can inspect browser settings here to verify proxy was passed
+			# but the specific API to access these settings depends on the browser
+
+		except Exception as e:
+			# Make sure any exception isn't related to the proxy configuration format
+			# (Network errors due to non-existent proxy are acceptable, invalid type conversion isn't)
+			error_text = str(e).lower()
+			assert 'proxy' not in error_text or any(
+				term in error_text for term in ['connect', 'connection', 'network', 'timeout', 'unreachable']
+			), f'Proxy configuration error (not network error): {e}'
+	finally:
+		# Clean up browser
+		await browser.close()
--- a/browser-use/tests/test_browser_window_size_height.py
+++ b/browser-use/tests/test_browser_window_size_height.py
@ -0,0 +1,107 @@
+"""
+Example script demonstrating the browser_window_size feature.
+This script shows how to set a custom window size for the browser.
+"""
+
+import asyncio
+import sys
+from typing import Any
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+
+async def main():
+	"""Demonstrate setting a custom browser window size"""
+	# Create a browser with a specific window size
+	config = BrowserContextConfig(window_width=800, window_height=400)  # Small size to clearly demonstrate the fix
+
+	browser = None
+	browser_context = None
+
+	try:
+		# Initialize the browser with error handling
+		try:
+			browser = Browser(
+				config=BrowserConfig(
+					headless=False,  # Use non-headless mode to see the window
+				)
+			)
+		except Exception as e:
+			print(f'Failed to initialize browser: {e}')
+			return 1
+
+		# Create a browser context
+		try:
+			browser_context = await browser.new_context(config=config)
+		except Exception as e:
+			print(f'Failed to create browser context: {e}')
+			return 1
+
+		# Get the current page
+		page = await browser_context.get_current_page()
+
+		# Navigate to a test page with error handling
+		try:
+			await page.goto('https://example.com')
+			await page.wait_for_load_state('domcontentloaded')
+		except Exception as e:
+			print(f'Failed to navigate to example.com: {e}')
+			print('Continuing with test anyway...')
+
+		# Wait a bit to see the window
+		await asyncio.sleep(2)
+
+		# Get the actual viewport size using JavaScript
+		viewport_size = await page.evaluate("""
+			() => {
+				return {
+					width: window.innerWidth,
+					height: window.innerHeight
+				}
+			}
+		""")
+
+		print(f'Configured window size: width={config.window_width}, height={config.window_height}')
+		print(f'Actual viewport size: {viewport_size}')
+
+		# Validate the window size
+		validate_window_size({'width': config.window_width, 'height': config.window_height}, viewport_size)
+
+		# Wait a bit more to see the window
+		await asyncio.sleep(3)
+
+		return 0
+
+	except Exception as e:
+		print(f'Unexpected error: {e}')
+		return 1
+
+	finally:
+		# Close resources
+		if browser_context:
+			await browser_context.close()
+		if browser:
+			await browser.close()
+
+
+def validate_window_size(configured: dict[str, Any], actual: dict[str, Any]) -> None:
+	"""Compare configured window size with actual size and report differences"""
+	# Allow for small differences due to browser chrome, scrollbars, etc.
+	width_diff = abs(configured['width'] - actual['width'])
+	height_diff = abs(configured['height'] - actual['height'])
+
+	# Tolerance of 5% or 20px, whichever is greater
+	width_tolerance = max(configured['width'] * 0.05, 20)
+	height_tolerance = max(configured['height'] * 0.05, 20)
+
+	if width_diff > width_tolerance or height_diff > height_tolerance:
+		print('WARNING: Significant difference between configured and actual window size!')
+		print(f'Width difference: {width_diff}px, Height difference: {height_diff}px')
+	else:
+		print('Window size validation passed: actual size matches configured size within tolerance')
+
+
+if __name__ == '__main__':
+	result = asyncio.run(main())
+	sys.exit(result)
--- a/browser-use/tests/test_browser_window_size_height_no_viewport.py
+++ b/browser-use/tests/test_browser_window_size_height_no_viewport.py
@ -0,0 +1,33 @@
+import asyncio
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+
+async def test():
+	print('Testing browser window sizing with no_viewport=False...')
+	browser = Browser(BrowserConfig(headless=False))
+	context_config = BrowserContextConfig(window_width=1440, window_height=900, no_viewport=False)
+	browser_context = await browser.new_context(config=context_config)
+	page = await browser_context.get_current_page()
+	await page.goto('https://example.com')
+	await asyncio.sleep(2)
+	viewport = await page.evaluate('() => ({width: window.innerWidth, height: window.innerHeight})')
+	print('Configured size: width=1440, height=900')
+	print(f'Actual viewport size: {viewport}')
+
+	# Get the actual window size
+	window_size = await page.evaluate("""
+        () => ({
+            width: window.outerWidth,
+            height: window.outerHeight
+        })
+    """)
+	print(f'Actual window size: {window_size}')
+
+	await browser_context.close()
+	await browser.close()
+
+
+# Run the manual check only when this file is executed directly
+if __name__ == '__main__':
+	asyncio.run(test())
--- a/browser-use/tests/test_context.py
+++ b/browser-use/tests/test_context.py
@ -0,0 +1,363 @@
+import base64
+from unittest.mock import Mock
+
+import pytest
+
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from browser_use.browser.views import BrowserState
+from browser_use.dom.views import DOMElementNode
+
+
+def test_is_url_allowed():
+	"""
+	Test the _is_url_allowed method to verify that it correctly checks URLs against
+	the allowed domains configuration.
+	Scenario 1: When allowed_domains is None, all URLs should be allowed.
+	Scenario 2: When allowed_domains is a list, only URLs matching the allowed domain(s) are allowed.
+	Scenario 3: When the URL is malformed, it should return False.
+	Scenario 4: When allowed_domains contain glob patterns, see: test_url_allowlist_security.py
+	"""
+	# Create a dummy Browser mock. Only the 'config' attribute is needed for _is_url_allowed.
+	dummy_browser = Mock()
+	# Set an empty config for dummy_browser; it won't be used in _is_url_allowed.
+	dummy_browser.config = Mock()
+	# Scenario 1: allowed_domains is None, any URL should be allowed.
+	config1 = BrowserContextConfig(allowed_domains=None)
+	context1 = BrowserContext(browser=dummy_browser, config=config1)
+	assert context1._is_url_allowed('http://anydomain.com') is True
+	assert context1._is_url_allowed('https://anotherdomain.org/path') is True
+	# Scenario 2: allowed_domains is provided.
+	allowed = ['example.com', 'mysite.org']
+	config2 = BrowserContextConfig(allowed_domains=allowed)
+	context2 = BrowserContext(browser=dummy_browser, config=config2)
+	# URL exactly matching
+	assert context2._is_url_allowed('http://example.com') is True
+	# URL with subdomain (should be allowed)
+	assert context2._is_url_allowed('http://sub.example.com/path') is True
+	# URL with different domain (should not be allowed)
+	assert context2._is_url_allowed('http://notexample.com') is False
+	# URL that matches second allowed domain
+	assert context2._is_url_allowed('https://mysite.org/page') is True
+	# URL with port number, still allowed (port is stripped)
+	assert context2._is_url_allowed('http://example.com:8080') is True
+	# Scenario 3: Malformed URL or empty domain
+	# urlparse will return an empty netloc for some malformed URLs.
+	assert context2._is_url_allowed('notaurl') is False
+
+
+def test_convert_simple_xpath_to_css_selector():
+	"""
+	Test the _convert_simple_xpath_to_css_selector method of BrowserContext.
+	This verifies that simple XPath expressions (with and without indices) are correctly converted to CSS selectors.
+	"""
+	# Test empty xpath returns empty string
+	assert BrowserContext._convert_simple_xpath_to_css_selector('') == ''
+	# Test a simple xpath without indices
+	xpath = '/html/body/div/span'
+	expected = 'html > body > div > span'
+	result = BrowserContext._convert_simple_xpath_to_css_selector(xpath)
+	assert result == expected
+	# Test xpath with an index on one element: [2] should translate to :nth-of-type(2)
+	xpath = '/html/body/div[2]/span'
+	expected = 'html > body > div:nth-of-type(2) > span'
+	result = BrowserContext._convert_simple_xpath_to_css_selector(xpath)
+	assert result == expected
+	# Test xpath with indices on multiple elements:
+	# For "li[3]" -> li:nth-of-type(3) and for "a[1]" -> a:nth-of-type(1)
+	xpath = '/ul/li[3]/a[1]'
+	expected = 'ul > li:nth-of-type(3) > a:nth-of-type(1)'
+	result = BrowserContext._convert_simple_xpath_to_css_selector(xpath)
+	assert result == expected
+
+
+def test_get_initial_state():
+	"""
+	Test the _get_initial_state method to verify it returns the correct initial BrowserState.
+	The test checks that when a dummy page with a URL is provided,
+	the returned state contains that URL and other default values.
+	"""
+	# Create a dummy browser since only its existence is needed.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+
+	# Define a dummy page with a 'url' attribute.
+	class DummyPage:
+		url = 'http://dummy.com'
+
+	dummy_page = DummyPage()
+	# Call _get_initial_state with a page: URL should be set from page.url.
+	state_with_page = context._get_initial_state(page=dummy_page)
+	assert state_with_page.url == dummy_page.url
+	# Verify that the element_tree is initialized with tag 'root'
+	assert state_with_page.element_tree.tag_name == 'root'
+	# Call _get_initial_state without a page: URL should be empty.
+	state_without_page = context._get_initial_state()
+	assert state_without_page.url == ''
+
+
+@pytest.mark.asyncio
+async def test_execute_javascript():
+	"""
+	Test the execute_javascript method by mocking the current page's evaluate function.
+	This ensures that when execute_javascript is called, it correctly returns the value
+	from the page's evaluate method.
+	"""
+
+	# Define a dummy page with an async evaluate method.
+	class DummyPage:
+		async def evaluate(self, script):
+			return 'dummy_result'
+
+	# Create a dummy session object with a dummy current_page.
+	dummy_session = type('DummySession', (), {})()
+	dummy_session.current_page = DummyPage()
+	# Create a dummy browser mock with a minimal config.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize the BrowserContext with the dummy browser and config.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	# Manually set the session to our dummy session.
+	context.session = dummy_session
+	# Call execute_javascript and verify it returns the expected result.
+	result = await context.execute_javascript('return 1+1')
+	assert result == 'dummy_result'
+
+
+@pytest.mark.asyncio
+async def test_enhanced_css_selector_for_element():
+	"""
+	Test the _enhanced_css_selector_for_element method to verify that
+	it returns the correct CSS selector string for a dummy DOMElementNode.
+	The test checks that:
+	  - The provided xpath is correctly converted (handling indices),
+	  - Class attributes are appended as CSS classes,
+	  - Standard and dynamic attributes (including ones with special characters)
+	    are correctly added to the selector.
+	"""
+	# Create a dummy DOMElementNode instance with a complex set of attributes.
+	dummy_element = DOMElementNode(
+		tag_name='div',
+		is_visible=True,
+		parent=None,
+		xpath='/html/body/div[2]',
+		attributes={'class': 'foo bar', 'id': 'my-id', 'placeholder': 'some "quoted" text', 'data-testid': '123'},
+		children=[],
+	)
+	# Call the method with include_dynamic_attributes=True.
+	actual_selector = BrowserContext._enhanced_css_selector_for_element(dummy_element, include_dynamic_attributes=True)
+	# Expected conversion:
+	# 1. The xpath "/html/body/div[2]" converts to "html > body > div:nth-of-type(2)".
+	# 2. The class attribute "foo bar" appends ".foo.bar".
+	# 3. The "id" attribute is added as [id="my-id"].
+	# 4. The "placeholder" attribute contains quotes; it is added as
+	#    [placeholder*="some \"quoted\" text"].
+	# 5. The dynamic attribute "data-testid" is added as [data-testid="123"].
+	expected_selector = (
+		'html > body > div:nth-of-type(2).foo.bar[id="my-id"][placeholder*="some \\"quoted\\" text"][data-testid="123"]'
+	)
+	assert actual_selector == expected_selector, f'Expected {expected_selector}, but got {actual_selector}'
+
+
+@pytest.mark.asyncio
+async def test_get_scroll_info():
+	"""
+	Test the get_scroll_info method by mocking the page's evaluate method.
+	This dummy page returns preset values for window.scrollY, window.innerHeight,
+	and document.documentElement.scrollHeight. The test then verifies that the
+	computed scroll information (pixels_above and pixels_below) match the expected values.
+	"""
+
+	# Define a dummy page with an async evaluate method returning preset values.
+	class DummyPage:
+		async def evaluate(self, script):
+			if 'window.scrollY' in script:
+				return 100  # scrollY
+			elif 'window.innerHeight' in script:
+				return 500  # innerHeight
+			elif 'document.documentElement.scrollHeight' in script:
+				return 1200  # total scrollable height
+			return None
+
+	# Create a dummy session with a dummy current_page.
+	dummy_session = type('DummySession', (), {})()
+	dummy_session.current_page = DummyPage()
+	# We also need a dummy context attribute but it won't be used in this test.
+	dummy_session.context = type('DummyContext', (), {})()
+	# Create a dummy browser mock.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize BrowserContext with the dummy browser and config.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	# Manually set the session to our dummy session.
+	context.session = dummy_session
+	# Call get_scroll_info on the dummy page.
+	pixels_above, pixels_below = await context.get_scroll_info(dummy_session.current_page)
+	# Expected calculations:
+	# pixels_above = scrollY = 100
+	# pixels_below = total_height - (scrollY + innerHeight) = 1200 - (100 + 500) = 600
+	assert pixels_above == 100, f'Expected 100 pixels above, got {pixels_above}'
+	assert pixels_below == 600, f'Expected 600 pixels below, got {pixels_below}'
+
+
+@pytest.mark.asyncio
+async def test_reset_context():
+	"""
+	Test the reset_context method to ensure it correctly closes all existing tabs,
+	resets the cached state, and creates a new page.
+	"""
+
+	# Dummy Page with close and wait_for_load_state methods.
+	class DummyPage:
+		def __init__(self, url='http://dummy.com'):
+			self.url = url
+			self.closed = False
+
+		async def close(self):
+			self.closed = True
+
+		async def wait_for_load_state(self):
+			pass
+
+	# Dummy Context that holds pages and can create a new page.
+	class DummyContext:
+		def __init__(self):
+			self.pages = []
+
+		async def new_page(self):
+			new_page = DummyPage(url='')
+			self.pages.append(new_page)
+			return new_page
+
+	# Create a dummy session with a context containing two pages.
+	dummy_session = type('DummySession', (), {})()
+	dummy_context = DummyContext()
+	page1 = DummyPage(url='http://page1.com')
+	page2 = DummyPage(url='http://page2.com')
+	dummy_context.pages.extend([page1, page2])
+	dummy_session.context = dummy_context
+	dummy_session.current_page = page1
+	dummy_session.cached_state = None
+	# Create a dummy browser mock.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize BrowserContext using our dummy_browser and config,
+	# and manually set its session to our dummy session.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	context.session = dummy_session
+	# Confirm session has 2 pages before reset.
+	assert len(dummy_session.context.pages) == 2
+	# Call reset_context which should close existing pages,
+	# reset the cached state, and create a new page as current_page.
+	await context.reset_context()
+	# Verify that initial pages were closed.
+	assert page1.closed is True
+	assert page2.closed is True
+	# Check that a new page is created and set as current_page.
+	assert dummy_session.current_page is not None
+	new_page = dummy_session.current_page
+	# New page URL should be empty as per _get_initial_state.
+	assert new_page.url == ''
+	# Verify that cached_state is reset to an initial BrowserState.
+	state = dummy_session.cached_state
+	assert isinstance(state, BrowserState)
+	assert state.url == ''
+	assert state.element_tree.tag_name == 'root'
+
+
+@pytest.mark.asyncio
+async def test_take_screenshot():
+	"""
+	Test the take_screenshot method to verify that it returns a base64 encoded screenshot string.
+	A dummy page with a mocked screenshot method is used, returning a predefined byte string.
+	"""
+
+	class DummyPage:
+		async def screenshot(self, full_page, animations):
+			# Verify that parameters are forwarded correctly.
+			assert full_page is True, 'full_page parameter was not correctly passed'
+			assert animations == 'disabled', 'animations parameter was not correctly passed'
+			# Return a test byte string.
+			return b'test'
+
+	# Create a dummy session with the DummyPage as the current_page.
+	dummy_session = type('DummySession', (), {})()
+	dummy_session.current_page = DummyPage()
+	dummy_session.context = None  # Not used in this test
+	# Create a dummy browser mock.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize the BrowserContext with the dummy browser and config.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	# Manually set the session to our dummy session.
+	context.session = dummy_session
+	# Call take_screenshot and check that it returns the expected base64 encoded string.
+	result = await context.take_screenshot(full_page=True)
+	expected = base64.b64encode(b'test').decode('utf-8')
+	assert result == expected, f'Expected {expected}, but got {result}'
+
+
+@pytest.mark.asyncio
+async def test_refresh_page_behavior():
+	"""
+	Test the refresh_page method of BrowserContext to verify that it correctly reloads the current page
+	and waits for the page's load state. This is done by creating a dummy page that flags when its
+	reload and wait_for_load_state methods are called.
+	"""
+
+	class DummyPage:
+		def __init__(self):
+			self.reload_called = False
+			self.wait_for_load_state_called = False
+
+		async def reload(self):
+			self.reload_called = True
+
+		async def wait_for_load_state(self):
+			self.wait_for_load_state_called = True
+
+	# Create a dummy session with the dummy page as the current_page.
+	dummy_page = DummyPage()
+	dummy_session = type('DummySession', (), {})()
+	dummy_session.current_page = dummy_page
+	dummy_session.context = None  # Not required for this test
+	# Create a dummy browser mock
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize BrowserContext with the dummy browser and config,
+	# and manually set its session to our dummy session.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	context.session = dummy_session
+	# Call refresh_page and verify that reload and wait_for_load_state were called.
+	await context.refresh_page()
+	assert dummy_page.reload_called is True, 'Expected the page to call reload()'
+	assert dummy_page.wait_for_load_state_called is True, 'Expected the page to call wait_for_load_state()'
+
+
+@pytest.mark.asyncio
+async def test_remove_highlights_failure():
+	"""
+	Test the remove_highlights method to ensure that if the page.evaluate call fails,
+	the exception is caught and does not propagate (i.e. the method handles errors gracefully).
+	"""
+
+	# Dummy page that always raises an exception when evaluate is called.
+	class DummyPage:
+		async def evaluate(self, script):
+			raise Exception('dummy error')
+
+	# Create a dummy session with the DummyPage as current_page.
+	dummy_session = type('DummySession', (), {})()
+	dummy_session.current_page = DummyPage()
+	dummy_session.context = None  # Not used in this test
+	# Create a dummy browser mock.
+	dummy_browser = Mock()
+	dummy_browser.config = Mock()
+	# Initialize BrowserContext with the dummy browser and configuration.
+	context = BrowserContext(browser=dummy_browser, config=BrowserContextConfig())
+	context.session = dummy_session
+	# Call remove_highlights and verify that no exception is raised.
+	try:
+		await context.remove_highlights()
+	except Exception as e:
+		pytest.fail(f'remove_highlights raised an exception: {e}')
--- a/browser-use/tests/test_controller.py
+++ b/browser-use/tests/test_controller.py
--- a/browser-use/tests/test_core_functionality.py
+++ b/browser-use/tests/test_core_functionality.py
@ -0,0 +1,202 @@
+import asyncio
+import os
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+@pytest.fixture(scope='function')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='function')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+
+# pytest -s -k test_search_google
+@pytest.mark.asyncio
+async def test_search_google(llm, context):
+	"""Test 'Search Google' action"""
+	agent = Agent(
+		task="Search Google for 'OpenAI'.",
+		llm=llm,
+		browser_context=context,
+	)
+	history: AgentHistoryList = await agent.run(max_steps=2)
+	action_names = history.action_names()
+	assert 'search_google' in action_names
+
+
+@pytest.mark.asyncio
+async def test_go_to_url(llm, context):
+	"""Test 'Navigate to URL' action"""
+	agent = Agent(
+		task="Navigate to 'https://www.python.org'.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=2)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names
+
+
+@pytest.mark.asyncio
+async def test_go_back(llm, context):
+	"""Test 'Go back' action"""
+	agent = Agent(
+		task="Go to 'https://www.example.com', then go back.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=3)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names
+	assert 'go_back' in action_names
+
+
+@pytest.mark.asyncio
+async def test_click_element(llm, context):
+	"""Test 'Click element' action"""
+	agent = Agent(
+		task="Go to 'https://www.python.org' and click on the first link.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=4)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names or 'open_tab' in action_names
+	assert 'click_element_by_index' in action_names
+
+
+@pytest.mark.asyncio
+async def test_input_text(llm, context):
+	"""Test 'Input text' action"""
+	agent = Agent(
+		task="Go to 'https://www.google.com' and input 'OpenAI' into the search box.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=4)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names
+	assert 'input_text' in action_names
+
+
+@pytest.mark.asyncio
+async def test_switch_tab(llm, context):
+	"""Test 'Switch tab' action"""
+	agent = Agent(
+		task="Open new tabs with 'https://www.google.com' and 'https://www.wikipedia.org', then switch to the first tab.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=6)
+	action_names = history.action_names()
+	open_tab_count = action_names.count('open_tab')
+	assert open_tab_count >= 2
+	assert 'switch_tab' in action_names
+
+
+@pytest.mark.asyncio
+async def test_open_new_tab(llm, context):
+	"""Test 'Open new tab' action"""
+	agent = Agent(
+		task="Open a new tab and go to 'https://www.example.com'.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=3)
+	action_names = history.action_names()
+	assert 'open_tab' in action_names
+
+
+@pytest.mark.asyncio
+async def test_extract_page_content(llm, context):
+	"""Test 'Extract page content' action"""
+	agent = Agent(
+		task="Go to 'https://www.example.com' and extract the page content.",
+		llm=llm,
+		browser_context=context,
+	)
+	history = await agent.run(max_steps=3)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names
+	assert 'extract_content' in action_names
+
+
+# pytest -k test_done_action
+@pytest.mark.asyncio
+async def test_done_action(llm, context):
+	"""Test 'Complete task' action"""
+	agent = Agent(
+		task="Navigate to 'https://www.example.com' and signal that the task is done.",
+		llm=llm,
+		browser_context=context,
+	)
+
+	history = await agent.run(max_steps=3)
+	action_names = history.action_names()
+	assert 'go_to_url' in action_names
+	assert 'done' in action_names
+
+
+# run with: pytest -k test_scroll_down
+@pytest.mark.asyncio
+async def test_scroll_down(llm, context):
+	"""Test 'Scroll down' action and validate that the page actually scrolled"""
+	agent = Agent(
+		task="Go to 'https://en.wikipedia.org/wiki/Internet' and scroll down the page.",
+		llm=llm,
+		browser_context=context,
+	)
+	# Get the browser instance
+	page = await context.get_current_page()
+
+	# Navigate to the page and get initial scroll position
+	await agent.run(max_steps=1)
+	initial_scroll_position = await page.evaluate('window.scrollY;')
+
+	# Perform the scroll down action
+	await agent.run(max_steps=2)
+	final_scroll_position = await page.evaluate('window.scrollY;')
+
+	# Validate that the scroll position has changed
+	assert final_scroll_position > initial_scroll_position, 'Page did not scroll down'
+
+	# Validate that the 'scroll_down' action was executed
+	history = agent.history
+	action_names = history.action_names()
+	assert 'scroll_down' in action_names
--- a/browser-use/tests/test_dropdown.py
+++ b/browser-use/tests/test_dropdown.py
@ -0,0 +1,40 @@
+"""
+Test dropdown interaction functionality.
+"""
+
+import pytest
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+
+
+@pytest.mark.asyncio
+async def test_dropdown(llm, browser_context):
+	"""Test selecting an option from a dropdown menu."""
+	agent = Agent(
+		task=(
+			'go to https://codepen.io/geheimschriftstift/pen/mPLvQz and first get all options for the dropdown and then select the 5th option'
+		),
+		llm=llm,
+		browser_context=browser_context,
+	)
+
+	try:
+		history: AgentHistoryList = await agent.run(20)
+		result = history.final_result()
+
+		# Verify dropdown interaction
+		assert result is not None
+		assert 'Duck' in result, "Expected 5th option 'Duck' to be selected"
+
+		# Verify dropdown state
+		element = await browser_context.get_element_by_selector('select')
+		assert element is not None, 'Dropdown element should exist'
+
+		value = await element.evaluate('el => el.value')
+		assert value == '5', 'Dropdown should have 5th option selected'
+
+	except Exception as e:
+		pytest.fail(f'Dropdown test failed: {str(e)}')
+	finally:
+		await browser_context.close()
--- a/browser-use/tests/test_dropdown_complex.py
+++ b/browser-use/tests/test_dropdown_complex.py
@ -0,0 +1,44 @@
+"""
+Test complex dropdown interaction functionality.
+"""
+
+import pytest
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+
+
+@pytest.mark.asyncio
+async def test_dropdown_complex(llm, browser_context):
+	"""Test selecting an option from a complex dropdown menu."""
+	agent = Agent(
+		task=(
+			'go to https://codepen.io/shyam-king/pen/pvzpByJ and first get all options for the dropdown and then select the json option'
+		),
+		llm=llm,
+		browser_context=browser_context,
+	)
+
+	try:
+		history: AgentHistoryList = await agent.run(20)
+		result = history.final_result()
+
+		# Verify dropdown interaction
+		assert result is not None
+		assert 'json' in result.lower(), "Expected 'json' option to be selected"
+
+		# Verify dropdown state
+		element = await browser_context.get_element_by_selector('.select-selected')
+		assert element is not None, 'Custom dropdown element should exist'
+
+		text = await element.text_content()
+		assert 'json' in text.lower(), 'Dropdown should display json option'
+
+		# Verify the selected option's effect
+		code_element = await browser_context.get_element_by_selector('pre code')
+		assert code_element is not None, 'Code element should be visible when JSON is selected'
+
+	except Exception as e:
+		pytest.fail(f'Complex dropdown test failed: {str(e)}')
+	finally:
+		await browser_context.close()
--- a/browser-use/tests/test_dropdown_error.py
+++ b/browser-use/tests/test_dropdown_error.py
@ -0,0 +1,40 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import os
+import sys
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, AgentHistoryList
+
+llm = ChatOpenAI(model='gpt-4o')
+# browser = Browser(config=BrowserConfig(headless=False))
+
+agent = Agent(
+	task=('go to https://codepen.io/shyam-king/pen/emOyjKm and select number "4" and return the output of "selected value"'),
+	llm=llm,
+	browser_context=BrowserContext(
+		browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+	),
+)
+
+
+async def test_dropdown():
+	history: AgentHistoryList = await agent.run(20)
+	# await controller.browser.close(force=True)
+
+	result = history.final_result()
+	assert result is not None
+	assert '4' in result
+	print(result)
+
+	# await browser.close()
--- a/browser-use/tests/test_excluded_actions.py
+++ b/browser-use/tests/test_excluded_actions.py
@ -0,0 +1,98 @@
+import asyncio
+import os
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+# run with:
+# python -m pytest tests/test_excluded_actions.py -v -k "test_only_open_tab_allowed" --capture=no
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+
+# pytest tests/test_excluded_actions.py -v -k "test_only_open_tab_allowed" --capture=no
+@pytest.mark.asyncio
+async def test_only_open_tab_allowed(llm, context):
+	"""Test that only open_tab action is available while others are excluded"""
+
+	# Create list of all default actions except open_tab
+	excluded_actions = [
+		'search_google',
+		'go_to_url',
+		'go_back',
+		'click_element',
+		'input_text',
+		'switch_tab',
+		'extract_content',
+		'done',
+		'scroll_down',
+		'scroll_up',
+		'send_keys',
+		'scroll_to_text',
+		'get_dropdown_options',
+		'select_dropdown_option',
+	]
+
+	# Initialize controller with excluded actions
+	controller = Controller(exclude_actions=excluded_actions)
+
+	# Create agent with a task that would normally use other actions
+	agent = Agent(
+		task="Go to google.com and search for 'python programming'",
+		llm=llm,
+		browser_context=context,
+		controller=controller,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=2)
+
+	# Verify that only open_tab was used
+	action_names = history.action_names()
+
+	# Only open_tab should be in the actions
+	assert all(action == 'open_tab' for action in action_names), (
+		f'Found unexpected actions: {[a for a in action_names if a != "open_tab"]}'
+	)
+
+	# open_tab should be used at least once
+	assert 'open_tab' in action_names, 'open_tab action was not used'
--- a/browser-use/tests/test_full_screen.py
+++ b/browser-use/tests/test_full_screen.py
@ -0,0 +1,21 @@
+import asyncio
+
+from playwright.async_api import async_playwright
+
+
+async def test_full_screen(start_fullscreen: bool, maximize: bool):
+	async with async_playwright() as p:
+		browser = await p.chromium.launch(
+			headless=False,
+			args=['--start-maximized'],
+		)
+		context = await browser.new_context(no_viewport=True, viewport=None)
+		page = await context.new_page()
+		await page.goto('https://google.com')
+
+		await asyncio.sleep(10)
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(test_full_screen(False, False))
--- a/browser-use/tests/test_gif_path.py
+++ b/browser-use/tests/test_gif_path.py
@ -0,0 +1,40 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import os
+import sys
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, AgentHistoryList
+
+llm = ChatOpenAI(model='gpt-4o')
+
+agent = Agent(
+	task=('go to google.com and search for text "hi there"'),
+	llm=llm,
+	browser_context=BrowserContext(
+		browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+	),
+	generate_gif='./google.gif',
+)
+
+
+async def test_gif_path():
+	if os.path.exists('./google.gif'):
+		os.unlink('./google.gif')
+
+	history: AgentHistoryList = await agent.run(20)
+
+	result = history.final_result()
+	assert result is not None
+
+	assert os.path.exists('./google.gif'), 'google.gif was not created'
--- a/browser-use/tests/test_mind2web.py
+++ b/browser-use/tests/test_mind2web.py
@ -0,0 +1,137 @@
+"""
+Test browser automation using Mind2Web dataset tasks with pytest framework.
+"""
+
+import asyncio
+import json
+import os
+from typing import Any
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.utils import logger
+
+# Constants
+# MAX_STEPS is not referenced elsewhere in this module; presumably the intended
+# step cap for agent runs — TODO confirm.
+MAX_STEPS = 50
+# Number of leading dataset entries kept by the test_cases fixture.
+TEST_SUBSET_SIZE = 10
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as new_context:
+		yield new_context
+
+
+@pytest.fixture(scope='session')
+def test_cases() -> list[dict[str, Any]]:
+	"""Load test cases from Mind2Web dataset"""
+	file_path = os.path.join(os.path.dirname(__file__), 'mind2web_data/processed.json')
+	logger.info(f'Loading test cases from {file_path}')
+
+	with open(file_path) as f:
+		data = json.load(f)
+
+	subset = data[:TEST_SUBSET_SIZE]
+	logger.info(f'Loaded {len(subset)}/{len(data)} test cases')
+	return subset
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+
+	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+
+# run with: pytest -s -v tests/test_mind2web.py::test_random_samples
+@pytest.mark.asyncio
+async def test_random_samples(test_cases: list[dict[str, Any]], llm, context, validator):
+	"""Test a random sampling of tasks across different websites"""
+	import random
+
+	logger.info('=== Testing Random Samples ===')
+
+	# Take random samples
+	samples = random.sample(test_cases, 1)
+
+	for i, case in enumerate(samples, 1):
+		task = f'Go to {case["website"]}.com and {case["confirmed_task"]}'
+		logger.info(f'--- Random Sample {i}/{len(samples)} ---')
+		logger.info(f'Task: {task}\n')
+
+		agent = Agent(task, llm, browser_context=context)
+
+		await agent.run()
+
+		logger.info('Validating random sample task...')
+
+		# TODO: Validate the task
+
+
+def test_dataset_integrity(test_cases):
+	"""Test the integrity of the test dataset"""
+	logger.info('\n=== Testing Dataset Integrity ===')
+
+	required_fields = ['website', 'confirmed_task', 'action_reprs']
+	missing_fields = []
+
+	logger.info(f'Checking {len(test_cases)} test cases for required fields')
+
+	for i, case in enumerate(test_cases, 1):
+		logger.debug(f'Checking case {i}/{len(test_cases)}')
+
+		for field in required_fields:
+			if field not in case:
+				missing_fields.append(f'Case {i}: {field}')
+				logger.warning(f"Missing field '{field}' in case {i}")
+
+		# Type checks
+		if not isinstance(case.get('confirmed_task'), str):
+			logger.error(f"Case {i}: 'confirmed_task' must be string")
+			assert False, 'Task must be string'
+
+		if not isinstance(case.get('action_reprs'), list):
+			logger.error(f"Case {i}: 'action_reprs' must be list")
+			assert False, 'Actions must be list'
+
+		if len(case.get('action_reprs', [])) == 0:
+			logger.error(f"Case {i}: 'action_reprs' must not be empty")
+			assert False, 'Must have at least one action'
+
+	if missing_fields:
+		logger.error('Dataset integrity check failed')
+		assert False, f'Missing fields: {missing_fields}'
+	else:
+		logger.info('✅ Dataset integrity check passed')
+
+
+if __name__ == '__main__':
+	pytest.main([__file__, '-v'])
--- a/browser-use/tests/test_models.py
+++ b/browser-use/tests/test_models.py
@ -0,0 +1,160 @@
+import asyncio
+import os
+
+import httpx
+import pytest
+from langchain_anthropic import ChatAnthropic
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_ollama import ChatOllama
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+@pytest.fixture(scope='function')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='function')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+
+
+api_key_gemini = SecretStr(os.getenv('GOOGLE_API_KEY') or '')
+api_key_deepseek = SecretStr(os.getenv('DEEPSEEK_API_KEY') or '')
+api_key_anthropic = SecretStr(os.getenv('ANTHROPIC_API_KEY') or '')
+
+
+# pytest -s -v tests/test_models.py
+@pytest.fixture(
+	params=[
+		ChatOpenAI(model='gpt-4o'),
+		ChatOpenAI(model='gpt-4o-mini'),
+		AzureChatOpenAI(
+			model='gpt-4o',
+			api_version='2024-10-21',
+			azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+			api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+		),
+		# ChatOpenAI(
+		# base_url='https://api.deepseek.com/v1',
+		# model='deepseek-reasoner',
+		# api_key=api_key_deepseek,
+		# ),
+		# run: ollama start
+		ChatOllama(
+			model='qwen2.5:latest',
+			num_ctx=128000,
+		),
+		AzureChatOpenAI(
+			model='gpt-4o-mini',
+			api_version='2024-10-21',
+			azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+			api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+		),
+		ChatAnthropic(
+			model_name='claude-3-5-sonnet-20240620',
+			timeout=100,
+			temperature=0.0,
+			stop=None,
+			api_key=api_key_anthropic,
+		),
+		ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=api_key_gemini),
+		ChatGoogleGenerativeAI(model='gemini-1.5-pro', api_key=api_key_gemini),
+		ChatGoogleGenerativeAI(model='gemini-1.5-flash-latest', api_key=api_key_gemini),
+		ChatOpenAI(
+			base_url='https://api.deepseek.com/v1',
+			model='deepseek-chat',
+			api_key=api_key_deepseek,
+		),
+	],
+	ids=[
+		'gpt-4o',
+		'gpt-4o-mini',
+		'azure-gpt-4o',
+		#'deepseek-reasoner',
+		'qwen2.5:latest',
+		'azure-gpt-4o-mini',
+		'claude-3-5-sonnet',
+		'gemini-2.0-flash-exp',
+		'gemini-1.5-pro',
+		'gemini-1.5-flash-latest',
+		'deepseek-chat',
+	],
+)
+async def llm(request):
+	return request.param
+
+
+@pytest.mark.asyncio
+async def test_model_search(llm, context):
+	"""Test 'Search Google' action"""
+	model_name = llm.model if hasattr(llm, 'model') else llm.model_name
+	print(f'\nTesting model: {model_name}')
+
+	use_vision = True
+	models_without_vision = ['deepseek-chat', 'deepseek-reasoner']
+	if hasattr(llm, 'model') and llm.model in models_without_vision:
+		use_vision = False
+	elif hasattr(llm, 'model_name') and llm.model_name in models_without_vision:
+		use_vision = False
+
+	# require ollama run
+	local_models = ['qwen2.5:latest']
+	if model_name in local_models:
+		# check if ollama is running
+		# ping ollama http://127.0.0.1
+		try:
+			async with httpx.AsyncClient() as client:
+				response = await client.get('http://127.0.0.1:11434/')
+				if response.status_code != 200:
+					raise Exception('Ollama is not running - start with `ollama start`')
+		except Exception:
+			raise Exception('Ollama is not running - start with `ollama start`')
+
+	agent = Agent(
+		task="Search Google for 'elon musk' then click on the first result and scroll down.",
+		llm=llm,
+		browser_context=context,
+		max_failures=2,
+		use_vision=use_vision,
+	)
+	history: AgentHistoryList = await agent.run(max_steps=2)
+	done = history.is_done()
+	successful = history.is_successful()
+	action_names = history.action_names()
+	print(f'Actions performed: {action_names}')
+	errors = [e for e in history.errors() if e is not None]
+	errors = '\n'.join(errors)
+	passed = False
+	if 'search_google' in action_names:
+		passed = True
+	elif 'go_to_url' in action_names:
+		passed = True
+	elif 'open_tab' in action_names:
+		passed = True
+
+	else:
+		passed = False
+	print(f'Model {model_name}: {"✅ PASSED - " if passed else "❌ FAILED - "} Done: {done} Successful: {successful}')
+
+	assert passed, f'Model {model_name} not working\nActions performed: {action_names}\nErrors: {errors}'
--- a/browser-use/tests/test_qwen.py
+++ b/browser-use/tests/test_qwen.py
@ -0,0 +1,66 @@
+import asyncio
+
+import pytest
+from langchain_ollama import ChatOllama
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+
+	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
+	# NOTE: Make sure to run ollama server with `ollama start'
+	return ChatOllama(
+		model='qwen2.5:latest',
+		num_ctx=128000,
+	)
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Create an instance of the default event loop for each test case."""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	async with await browser.new_context() as context:
+		yield context
+
+
+# pytest tests/test_qwen.py -v -k "test_qwen_url" --capture=no
+# @pytest.mark.asyncio
+async def test_qwen_url(llm, context):
+	"""Test complex ecommerce interaction sequence"""
+	agent = Agent(
+		task='go_to_url amazon.com',
+		llm=llm,
+	)
+
+	history: AgentHistoryList = await agent.run(max_steps=3)
+
+	# Verify sequence of actions
+	action_sequence = []
+	for action in history.model_actions():
+		action_name = list(action.keys())[0]
+		if action_name in ['go_to_url', 'open_tab']:
+			action_sequence.append('navigate')
+
+	assert 'navigate' in action_sequence  # Navigated to Amazon
--- a/browser-use/tests/test_react_dropdown.py
+++ b/browser-use/tests/test_react_dropdown.py
@ -0,0 +1,45 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import os
+import sys
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import asyncio
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, AgentHistoryList
+
+llm = ChatOpenAI(model='gpt-4o')
+# browser = Browser(config=BrowserConfig(headless=False))
+
+agent = Agent(
+	task=(
+		'go to https://codepen.io/shyam-king/pen/ByBJoOv and select "Tiger" dropdown and read the text given in "Selected Animal" box (it can be empty as well)'
+	),
+	llm=llm,
+	browser_context=BrowserContext(
+		browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+	),
+)
+
+
+async def test_dropdown():
+	history: AgentHistoryList = await agent.run(10)
+	# await controller.browser.close(force=True)
+
+	result = history.final_result()
+	assert result is not None
+	print('result: ', result)
+	# await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(test_dropdown())
--- a/browser-use/tests/test_save_conversation.py
+++ b/browser-use/tests/test_save_conversation.py
@ -0,0 +1,83 @@
+"""
+Tests that the agent writes conversation logs for different `save_conversation_path` values.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import os
+import shutil
+import sys
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, AgentHistoryList
+
+# Chat model shared by every test in this module; constructed at import time.
+llm = ChatOpenAI(model='gpt-4o')
+
+
+async def test_save_conversation_contains_slash():
+	if os.path.exists('./logs'):
+		shutil.rmtree('./logs')
+
+	agent = Agent(
+		task=('go to google.com and search for text "hi there"'),
+		llm=llm,
+		browser_context=BrowserContext(
+			browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+		),
+		save_conversation_path='logs/conversation',
+	)
+	history: AgentHistoryList = await agent.run(20)
+
+	result = history.final_result()
+	assert result is not None
+
+	assert os.path.exists('./logs'), 'logs directory was not created'
+	assert os.path.exists('./logs/conversation_2.txt'), 'logs file was not created'
+
+
+async def test_save_conversation_not_contains_slash():
+	if os.path.exists('./logs'):
+		shutil.rmtree('./logs')
+
+	agent = Agent(
+		task=('go to google.com and search for text "hi there"'),
+		llm=llm,
+		browser_context=BrowserContext(
+			browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+		),
+		save_conversation_path='logs',
+	)
+	history: AgentHistoryList = await agent.run(20)
+
+	result = history.final_result()
+	assert result is not None
+
+	assert os.path.exists('./logs'), 'logs directory was not created'
+	assert os.path.exists('./logs/_2.txt'), 'logs file was not created'
+
+
+async def test_save_conversation_deep_directory():
+	if os.path.exists('./logs'):
+		shutil.rmtree('./logs')
+
+	agent = Agent(
+		task=('go to google.com and search for text "hi there"'),
+		llm=llm,
+		browser_context=BrowserContext(
+			browser=Browser(config=BrowserConfig(headless=False, disable_security=True)),
+		),
+		save_conversation_path='logs/deep/directory/conversation',
+	)
+	history: AgentHistoryList = await agent.run(20)
+
+	result = history.final_result()
+	assert result is not None
+
+	assert os.path.exists('./logs/deep/directory'), 'logs directory was not created'
+	assert os.path.exists('./logs/deep/directory/conversation_2.txt'), 'logs file was not created'
--- a/browser-use/tests/test_self_registered_actions.py
+++ b/browser-use/tests/test_self_registered_actions.py
@ -0,0 +1,198 @@
+import asyncio
+import os
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import BaseModel, SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Provide one asyncio event loop for the whole test session.
+
+	A new loop is created from the current event-loop policy, yielded to the
+	tests, and closed during fixture teardown.
+	"""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	"""Yield a new browser context per test; the ``async with`` block exits it afterwards."""
+	async with await browser.new_context() as context:
+		yield context
+
+
+@pytest.fixture
+async def controller():
+	"""Initialize the controller with self-registered actions"""
+	controller = Controller()
+
+	# Define custom actions without Pydantic models
+	@controller.action('Print a message')
+	def print_message(message: str):
+		print(f'Message: {message}')
+		return f'Printed message: {message}'
+
+	@controller.action('Add two numbers')
+	def add_numbers(a: int, b: int):
+		result = a + b
+		return f'The sum is {result}'
+
+	@controller.action('Concatenate strings')
+	def concatenate_strings(str1: str, str2: str):
+		result = str1 + str2
+		return f'Concatenated string: {result}'
+
+	# Define Pydantic models
+	class SimpleModel(BaseModel):
+		name: str
+		age: int
+
+	class Address(BaseModel):
+		street: str
+		city: str
+
+	class NestedModel(BaseModel):
+		user: SimpleModel
+		address: Address
+
+	# Add actions with Pydantic model arguments
+	@controller.action('Process simple model', param_model=SimpleModel)
+	def process_simple_model(model: SimpleModel):
+		return f'Processed {model.name}, age {model.age}'
+
+	@controller.action('Process nested model', param_model=NestedModel)
+	def process_nested_model(model: NestedModel):
+		user_info = f'{model.user.name}, age {model.user.age}'
+		address_info = f'{model.address.street}, {model.address.city}'
+		return f'Processed user {user_info} at address {address_info}'
+
+	@controller.action('Process multiple models')
+	def process_multiple_models(model1: SimpleModel, model2: Address):
+		return f'Processed {model1.name} living at {model2.street}, {model2.city}'
+
+	yield controller
+
+
+@pytest.fixture
+def llm():
+	"""Initialize language model for testing"""
+
+	# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
+	return AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+
+# @pytest.mark.skip(reason="Skipping test for now")
+@pytest.mark.asyncio
+async def test_self_registered_actions_no_pydantic(llm, controller):
+	"""Test self-registered actions with individual arguments"""
+	agent = Agent(
+		task="First, print the message 'Hello, World!'. Then, add 10 and 20. Next, concatenate 'foo' and 'bar'.",
+		llm=llm,
+		controller=controller,
+	)
+	history: AgentHistoryList = await agent.run(max_steps=10)
+	# Check that custom actions were executed
+	action_names = history.action_names()
+
+	assert 'print_message' in action_names
+	assert 'add_numbers' in action_names
+	assert 'concatenate_strings' in action_names
+
+
+# @pytest.mark.skip(reason="Skipping test for now")
+@pytest.mark.asyncio
+async def test_mixed_arguments_actions(llm, controller):
+	"""Test actions with mixed argument types"""
+
+	# Define another action during the test
+	# Test for async actions
+	@controller.action('Calculate the area of a rectangle')
+	async def calculate_area(length: float, width: float):
+		area = length * width
+		return f'The area is {area}'
+
+	agent = Agent(
+		task='Calculate the area of a rectangle with length 5.5 and width 3.2.',
+		llm=llm,
+		controller=controller,
+	)
+	history = await agent.run(max_steps=5)
+
+	# Check that the action was executed
+	action_names = history.action_names()
+
+	assert 'calculate_area' in action_names
+	# check result
+	correct = 'The area is 17.6'
+	for content in history.extracted_content():
+		if correct in content:
+			break
+	else:
+		pytest.fail(f'{correct} not found in extracted content')
+
+
+@pytest.mark.asyncio
+async def test_pydantic_simple_model(llm, controller):
+	"""Test action with a simple Pydantic model argument"""
+	agent = Agent(
+		task="Process a simple model with name 'Alice' and age 30.",
+		llm=llm,
+		controller=controller,
+	)
+	history = await agent.run(max_steps=5)
+
+	# Check that the action was executed
+	action_names = history.action_names()
+
+	assert 'process_simple_model' in action_names
+	correct = 'Processed Alice, age 30'
+	for content in history.extracted_content():
+		if correct in content:
+			break
+	else:
+		pytest.fail(f'{correct} not found in extracted content')
+
+
+@pytest.mark.asyncio
+async def test_pydantic_nested_model(llm, controller):
+	"""Test action with a nested Pydantic model argument"""
+	agent = Agent(
+		task="Process a nested model with user name 'Bob', age 25, living at '123 Maple St', 'Springfield'.",
+		llm=llm,
+		controller=controller,
+	)
+	history = await agent.run(max_steps=5)
+
+	# Check that the action was executed
+	action_names = history.action_names()
+
+	assert 'process_nested_model' in action_names
+	correct = 'Processed user Bob, age 25 at address 123 Maple St, Springfield'
+	for content in history.extracted_content():
+		if correct in content:
+			break
+	else:
+		pytest.fail(f'{correct} not found in extracted content')
+
+
+# run this file with:
+# pytest tests/test_self_registered_actions.py --capture=no
--- a/browser-use/tests/test_sensitive_data.py
+++ b/browser-use/tests/test_sensitive_data.py
@ -0,0 +1,91 @@
+import pytest
+from langchain_core.messages import HumanMessage, SystemMessage
+from pydantic import BaseModel, Field
+
+from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
+from browser_use.agent.views import MessageManagerState
+from browser_use.controller.registry.service import Registry
+
+
+class SensitiveParams(BaseModel):
+	"""Test parameter model for sensitive data testing."""
+
+	# Free-form text; the tests fill it with <secret>key</secret> placeholders.
+	text: str = Field(description='Text with sensitive data placeholders')
+
+
+@pytest.fixture
+def registry():
+	"""Provide a fresh Registry built with default arguments for each test."""
+	return Registry()
+
+
+@pytest.fixture
+def message_manager():
+	"""Provide a MessageManager with a fixed task, a fixed system message, and default settings/state."""
+	return MessageManager(
+		task='Test task',
+		system_message=SystemMessage(content='System message'),
+		settings=MessageManagerSettings(),
+		state=MessageManagerState(),
+	)
+
+
+def test_replace_sensitive_data_with_missing_keys(registry):
+	"""Test that _replace_sensitive_data handles missing keys gracefully"""
+	# Create a simple Pydantic model with sensitive data placeholders
+	params = SensitiveParams(text='Please enter <secret>username</secret> and <secret>password</secret>')
+
+	# Case 1: All keys present
+	sensitive_data = {'username': 'user123', 'password': 'pass456'}
+	result = registry._replace_sensitive_data(params, sensitive_data)
+	assert 'user123' in result.text
+	assert 'pass456' in result.text
+	# Both keys should be replaced
+
+	# Case 2: One key missing
+	sensitive_data = {'username': 'user123'}  # password is missing
+	result = registry._replace_sensitive_data(params, sensitive_data)
+	assert 'user123' in result.text
+	assert '<secret>password</secret>' in result.text
+	# Verify the behavior - username replaced, password kept as tag
+
+	# Case 3: Multiple keys missing
+	sensitive_data = {}  # both keys missing
+	result = registry._replace_sensitive_data(params, sensitive_data)
+	assert '<secret>username</secret>' in result.text
+	assert '<secret>password</secret>' in result.text
+	# Verify both tags are preserved when keys are missing
+
+	# Case 4: One key empty
+	sensitive_data = {'username': 'user123', 'password': ''}
+	result = registry._replace_sensitive_data(params, sensitive_data)
+	assert 'user123' in result.text
+	assert '<secret>password</secret>' in result.text
+	# Empty value should be treated the same as missing key
+
+
+def test_filter_sensitive_data(message_manager):
+	"""Test that _filter_sensitive_data handles all sensitive data scenarios correctly"""
+	# Set up a message with sensitive information
+	message = HumanMessage(content='My username is admin and password is secret123')
+
+	# Case 1: No sensitive data provided
+	message_manager.settings.sensitive_data = None
+	result = message_manager._filter_sensitive_data(message)
+	assert result.content == 'My username is admin and password is secret123'
+
+	# Case 2: All sensitive data is properly replaced
+	message_manager.settings.sensitive_data = {'username': 'admin', 'password': 'secret123'}
+	result = message_manager._filter_sensitive_data(message)
+	assert '<secret>username</secret>' in result.content
+	assert '<secret>password</secret>' in result.content
+
+	# Case 3: Make sure it works with nested content
+	nested_message = HumanMessage(content=[{'type': 'text', 'text': 'My username is admin and password is secret123'}])
+	result = message_manager._filter_sensitive_data(nested_message)
+	assert '<secret>username</secret>' in result.content[0]['text']
+	assert '<secret>password</secret>' in result.content[0]['text']
+
+	# Case 4: Test with empty values
+	message_manager.settings.sensitive_data = {'username': 'admin', 'password': ''}
+	result = message_manager._filter_sensitive_data(message)
+	assert '<secret>username</secret>' in result.content
+	# Only username should be replaced since password is empty
--- a/browser-use/tests/test_service.py
+++ b/browser-use/tests/test_service.py
@ -0,0 +1,344 @@
+from unittest.mock import AsyncMock, MagicMock, Mock, patch
+
+import pytest
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.messages import HumanMessage
+from pydantic import BaseModel
+
+from browser_use.agent.service import Agent
+from browser_use.agent.views import ActionResult
+from browser_use.browser.browser import Browser
+from browser_use.browser.context import BrowserContext
+from browser_use.browser.views import BrowserState
+from browser_use.controller.registry.service import Registry
+from browser_use.controller.registry.views import ActionModel
+from browser_use.controller.service import Controller
+
+# run with python -m pytest tests/test_service.py
+
+
+# run test with:
+# python -m pytest tests/test_service.py
+class TestAgent:
+	"""Unit tests for Agent that replace the controller, LLM and browser with mocks."""
+
+	@pytest.fixture
+	def mock_controller(self):
+		"""Controller mock whose registry exposes a single 'test_action' entry."""
+		controller = Mock(spec=Controller)
+		registry = Mock(spec=Registry)
+		registry.registry = MagicMock()
+		registry.registry.actions = {'test_action': MagicMock(param_model=MagicMock())}  # type: ignore
+		controller.registry = registry
+		return controller
+
+	@pytest.fixture
+	def mock_llm(self):
+		"""Mock restricted to the BaseChatModel interface."""
+		return Mock(spec=BaseChatModel)
+
+	@pytest.fixture
+	def mock_browser(self):
+		"""Mock restricted to the Browser interface."""
+		return Mock(spec=Browser)
+
+	@pytest.fixture
+	def mock_browser_context(self):
+		"""Mock restricted to the BrowserContext interface."""
+		return Mock(spec=BrowserContext)
+
+	def test_convert_initial_actions(self, mock_controller, mock_llm, mock_browser, mock_browser_context):  # type: ignore
+		"""
+		Test that the _convert_initial_actions method correctly converts
+		dictionary-based actions to ActionModel instances.
+
+		This test ensures that:
+		1. The method processes the initial actions correctly.
+		2. The correct param_model is called with the right parameters.
+		3. The ActionModel is created with the validated parameters.
+		4. The method returns a list of ActionModel instances.
+		"""
+		# Arrange
+		agent = Agent(
+			task='Test task', llm=mock_llm, controller=mock_controller, browser=mock_browser, browser_context=mock_browser_context
+		)
+		initial_actions = [{'test_action': {'param1': 'value1', 'param2': 'value2'}}]
+
+		# Mock the ActionModel
+		mock_action_model = MagicMock(spec=ActionModel)
+		mock_action_model_instance = MagicMock()
+		mock_action_model.return_value = mock_action_model_instance
+		agent.ActionModel = mock_action_model  # type: ignore
+
+		# Act
+		result = agent._convert_initial_actions(initial_actions)
+
+		# Assert
+		assert len(result) == 1
+		mock_controller.registry.registry.actions['test_action'].param_model.assert_called_once_with(  # type: ignore
+			param1='value1', param2='value2'
+		)
+		mock_action_model.assert_called_once()
+		assert isinstance(result[0], MagicMock)
+		assert result[0] == mock_action_model_instance
+
+		# Check that the ActionModel was called with the correct parameters
+		# (call_args[1] is the keyword-argument dict of the recorded call)
+		call_args = mock_action_model.call_args[1]
+		assert 'test_action' in call_args
+		assert call_args['test_action'] == mock_controller.registry.registry.actions['test_action'].param_model.return_value  # type: ignore
+
+	@pytest.mark.asyncio
+	async def test_step_error_handling(self):
+		"""
+		Test the error handling in the step method of the Agent class.
+		This test simulates a failure in the get_next_action method and
+		checks if the error is properly handled and recorded.
+		"""
+		# Mock the LLM
+		mock_llm = MagicMock(spec=BaseChatModel)
+
+		# Mock the MessageManager
+		# (the patch object bound here is not referenced again; only the patching matters)
+		with patch('browser_use.agent.service.MessageManager') as mock_message_manager:
+			# Create an Agent instance with mocked dependencies
+			agent = Agent(task='Test task', llm=mock_llm)
+
+			# Mock the get_next_action method to raise an exception
+			agent.get_next_action = AsyncMock(side_effect=ValueError('Test error'))
+
+			# Mock the browser_context
+			agent.browser_context = AsyncMock()
+			agent.browser_context.get_state = AsyncMock(
+				return_value=BrowserState(
+					url='https://example.com',
+					title='Example',
+					element_tree=MagicMock(),  # Mocked element tree
+					tabs=[],
+					selector_map={},
+					screenshot='',
+				)
+			)
+
+			# Mock the controller
+			agent.controller = AsyncMock()
+
+			# Call the step method
+			await agent.step()
+
+			# Assert that the error was handled and recorded
+			assert agent.consecutive_failures == 1
+			assert len(agent._last_result) == 1
+			assert isinstance(agent._last_result[0], ActionResult)
+			assert 'Test error' in agent._last_result[0].error
+			assert agent._last_result[0].include_in_memory is True
+
+
+class TestRegistry:
+	"""Unit tests for Registry: action exclusion and execute_action with or without a browser."""
+
+	@pytest.fixture
+	def registry_with_excludes(self):
+		"""Registry configured to exclude the action named 'excluded_action'."""
+		return Registry(exclude_actions=['excluded_action'])
+
+	def test_action_decorator_with_excluded_action(self, registry_with_excludes):
+		"""
+		Test that the action decorator does not register an action
+		if it's in the exclude_actions list.
+		"""
+
+		# Define a function to be decorated
+		def excluded_action():
+			pass
+
+		# Apply the action decorator
+		decorated_func = registry_with_excludes.action(description='This should be excluded')(excluded_action)
+
+		# Assert that the decorated function is the same as the original
+		assert decorated_func == excluded_action
+
+		# Assert that the action was not added to the registry
+		assert 'excluded_action' not in registry_with_excludes.registry.actions
+
+		# Define another function that should be included
+		def included_action():
+			pass
+
+		# Apply the action decorator to an included action
+		registry_with_excludes.action(description='This should be included')(included_action)
+
+		# Assert that the included action was added to the registry
+		assert 'included_action' in registry_with_excludes.registry.actions
+
+	@pytest.mark.asyncio
+	async def test_execute_action_with_and_without_browser_context(self):
+		"""
+		Test that the execute_action method correctly handles actions with and without a browser context.
+		This test ensures that:
+		1. An action requiring a browser context is executed correctly.
+		2. An action not requiring a browser context is executed correctly.
+		3. The browser context is passed to the action function when required.
+		4. The action function receives the correct parameters.
+		5. The method raises an error when a browser context is required but not provided.
+		"""
+		registry = Registry()
+
+		# Define a mock action model
+		class TestActionModel(BaseModel):
+			param1: str
+
+		# Define mock action functions
+		async def test_action_with_browser(param1: str, browser):
+			return f'Action executed with {param1} and browser'
+
+		async def test_action_without_browser(param1: str):
+			return f'Action executed with {param1}'
+
+		# Register the actions
+		# (entries are written straight into the actions dict; each function is
+		# wrapped in an AsyncMock so its calls can be inspected afterwards)
+		registry.registry.actions['test_action_with_browser'] = MagicMock(
+			function=AsyncMock(side_effect=test_action_with_browser),
+			param_model=TestActionModel,
+			description='Test action with browser',
+		)
+
+		registry.registry.actions['test_action_without_browser'] = MagicMock(
+			function=AsyncMock(side_effect=test_action_without_browser),
+			param_model=TestActionModel,
+			description='Test action without browser',
+		)
+
+		# Mock BrowserContext
+		mock_browser = MagicMock()
+
+		# Execute the action with a browser context
+		result_with_browser = await registry.execute_action(
+			'test_action_with_browser', {'param1': 'test_value'}, browser=mock_browser
+		)
+		assert result_with_browser == 'Action executed with test_value and browser'
+
+		# Execute the action without a browser context
+		result_without_browser = await registry.execute_action('test_action_without_browser', {'param1': 'test_value'})
+		assert result_without_browser == 'Action executed with test_value'
+
+		# Test error when browser is required but not provided
+		with pytest.raises(RuntimeError, match='Action test_action_with_browser requires browser but none provided'):
+			await registry.execute_action('test_action_with_browser', {'param1': 'test_value'})
+
+		# Verify that the action functions were called with correct parameters
+		registry.registry.actions['test_action_with_browser'].function.assert_called_once_with(
+			param1='test_value', browser=mock_browser
+		)
+		registry.registry.actions['test_action_without_browser'].function.assert_called_once_with(param1='test_value')
+
+
+class TestAgentRetry:
+	@pytest.fixture
+	def mock_llm(self):
+		return AsyncMock()
+
+	@pytest.fixture
+	def mock_controller(self):
+		controller = Mock()
+		controller.registry = Mock()
+		controller.registry.registry = Mock()
+		controller.registry.registry.actions = {}
+		return controller
+
+	@pytest.fixture
+	def mock_browser_context(self):
+		browser_context = Mock()
+		browser_context.get_state = AsyncMock(
+			return_value=BrowserState(
+				url='https://parabank.parasoft.com/parabank/index.htm',
+				title='ParaBank',
+				element_tree=MagicMock(),
+				tabs=[],
+				selector_map={},
+				screenshot='',
+			)
+		)
+		return browser_context
+
+	@pytest.fixture
+	def mock_action_model(self):
+		action_model = Mock(spec=ActionModel)
+		return action_model
+
+	@pytest.mark.asyncio
+	async def test_step_empty_action_retry(self, mock_llm, mock_controller, mock_browser_context, mock_action_model):
+		"""
+		Test that the step method retries and handles empty actions correctly.
+		"""
+		# Arrange
+		agent = Agent(
+			task='Test task',
+			llm=mock_llm,
+			controller=mock_controller,
+			browser=Mock(),
+			browser_context=mock_browser_context,
+		)
+		agent.ActionModel = mock_action_model  # Inject the mock ActionModel
+
+		# Mock get_next_action to return empty action the first time, then a valid action
+		empty_model_output = MagicMock()
+		empty_model_output.action = []  # Empty action
+		valid_model_output = MagicMock()
+		valid_action = MagicMock()
+		valid_model_output.action = [valid_action]
+
+		mock_llm.return_value.invoke.side_effect = [empty_model_output, valid_model_output]
+		agent.get_next_action = mock_llm.return_value.invoke
+
+		# Act
+		await agent.step()
+
+		# Assert
+		# Check that get_next_action was called twice (initial call + retry)
+		assert agent.get_next_action.call_count == 2
+		# Check that the LLM was called twice
+		assert mock_llm.return_value.invoke.call_count == 2
+
+		# Check that the second call to get_next_action included the clarification message
+		_, retry_messages = mock_llm.return_value.invoke.call_args_list[1]
+		assert len(retry_messages[0]) == 2  # input_messages + clarification message
+		assert isinstance(retry_messages[0][1], HumanMessage)
+		assert 'You forgot to return an action' in retry_messages[0][1].content
+
+		# Check that _last_result contains the valid action
+		assert len(agent._last_result) == 1
+		assert agent._last_result[0].action == valid_action
+
+	@pytest.mark.asyncio
+	async def test_step_empty_action_retry_and_fail(self, mock_llm, mock_controller, mock_browser_context, mock_action_model):
+		"""
+		Test that the step method handles the case where get_next_action returns
+		empty actions twice, and inserts a safe noop action.
+		"""
+		# Arrange
+		agent = Agent(
+			task='Test task',
+			llm=mock_llm,
+			controller=mock_controller,
+			browser=Mock(),
+			browser_context=mock_browser_context,
+		)
+		agent.ActionModel = mock_action_model  # Inject the mock ActionModel
+
+		# Mock get_next_action to return empty action both times
+		empty_model_output = MagicMock()
+		empty_model_output.action = []  # Empty action
+		mock_llm.return_value.invoke.return_value = empty_model_output
+		agent.get_next_action = mock_llm.return_value.invoke
+
+		# Mock the ActionModel instance creation
+		mock_action_instance = MagicMock()
+		mock_action_model.return_value = mock_action_instance
+
+		# Act
+		await agent.step()
+
+		# Assert
+		# Check that get_next_action was called twice
+		assert agent.get_next_action.call_count == 2
+		# Check that the LLM was called twice
+		assert mock_llm.return_value.invoke.call_count == 2
+
+		# Check that ActionModel was instantiated with the noop action
+		mock_action_model.assert_called_once()
+		call_args = mock_action_model.call_args[1]
+		assert 'done' in call_args
+		assert call_args['done'] == {'success': False, 'text': 'No action returned, safe exit.'}
+
+		# Check that _last_result contains the noop action
+		assert len(agent._last_result) == 1
+		assert agent._last_result[0].action == mock_action_instance
--- a/browser-use/tests/test_stress.py
+++ b/browser-use/tests/test_stress.py
@ -0,0 +1,115 @@
+import asyncio
+import os
+import random
+import string
+import time
+
+import pytest
+from langchain_openai import AzureChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+
+@pytest.fixture(scope='session')
+def event_loop():
+	"""Provide one asyncio event loop for the whole test session.
+
+	A new loop is created from the current event-loop policy, yielded to the
+	tests, and closed during fixture teardown.
+	"""
+	loop = asyncio.get_event_loop_policy().new_event_loop()
+	yield loop
+	loop.close()
+
+
+@pytest.fixture(scope='session')
+async def browser(event_loop):
+	browser_instance = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+	yield browser_instance
+	await browser_instance.close()
+
+
+@pytest.fixture
+async def context(browser):
+	"""Yield a new browser context per test; the ``async with`` block exits it afterwards."""
+	async with await browser.new_context() as context:
+		yield context
+
+
+@pytest.fixture
+def llm():
+	"""Initialize the language model"""
+	model = AzureChatOpenAI(
+		api_version='2024-10-21',
+		model='gpt-4o',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+	return model
+
+
+def generate_random_text(length: int) -> str:
+	"""Generate random text of specified length"""
+	return ''.join(random.choices(string.ascii_letters + string.digits + ' ', k=length))
+
+
+@pytest.fixture
+async def controller():
+	"""Initialize the controller with one action that returns a large random text.
+
+	The 10000-character text is generated once when the fixture is set up, so
+	every call of the action within a test returns the same string.
+	"""
+	controller = Controller()
+	large_text = generate_random_text(10000)
+
+	@controller.action('call this magical function to get very special text')
+	def get_very_special_text():
+		return large_text
+
+	yield controller
+
+
+@pytest.mark.asyncio
+async def test_token_limit_with_multiple_extractions(llm, controller, context):
+	"""Test handling of multiple smaller extractions accumulating tokens"""
+	agent = Agent(
+		task='Call the magical function to get very special text 5 times',
+		llm=llm,
+		controller=controller,
+		browser_context=context,
+		max_input_tokens=2000,
+		save_conversation_path='tmp/stress_test/test_token_limit_with_multiple_extractions.json',
+	)
+
+	history = await agent.run(max_steps=5)
+
+	# check if 5 times called get_special_text
+	calls = [a for a in history.action_names() if a == 'get_very_special_text']
+	assert len(calls) == 5
+	# check the message history should be max 3 messages
+	assert len(agent.message_manager.history.messages) > 3
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize('max_tokens', [4000])  # 8000 20000
+@pytest.mark.asyncio
+async def test_open_3_tabs_and_extract_content(llm, controller, context, max_tokens):
+	"""Stress test: Open 3 tabs with urls and extract content"""
+	agent = Agent(
+		task='Open 3 tabs with https://en.wikipedia.org/wiki/Internet and extract the content from each.',
+		llm=llm,
+		controller=controller,
+		browser_context=context,
+		max_input_tokens=max_tokens,
+		save_conversation_path='tmp/stress_test/test_open_3_tabs_and_extract_content.json',
+	)
+	start_time = time.time()
+	history = await agent.run(max_steps=7)
+	end_time = time.time()
+
+	total_time = end_time - start_time
+
+	print(f'Total time: {total_time:.2f} seconds')
+	# Check for errors
+	errors = history.errors()
+	assert len(errors) == 0, 'Errors occurred during the test'
+	# check if 3 tabs were opened
+	assert len(context.current_state.tabs) >= 3, '3 tabs were not opened'
--- a/browser-use/tests/test_tab_management.py
+++ b/browser-use/tests/test_tab_management.py
@ -0,0 +1,575 @@
+import asyncio
+import logging
+
+import pytest
+from dotenv import load_dotenv
+from pytest_httpserver import HTTPServer
+
+load_dotenv()
+
+from browser_use.agent.views import ActionModel
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+from browser_use.controller.service import Controller
+from browser_use.controller.views import (
+	CloseTabAction,
+	GoToUrlAction,
+	OpenTabAction,
+	SwitchTabAction,
+)
+
+# Set up test logging
+logger = logging.getLogger('tab_tests')
+logger.setLevel(logging.DEBUG)
+
+
+class TestTabManagement:
+	"""Tests for the tab management system with separate agent_current_page and human_current_page references."""
+
+	@pytest.fixture(scope='module')
+	def event_loop(self):
+		"""Create and provide an event loop for async tests."""
+		# Module scope matches the module-scoped `browser` fixture, which requests this loop
+		loop = asyncio.get_event_loop_policy().new_event_loop()
+		yield loop
+		# Fixture teardown: release the loop once the fixture goes out of scope
+		loop.close()
+
+	@pytest.fixture(scope='module')
+	def http_server(self):
+		"""Start a local HTTP server that serves four static HTML test pages.
+
+		Routes ``/page1`` to ``/page4`` each return a minimal HTML document whose
+		``<title>`` and ``<h1>`` both read ``Test Page N``.
+
+		Yields:
+			The running ``HTTPServer`` instance; it is stopped on teardown.
+		"""
+		server = HTTPServer()
+		server.start()
+
+		try:
+			# Register one static route per test page
+			for page_number in range(1, 5):
+				page_title = f'Test Page {page_number}'
+				server.expect_request(f'/page{page_number}').respond_with_data(
+					f'<html><head><title>{page_title}</title></head><body><h1>{page_title}</h1></body></html>',
+					content_type='text/html',
+				)
+
+			yield server
+		finally:
+			# Stop the server even when route registration fails part-way through setup
+			server.stop()
+
+	@pytest.fixture(scope='module')
+	async def browser(self, event_loop):
+		"""Create and provide a Browser instance with security disabled."""
+		# NOTE(review): the docstring mentions disabled security, but only headless=True
+		# is passed below - confirm whether disable_security=True was intended
+		browser_instance = Browser(
+			config=BrowserConfig(
+				headless=True,
+			)
+		)
+		yield browser_instance
+		# Fixture teardown: close the shared browser
+		await browser_instance.close()
+
+	@pytest.fixture
+	async def browser_context(self, browser, http_server):
+		"""Create and provide a BrowserContext instance with a properly initialized tab."""
+		context = BrowserContext(browser=browser)
+
+		# Initialize a session
+		session = await context.get_session()
+
+		# Ensure we start with no pages (close any that might exist)
+		for page in session.context.pages:
+			await page.close()
+
+		# Create an initial tab and wait for it to load completely
+		base_url = f'http://{http_server.host}:{http_server.port}'
+		await context.create_new_tab(f'{base_url}/page1')
+		await asyncio.sleep(1)  # Wait for the tab to fully initialize
+
+		# Verify that agent_current_page and human_current_page are properly set
+		# NOTE(review): if one of these assertions fails, the yield below is never
+		# reached and context.close() is not called for this context
+		assert context.agent_current_page is not None
+		assert context.human_current_page is not None
+		assert f'{http_server.host}:{http_server.port}' in context.agent_current_page.url
+
+		yield context
+		# Fixture teardown: close the per-test context
+		await context.close()
+
+	@pytest.fixture
+	def controller(self):
+		"""Create and provide a Controller instance."""
+		# Function-scoped (default fixture scope): each test gets its own Controller
+		return Controller()
+
+	@pytest.fixture
+	def base_url(self, http_server):
+		"""Build the ``http://host:port`` prefix under which the test server's pages are served."""
+		host, port = http_server.host, http_server.port
+		return f'http://{host}:{port}'
+
+	# Helper methods
+
+	async def _execute_action(self, controller, browser_context, action_data):
+		"""Execute an action through the controller using a dynamically built ActionModel.
+
+		Args:
+			controller: Controller whose ``act`` method runs the action.
+			browser_context: Browser context the action is executed against.
+			action_data: Mapping of action name to its parameter object,
+				e.g. ``{'open_tab': OpenTabAction(...)}``.
+
+		Returns:
+			Whatever ``controller.act`` returns for the action.
+
+		Raises:
+			ValueError: If ``action_data`` is empty.
+		"""
+		# Function-scope import: this module does not import pydantic at the top
+		# (assumes ActionModel is a pydantic BaseModel, like the annotated subclasses used in the tests)
+		from pydantic import create_model
+
+		if not action_data:
+			raise ValueError('action_data must contain an action to execute')
+
+		# Declare every action as a real optional field. Assigning a type to a class
+		# attribute after the class has been created does not declare a model field,
+		# so the model is built with its fields in one step instead.
+		action_fields = {name: (type(value) | None, None) for name, value in action_data.items()}
+		dynamic_action_model = create_model('DynamicActionModel', __base__=ActionModel, **action_fields)
+
+		# Execute the action
+		result = await controller.act(dynamic_action_model(**action_data), browser_context)
+
+		# Give the browser a moment to process the action
+		await asyncio.sleep(0.5)
+
+		return result
+
+	async def _ensure_synchronized_state(self, browser_context, base_url):
+		"""Helper to ensure tab references are properly synchronized before tests."""
+		# Make sure agent_current_page and human_current_page are set and valid
+		session = await browser_context.get_session()
+
+		# Repair the agent reference when it is unset or no longer among the open pages
+		if not browser_context.agent_current_page or browser_context.agent_current_page not in session.context.pages:
+			if session.context.pages:
+				browser_context.agent_current_page = session.context.pages[0]
+			else:
+				# Create a tab with the test server
+				# NOTE(review): agent_current_page is not assigned in this branch;
+				# presumably create_new_tab sets it - confirm
+				await browser_context.create_new_tab(f'{base_url}/page1')
+				await asyncio.sleep(1)  # Wait longer for tab to initialize
+
+		# Point the human reference at the agent's page when it is unset or stale
+		if not browser_context.human_current_page or browser_context.human_current_page not in session.context.pages:
+			browser_context.human_current_page = browser_context.agent_current_page
+
+	async def _simulate_user_tab_change(self, page, browser_context):
+		"""Simulate a user changing tabs by properly triggering events with Playwright."""
+		# Log both references before the switch so the AFTER line below can be compared
+		logger.debug(
+			f'BEFORE: agent_tab={browser_context.agent_current_page.url if browser_context.agent_current_page else "None"}, '
+			f'human_current_page={browser_context.human_current_page.url if browser_context.human_current_page else "None"}'
+		)
+		logger.debug(f'Simulating user changing to -> {page.url}')
+
+		# First bring the page to front - this is the physical action a user would take
+		await page.bring_to_front()
+
+		# To simulate a user switching tabs, we need to trigger the right events
+		# Use Playwright's dispatch_event method to properly trigger events from outside
+
+		await page.dispatch_event('body', 'focus')
+		# await page.evaluate("""() => window.dispatchEvent(new Event('focus'))""")
+		# await page.evaluate(
+		# 	"""() => document.dispatchEvent(new Event('pointermove', { bubbles: true, cancelable: false, clientX: 0, clientY: 0 }))"""
+		# )
+		# await page.evaluate(
+		# 	"() => document.dispatchEvent(new Event('deviceorientation', { bubbles: true, cancelable: false, alpha: 0, beta: 0, gamma: 0 }))"
+		# )
+		# await page.evaluate(
+		# 	"""() => document.dispatchEvent(new Event('visibilitychange', { bubbles: true, cancelable: false }))"""
+		# )
+		# logger.debug('Dispatched window.focus event')
+
+		# cheat for now, because playwright really messes with foreground tab detection
+		# TODO: fix this properly by triggering the right events and detecting them in playwright
+		# The script calls the first window property whose name starts with
+		# 'onVisibilityChange' (if any) with a minimal event-like object
+		await page.evaluate("""() => {
+			const listener = Object.keys(window).filter(k => k.startsWith('onVisibilityChange'))[0]
+			if (listener) {
+				window[listener]({ bubbles: true, cancelable: false })
+			}
+		}""")
+
+		# Give the event handlers time to process
+		await asyncio.sleep(0.5)
+
+		logger.debug(
+			f'AFTER: agent_tab URL={browser_context.agent_current_page.url if browser_context.agent_current_page else "None"}, '
+			f'human_current_page URL={browser_context.human_current_page.url if browser_context.human_current_page else "None"}'
+		)
+
+	# Tab management tests
+
+	@pytest.mark.asyncio
+	async def test_open_tab_updates_both_references(self, browser_context, base_url):
+		"""Test that open_tab correctly updates both tab references."""
+		# Ensure tab references are synchronized
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Store initial tab count and references
+		# (snapshot taken before the new tab exists; the comparisons below are made against it)
+		session = await browser_context.get_session()
+		initial_tab_count = len(session.context.pages)
+		initial_agent_tab = browser_context.agent_current_page
+
+		# Open a new tab directly via BrowserContext
+		await browser_context.create_new_tab(f'{base_url}/page2')
+
+		# Give time for events to process
+		await asyncio.sleep(1)
+
+		# Verify a new tab was created
+		session = await browser_context.get_session()
+		assert len(session.context.pages) == initial_tab_count + 1
+
+		# Both references should be set to the new tab and different from initial tab
+		assert browser_context.human_current_page is not None
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert initial_agent_tab != browser_context.agent_current_page
+		assert f'{base_url}/page2' in browser_context.agent_current_page.url
+
+	@pytest.mark.asyncio
+	async def test_switch_tab_updates_both_references(self, browser_context, base_url):
+		"""Test that switch_tab updates both tab references."""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create a new tab in addition to existing one
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+
+		# Verify we now have the second tab active
+		assert f'{base_url}/page2' in browser_context.agent_current_page.url
+
+		# Switch to the first tab
+		# (pages[0] is expected to be the /page1 tab opened by the browser_context fixture;
+		# the URL assertion below checks this)
+		session = await browser_context.get_session()
+		first_tab = session.context.pages[0]
+		await browser_context.switch_to_tab(0)
+		await asyncio.sleep(0.5)
+
+		# Both references should point to the first tab
+		assert browser_context.human_current_page is not None
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert browser_context.agent_current_page == first_tab
+		assert f'{base_url}/page1' in browser_context.agent_current_page.url
+
+		# Verify the underlying page is correct by checking we can interact with it
+		page = await browser_context.get_agent_current_page()
+		title = await page.title()
+		assert 'Test Page 1' in title
+
+	@pytest.mark.asyncio
+	async def test_close_tab_handles_references_correctly(self, browser_context, base_url):
+		"""Test that closing a tab updates references correctly."""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create two tabs with different URLs
+		# (the initial tab already exists; only the second one is opened here)
+		initial_tab = browser_context.agent_current_page
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+
+		# Verify the second tab is now active
+		assert f'{base_url}/page2' in browser_context.agent_current_page.url
+
+		# Close the current tab
+		await browser_context.close_current_tab()
+		await asyncio.sleep(0.5)
+
+		# Both references should be updated to the remaining available tab
+		assert browser_context.human_current_page is not None
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert browser_context.agent_current_page == initial_tab
+		assert not browser_context.human_current_page.is_closed()
+		assert f'{base_url}/page1' in browser_context.human_current_page.url
+
+	@pytest.mark.asyncio
+	async def test_user_changes_tab(self, browser_context, base_url):
+		"""Test that agent_current_page is preserved when user changes the foreground tab."""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create a second tab with a different URL
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+		assert f'{base_url}/page2' in browser_context.agent_current_page.url
+
+		# Switch back to the first tab for the agent
+		# and run the user-switch simulation on that same tab, so the starting state
+		# has the simulated human on the first tab as well
+		session = await browser_context.get_session()
+		first_tab = session.context.pages[0]
+		await browser_context.switch_to_tab(0)
+		await self._simulate_user_tab_change(first_tab, browser_context)
+		await asyncio.sleep(0.5)
+
+		# Store agent's active tab
+		agent_tab = browser_context.agent_current_page
+		assert f'{base_url}/page1' in agent_tab.url
+
+		# Simulate user switching to the second tab
+		session = await browser_context.get_session()
+		user_tab = session.context.pages[1]  # Second tab
+
+		# First, log the visibility listeners
+		listeners = await user_tab.evaluate("() => Object.keys(window).filter(k => k.startsWith('onVisibilityChange'))")
+		logger.debug(f'Tab visibility listeners: {listeners}')
+
+		# Make sure handlers exist before attempting to trigger them
+		assert len(listeners) > 0, 'No visibility listeners found on the page'
+
+		# Now try the simulation
+		await self._simulate_user_tab_change(user_tab, browser_context)
+
+		# Verify agent_current_page remains unchanged while human_current_page changed
+		assert browser_context.agent_current_page == agent_tab
+		assert browser_context.human_current_page != browser_context.agent_current_page
+		assert f'{base_url}/page1' in browser_context.agent_current_page.url
+		assert f'{base_url}/page2' in browser_context.human_current_page.url
+
+	@pytest.mark.asyncio
+	async def test_get_agent_current_page(self, browser_context, base_url):
+		"""Test that get_agent_current_page returns agent_current_page regardless of human_current_page."""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create a second tab with a different URL
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+
+		# Switch back to the first tab for the agent
+		await browser_context.switch_to_tab(0)
+		await asyncio.sleep(0.5)
+
+		# Simulate user switching to the second tab
+		session = await browser_context.get_session()
+		user_tab = session.context.pages[1]  # Second tab
+		await self._simulate_user_tab_change(user_tab, browser_context)
+
+		# Verify get_agent_current_page returns agent's tab, not foreground tab
+		# (the agent was switched to tab 0 above, which is expected to show /page1)
+		agent_page = await browser_context.get_agent_current_page()
+		assert agent_page == browser_context.agent_current_page
+		assert f'{base_url}/page1' in agent_page.url
+
+		# Call a method on the page to verify it's fully functional
+		title = await agent_page.title()
+		assert 'Test Page 1' in title
+
+	@pytest.mark.asyncio
+	async def test_browser_operations_use_agent_current_page(self, browser_context, base_url):
+		"""Test that browser operations use agent_current_page, not human_current_page."""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create a second tab with a different URL
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+
+		# Switch back to the first tab for the agent
+		await browser_context.switch_to_tab(0)
+		await asyncio.sleep(0.5)
+
+		# Simulate user switching to the second tab
+		session = await browser_context.get_session()
+		user_tab = session.context.pages[1]  # Second tab
+		await self._simulate_user_tab_change(user_tab, browser_context)
+
+		# Verify we have the setup we want
+		assert browser_context.human_current_page != browser_context.agent_current_page
+		assert f'{base_url}/page2' in browser_context.human_current_page.url
+		assert f'{base_url}/page1' in browser_context.agent_current_page.url
+
+		# Execute a navigation directly on agent's tab
+		# (goto is called on the page object itself, not through a controller action)
+		agent_page = await browser_context.get_agent_current_page()
+		await agent_page.goto(f'{base_url}/page3')
+		await asyncio.sleep(0.5)
+
+		# Verify navigation happened on agent_current_page
+		assert f'{base_url}/page3' in browser_context.agent_current_page.url
+		# But human_current_page remains unchanged
+		assert f'{base_url}/page2' in browser_context.human_current_page.url
+
+	@pytest.mark.asyncio
+	async def test_tab_reference_recovery(self, browser_context, base_url):
+		"""Test recovery when a tab reference becomes invalid."""
+		# Ensure we start with at least one valid tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create a second tab so we have multiple
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(1)
+
+		# Deliberately corrupt the agent_current_page reference
+		# (human_current_page is left untouched in this test)
+		browser_context.agent_current_page = None
+
+		# Call get_agent_current_page, which should recover the reference
+		agent_page = await browser_context.get_agent_current_page()
+
+		# Verify recovery worked
+		assert agent_page is not None
+		assert not agent_page.is_closed()
+
+		# Verify the tab is fully functional
+		title = await agent_page.title()
+		assert title, 'Page should have a title'
+
+		# Verify both references are now valid again
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page is not None
+
+	@pytest.mark.asyncio
+	async def test_reconcile_tab_state_handles_both_invalid(self, browser_context, base_url):
+		"""Test that reconcile_tab_state can recover when both tab references are invalid."""
+		# Ensure we start with at least one valid tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Corrupt both references
+		browser_context.agent_current_page = None
+		browser_context.human_current_page = None
+
+		# Call reconcile_tab_state directly
+		# (this is the underscore-prefixed, non-public method of the context)
+		await browser_context._reconcile_tab_state()
+
+		# Verify both references are restored
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page is not None
+		# and they are the same tab
+		assert browser_context.agent_current_page == browser_context.human_current_page
+		# and the tab is valid
+		assert not browser_context.agent_current_page.is_closed()
+
+	@pytest.mark.asyncio
+	async def test_race_condition_resilience(self, browser_context, base_url):
+		"""Test resilience against race conditions in tab operations."""
+		# Ensure we start with at least one valid tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Create two more tabs to have three in total
+		await browser_context.create_new_tab(f'{base_url}/page2')
+		await asyncio.sleep(0.5)
+		await browser_context.create_new_tab(f'{base_url}/page3')
+		await asyncio.sleep(0.5)
+
+		# Verify we have at least 3 tabs
+		session = await browser_context.get_session()
+		assert len(session.context.pages) >= 3
+
+		# Perform a series of rapid tab switches to simulate race conditions
+		# NOTE: each switch is awaited before the next one starts, so the switches run
+		# in quick succession rather than concurrently; the index sequence is 0, 1, 2, 0, 1
+		for i in range(5):
+			tab_index = i % 3
+			await browser_context.switch_to_tab(tab_index)
+			await asyncio.sleep(0.1)  # Very short delay between switches
+
+		# Verify the state is consistent after rapid operations
+		assert browser_context.human_current_page is not None
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert not browser_context.human_current_page.is_closed()
+
+		# Verify we can still navigate on the final tab
+		page = await browser_context.get_agent_current_page()
+		await page.goto(f'{base_url}/page4')
+		assert f'{base_url}/page4' in page.url
+
+	@pytest.mark.asyncio
+	async def test_tab_management_using_controller_actions(self, browser_context, controller, base_url):
+		"""
+		Test tab management using Controller actions instead of directly calling browser_context methods,
+		ensuring that both human and agent tab detection works correctly.
+		"""
+		# Ensure we start with at least one tab
+		await self._ensure_synchronized_state(browser_context, base_url)
+
+		# Make sure we have a clean single tab to start with
+		# (session.context.pages is read again on every loop check)
+		session = await browser_context.get_session()
+		while len(session.context.pages) > 1:
+			await browser_context.close_current_tab()
+			await asyncio.sleep(0.5)
+
+		# Store the initial tab for reference
+		# (when the page object has no page_id attribute, the tab's expected index 0 is used)
+		initial_tab = browser_context.agent_current_page
+		initial_tab_id = initial_tab.page_id if hasattr(initial_tab, 'page_id') else 0
+
+		# Define action models for tab operations
+		# (one ActionModel subclass per action, each with a single optional field)
+		class OpenTabActionModel(ActionModel):
+			open_tab: OpenTabAction | None = None
+
+		class SwitchTabActionModel(ActionModel):
+			switch_tab: SwitchTabAction | None = None
+
+		class GoToUrlActionModel(ActionModel):
+			go_to_url: GoToUrlAction | None = None
+
+		class CloseTabActionModel(ActionModel):
+			close_tab: CloseTabAction | None = None
+
+		# Create second tab with OpenTabAction
+		open_tab_action = {'open_tab': OpenTabAction(url=f'{base_url}/page2')}
+		await controller.act(OpenTabActionModel(**open_tab_action), browser_context)
+		await asyncio.sleep(1)  # Wait for the tab to fully initialize
+
+		# Verify the second tab is opened and active for both agent and human
+		second_tab = browser_context.agent_current_page
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert f'{base_url}/page2' in browser_context.agent_current_page.url
+		second_tab_id = second_tab.page_id if hasattr(second_tab, 'page_id') else 1
+
+		# Create third tab with OpenTabAction
+		open_tab_action2 = {'open_tab': OpenTabAction(url=f'{base_url}/page3')}
+		await controller.act(OpenTabActionModel(**open_tab_action2), browser_context)
+		await asyncio.sleep(1)  # Wait for the tab to fully initialize
+
+		# Verify the third tab is opened and active
+		third_tab = browser_context.agent_current_page
+		assert browser_context.human_current_page == browser_context.agent_current_page
+		assert f'{base_url}/page3' in browser_context.agent_current_page.url
+		third_tab_id = third_tab.page_id if hasattr(third_tab, 'page_id') else 2
+
+		# Use SwitchTabAction to go back to the first tab (for the agent)
+		switch_tab_action = {'switch_tab': SwitchTabAction(page_id=initial_tab_id)}
+		await controller.act(SwitchTabActionModel(**switch_tab_action), browser_context)
+		await asyncio.sleep(0.5)
+
+		# Verify agent is now on the first tab
+		assert browser_context.agent_current_page == initial_tab
+		assert f'{base_url}/page1' in browser_context.agent_current_page.url
+		assert browser_context.human_current_page == browser_context.agent_current_page
+
+		# Simulate human switching to the second tab
+		await self._simulate_user_tab_change(second_tab, browser_context)
+		await asyncio.sleep(0.5)
+
+		# Verify human and agent are on different tabs
+		assert browser_context.human_current_page == second_tab
+		assert browser_context.agent_current_page == initial_tab
+		assert browser_context.human_current_page != browser_context.agent_current_page
+		assert f'{base_url}/page2' in browser_context.human_current_page.url
+		assert f'{base_url}/page1' in browser_context.agent_current_page.url
+
+		# Use GoToUrlAction to navigate the agent's tab to a new URL
+		goto_action = {'go_to_url': GoToUrlAction(url=f'{base_url}/page4')}
+		await controller.act(GoToUrlActionModel(**goto_action), browser_context)
+		await asyncio.sleep(0.5)
+
+		# Refresh the agent's page reference and verify navigation
+		agent_page = await browser_context.get_agent_current_page()
+		assert agent_page is not None
+		assert f'{base_url}/page4' in agent_page.url
+
+		# Verify human's tab remains unchanged
+		assert f'{base_url}/page2' in browser_context.human_current_page.url
+
+		# Use CloseTabAction to close the third tab
+		close_tab_action = {'close_tab': CloseTabAction(page_id=third_tab_id)}
+		await controller.act(CloseTabActionModel(**close_tab_action), browser_context)
+		await asyncio.sleep(1.0)  # Extended wait to ensure tab cleanup
+
+		# Verify tab was closed
+		session = await browser_context.get_session()
+		assert len(session.context.pages) == 2
+
+		# Close the second tab, which is the human's current tab
+		close_tab_action2 = {'close_tab': CloseTabAction(page_id=second_tab_id)}
+		await controller.act(CloseTabActionModel(**close_tab_action2), browser_context)
+		await asyncio.sleep(1.0)  # Extended wait to ensure tab cleanup
+
+		# Verify we have only one tab left
+		session = await browser_context.get_session()
+		assert len(session.context.pages) == 1
+
+		# Refresh references and verify both human and agent point to the same tab
+		await browser_context._reconcile_tab_state()
+		assert browser_context.human_current_page is not None
+		assert browser_context.agent_current_page is not None
+		assert browser_context.human_current_page == browser_context.agent_current_page
+
+		# Verify the URL of the remaining tab
+		# NOTE(review): this uses get_current_page() while the rest of the class uses
+		# get_agent_current_page() - confirm both resolve to the same page here
+		final_page = await browser_context.get_current_page()
+		assert f'{base_url}' in final_page.url
--- a/browser-use/tests/test_url_allowlist_security.py
+++ b/browser-use/tests/test_url_allowlist_security.py
@ -0,0 +1,91 @@
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+
+
+class TestUrlAllowlistSecurity:
+	"""Exercises BrowserContext._is_url_allowed against credential-based bypass attempts and glob patterns."""
+
+	def test_authentication_bypass_prevention(self):
+		"""An allowed domain placed in the credentials part of a URL must not grant access."""
+		context = BrowserContext(browser=None, config=BrowserContextConfig(allowed_domains=['example.com']))
+
+		# "example.com" appears only in front of the "@", so none of these may pass
+		bypass_attempts = (
+			'https://example.com:password@malicious.com',
+			'https://example.com@malicious.com',
+			'https://example.com%20@malicious.com',
+			'https://example.com%3A@malicious.com',
+		)
+		for url in bypass_attempts:
+			assert context._is_url_allowed(url) is False
+
+		# Real credentials in front of the allowed domain remain acceptable
+		assert context._is_url_allowed('https://user:password@example.com') is True
+
+	def test_glob_pattern_matching(self):
+		"""Glob entries in allowed_domains accept matching domains and reject the rest."""
+		glob_context = BrowserContext(browser=None, config=BrowserContextConfig(allowed_domains=['*.example.com']))
+
+		# '*.example.com': subdomains and the main domain pass, other domains do not
+		glob_cases = (
+			('https://sub.example.com', True),
+			('https://deep.sub.example.com', True),
+			('https://example.com', True),
+			('https://notexample.com', False),
+			('https://example.org', False),
+		)
+		for url, expected in glob_cases:
+			assert glob_context._is_url_allowed(url) is expected
+
+		stars_context = BrowserContext(browser=None, config=BrowserContextConfig(allowed_domains=['*google.com', 'wiki*']))
+
+		# '*google.com' matches by ending, 'wiki*' by beginning; browser-internal URLs are accepted too
+		stars_cases = (
+			('https://google.com', True),
+			('https://www.google.com', True),
+			('https://anygoogle.com', True),
+			('https://wiki.org', True),
+			('https://wikipedia.org', True),
+			('https://example.com', False),
+			('chrome://settings', True),
+			('about:blank', True),
+		)
+		for url, expected in stars_cases:
+			assert stars_context._is_url_allowed(url) is expected
+
+		# Credential tricks must be rejected for glob patterns as well
+		credential_bypass_attempts = (
+			(glob_context, 'https://allowed.example.com:password@notallowed.com'),
+			(glob_context, 'https://subdomain.example.com@evil.com'),
+			(glob_context, 'https://sub.example.com%20@malicious.org'),
+			(stars_context, 'https://anygoogle.com@evil.org'),
+		)
+		for context, url in credential_bypass_attempts:
+			assert context._is_url_allowed(url) is False
+
+	def test_glob_pattern_edge_cases(self):
+		"""Patterns must not match in the middle of a domain; broad wildcards are shown to be permissive."""
+		stars_context = BrowserContext(browser=None, config=BrowserContextConfig(allowed_domains=['*google.com', 'wiki*']))
+
+		# 'wiki' or 'google' in the middle of a domain satisfies neither 'wiki*' nor '*google.com'
+		rejected_urls = (
+			'https://notawiki.com',
+			'https://havewikipages.org',
+			'https://my-wiki-site.com',
+			'https://mygoogle.company.com',
+		)
+		for url in rejected_urls:
+			assert stars_context._is_url_allowed(url) is False
+
+		risky_context = BrowserContext(browser=None, config=BrowserContextConfig(allowed_domains=['*.google.*']))
+
+		# Legitimate Google domains pass, but so does a hostile domain with the same
+		# subdomain structure - which is why such wide wildcard patterns are risky
+		accepted_urls = (
+			'https://www.google.com',
+			'https://mail.google.co.uk',
+			'https://www.google.evil.com',
+		)
+		for url in accepted_urls:
+			assert risky_context._is_url_allowed(url) is True
--- a/browser-use/tests/test_vision.py
+++ b/browser-use/tests/test_vision.py
@ -0,0 +1,64 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import os
+import sys
+from pprint import pprint
+
+import pytest
+
+from browser_use.browser.browser import Browser, BrowserConfig
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import asyncio
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, AgentHistoryList, Controller
+
+llm = ChatOpenAI(model='gpt-4o')
+controller = Controller()
+
+# Use this test to ask the model questions about the page, for example:
+# which colors do you see for the bbox labels (list all of them with their label),
+# or what are the smallest bboxes with labels.
+
+
+@controller.registry.action(description='explain what you see on the screen and ask user for input')
+async def explain_screen(text: str) -> str:
+	"""Pretty-print the given explanation, then return the next question typed on stdin."""
+	pprint(text)
+	return input('\nuser input next question: \n')
+
+
+@controller.registry.action(description='done')
+async def done(text: str) -> str:
+	# Registered under the description 'done'; the text argument is ignored and a fixed
+	# string naming the explain_screen action is returned instead
+	# pprint(text)
+	return 'call explain_screen'
+
+
+@pytest.fixture(scope='function')
+def event_loop():
+	"""Provide a fresh event loop from the current policy to each test and close it afterwards."""
+	policy = asyncio.get_event_loop_policy()
+	fresh_loop = policy.new_event_loop()
+	yield fresh_loop
+	fresh_loop.close()
+
+
+@pytest.mark.skip(reason='this is for local testing only')
+async def test_vision():
+	# Interactive local session: the browser is started with headless=False and the
+	# explain_screen action registered on the module-level controller reads from stdin
+	agent = Agent(
+		task='call explain_screen all the time the user asks you questions e.g. about the page like bbox which you see are labels  - your task is to explain it and get the next question',
+		llm=llm,
+		controller=controller,
+		browser=Browser(config=BrowserConfig(disable_security=True, headless=False)),
+	)
+	try:
+		# The returned history is not inspected; nothing is asserted in this test
+		history: AgentHistoryList = await agent.run(20)
+	finally:
+		# Make sure to close the browser
+		await agent.browser.close()
--- a/browser-use/tests/test_wait_for_element.py
+++ b/browser-use/tests/test_wait_for_element.py
@ -0,0 +1,68 @@
+import asyncio
+import os
+import sys
+
+# Put the directory above tests/ at the front of sys.path so the browser_use
+# imports below can resolve against it; skip the insert if it is already
+# listed to avoid duplicate entries.
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
+if project_root not in sys.path:
+	sys.path.insert(0, project_root)
+
+import pytest
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+
+# browser_use top-level API
+from browser_use import Agent, Controller
+
+# Local imports
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+# Load environment variables.
+# NOTE(review): presumably picks up OPENAI_API_KEY from a local .env file - confirm.
+load_dotenv()
+
+# Initialize language model and controller at import time; both are passed to
+# the Agent created in test_wait_for_element below.
+llm = ChatOpenAI(model='gpt-4o')
+controller = Controller()
+
+
+@pytest.mark.skip(reason='this is for local testing only')
+async def test_wait_for_element():
+	"""Test 'Wait for element' action.
+
+	Opens https://pypi.org/ as an initial action, lets the agent run for at
+	most three steps, then checks that 'wait_for_element' is among the
+	executed action names and that the '#search' element exists and is
+	visible on the current page. The browser context and the browser are
+	closed afterwards, whether or not the assertions pass.
+	"""
+
+	initial_actions = [
+		{'open_tab': {'url': 'https://pypi.org/'}},
+		# Uncomment the line below to include the wait action in initial actions.
+		# {'wait_for_element': {'selector': '#search', 'timeout': 30}},
+	]
+
+	# Set up the browser and its context; keep a handle on both so they can be
+	# released in the finally block below.
+	browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
+	context = BrowserContext(browser=browser)
+
+	try:
+		# Create the agent with the task.
+		agent = Agent(
+			task="Wait for element '#search' to be visible with a timeout of 30 seconds.",
+			llm=llm,
+			browser_context=context,
+			initial_actions=initial_actions,
+			controller=controller,
+		)
+
+		# Run the agent for a few steps to trigger navigation and then the wait action.
+		history = await agent.run(max_steps=3)
+		action_names = history.action_names()
+
+		# Ensure that the wait_for_element action was executed.
+		assert 'wait_for_element' in action_names, 'Expected wait_for_element action to be executed.'
+
+		# Verify that the #search element is visible by querying the page.
+		page = await context.get_current_page()
+		header_handle = await page.query_selector('#search')
+		assert header_handle is not None, 'Expected to find a #search element on the page.'
+		is_visible = await header_handle.is_visible()
+		assert is_visible, 'Expected the #search element to be visible.'
+	finally:
+		# Release the context first, then the browser; the nested finally makes
+		# sure the browser is closed even if closing the context raises.
+		try:
+			await context.close()
+		finally:
+			await browser.close()
+
+
+# Allow running this check directly as a script, outside of pytest.
+if __name__ == '__main__':
+	asyncio.run(test_wait_for_element())