[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/browser_use/browser/browser.py
+++ b/browser-use/browser_use/browser/browser.py
@ -0,0 +1,421 @@
+"""
+Playwright browser on steroids.
+"""
+
+import asyncio
+import gc
+import logging
+import os
+import socket
+import subprocess
+from pathlib import Path
+from tempfile import gettempdir
+from typing import Literal
+
+import httpx
+import psutil
+from dotenv import load_dotenv
+from playwright.async_api import Browser as PlaywrightBrowser
+from playwright.async_api import Playwright, async_playwright
+from pydantic import AliasChoices, BaseModel, ConfigDict, Field
+
+load_dotenv()
+
+
+from browser_use.browser.chrome import (
+	CHROME_ARGS,
+	CHROME_DEBUG_PORT,
+	CHROME_DETERMINISTIC_RENDERING_ARGS,
+	CHROME_DISABLE_SECURITY_ARGS,
+	CHROME_DOCKER_ARGS,
+	CHROME_HEADLESS_ARGS,
+)
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
+from browser_use.utils import time_execution_async
+
+logger = logging.getLogger(__name__)
+
+
+def _env_flag(value: str | None) -> bool:
+	"""Return True when an environment-variable string looks truthy (starts with 't', 'y' or '1', case-insensitive)."""
+	# startswith() is safe on an empty string, whereas indexing [0] raises IndexError for `IN_DOCKER=`
+	return (value or '').lower().startswith(('t', 'y', '1'))
+
+
+# True when IN_DOCKER is set to something truthy (true / yes / 1); used below to add CHROME_DOCKER_ARGS at launch
+IN_DOCKER = _env_flag(os.environ.get('IN_DOCKER', 'false'))
+
+
+class ProxySettings(BaseModel):
+	"""Pydantic counterpart of Playwright's ProxySettings, so proxy options are validated like any other model."""
+
+	model_config = ConfigDict(populate_by_name=True, from_attributes=True)
+
+	server: str
+	bypass: str | None = None
+	username: str | None = None
+	password: str | None = None
+
+	# Mapping-style accessors so code written against Playwright's dict-shaped ProxySettings keeps working
+	def get(self, key, default=None):
+		"""Return the attribute named `key`, or `default` when there is no such attribute."""
+		return getattr(self, key, default)
+
+	def __getitem__(self, key):
+		"""Return the attribute named `key`; an unknown name propagates getattr's AttributeError."""
+		return getattr(self, key)
+
+
+class BrowserConfig(BaseModel):
+	r"""
+	Configuration for the Browser.
+
+	Default values:
+		headless: False
+			Whether to run browser in headless mode (not recommended)
+
+		disable_security: False
+			Disable browser security features (required for cross-origin iframe support)
+
+		extra_browser_args: []
+			Extra arguments to pass to the browser
+
+		wss_url: None
+			Connect to a browser instance via WebSocket
+
+		cdp_url: None
+			Connect to a browser instance via CDP
+
+		browser_class: 'chromium'
+			Which Playwright browser type to use: 'chromium', 'firefox' or 'webkit'
+
+		browser_binary_path: None
+			Path to a Browser instance to use to connect to your normal browser
+			e.g. '/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome'
+			Also accepted under the aliases browser_instance_path and chrome_instance_path.
+
+		chrome_remote_debugging_port: CHROME_DEBUG_PORT (imported from browser_use.browser.chrome)
+			Chrome remote debugging port to use when browser_binary_path is supplied.
+			This allows running multiple chrome browsers with the same browser_binary_path but on different ports.
+			It also makes it possible to launch a new user-provided chrome browser without closing already opened
+			chrome instances, by providing a non-default chrome debugging port.
+
+		keep_alive: False
+			Keep the browser alive after the agent has finished running
+			(alias: _force_keep_browser_alive, its former name)
+
+		deterministic_rendering: False
+			Enable deterministic rendering (makes GPU/font rendering consistent across different OS's and docker)
+
+		proxy: None
+			Optional ProxySettings; dumped to a dict and handed to Playwright when a builtin browser is launched
+
+		new_context_config: BrowserContextConfig()
+			Context defaults; its window_width / window_height / no_viewport are read when sizing a builtin browser window
+	"""
+
+	model_config = ConfigDict(
+		arbitrary_types_allowed=True,
+		extra='ignore',
+		populate_by_name=True,
+		from_attributes=True,
+		validate_assignment=True,
+		revalidate_instances='subclass-instances',
+	)
+
+	# Remote endpoints: when set, the browser is attached to rather than launched (cdp_url is checked before wss_url)
+	wss_url: str | None = None
+	cdp_url: str | None = None
+
+	# Local browser selection and launch arguments
+	browser_class: Literal['chromium', 'firefox', 'webkit'] = 'chromium'
+	browser_binary_path: str | None = Field(
+		default=None, validation_alias=AliasChoices('browser_instance_path', 'chrome_instance_path')
+	)
+	chrome_remote_debugging_port: int | None = CHROME_DEBUG_PORT
+	extra_browser_args: list[str] = Field(default_factory=list)
+
+	# Behaviour toggles
+	headless: bool = False
+	disable_security: bool = False  # disable_security=True is dangerous as any malicious URL visited could embed an iframe for the user's bank, and use their cookies to steal money
+	deterministic_rendering: bool = False
+	keep_alive: bool = Field(default=False, alias='_force_keep_browser_alive')  # used to be called _force_keep_browser_alive
+
+	# Network proxy and the defaults applied to contexts created from this browser
+	proxy: ProxySettings | None = None
+	new_context_config: BrowserContextConfig = Field(default_factory=BrowserContextConfig)
+
+
+# @singleton: TODO - think about whether a singleton makes sense here
+# @dev Singleton behaviour is not enforced (the decorator above is disabled), so you can create multiple instances if you need to.
+class Browser:
+	"""
+	Playwright browser on steroids.
+
+	This is persistent browser factory that can spawn multiple browser contexts.
+	It is recommended to use only one instance of Browser per your application (RAM usage will grow otherwise).
+	"""
+
+	def __init__(
+		self,
+		config: BrowserConfig | None = None,
+	):
+		"""
+		Create the browser factory without starting anything; Playwright is started later by _init().
+
+		Args:
+			config: Browser settings. A default BrowserConfig() is used when omitted.
+		"""
+		logger.debug('🌎  Initializing new browser')
+		self.config = config or BrowserConfig()
+		self.playwright: Playwright | None = None
+		self.playwright_browser: PlaywrightBrowser | None = None
+		# Handle of the Chrome process spawned by _setup_user_provided_browser (if any).
+		# Declared up-front so close() can always rely on the attribute existing.
+		self._chrome_subprocess: psutil.Process | None = None
+
+	async def new_context(self, config: BrowserContextConfig | None = None) -> BrowserContext:
+		"""
+		Build a BrowserContext bound to this browser.
+
+		The browser-level config is dumped to a dict and the optional per-context
+		`config` is layered on top of it, so context values win when keys clash.
+		"""
+		merged_config = self.config.model_dump() if self.config else {}
+		if config:
+			merged_config.update(config.model_dump())
+		return BrowserContext(config=BrowserContextConfig(**merged_config), browser=self)
+
+	async def get_playwright_browser(self) -> PlaywrightBrowser:
+		"""Return the underlying Playwright browser, starting it through _init() on first access."""
+		if self.playwright_browser is not None:
+			return self.playwright_browser
+		return await self._init()
+
+	@time_execution_async('--init (browser)')
+	async def _init(self):
+		"""Start Playwright, set up the configured browser, and cache both on the instance."""
+		self.playwright = await async_playwright().start()
+		self.playwright_browser = await self._setup_browser(self.playwright)
+		return self.playwright_browser
+
+	async def _setup_remote_cdp_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+		"""
+		Attach to an already-running browser through the CDP endpoint in config.cdp_url.
+
+		Raises:
+			ValueError: If browser_binary_path mentions firefox (CDP is deprecated there) or cdp_url is not set.
+		"""
+		binary_path = (self.config.browser_binary_path or '').lower()
+		if 'firefox' in binary_path:
+			raise ValueError(
+				'CDP has been deprecated for firefox, check: https://fxdx.dev/deprecating-cdp-support-in-firefox-embracing-the-future-with-webdriver-bidi/'
+			)
+
+		cdp_url = self.config.cdp_url
+		if not cdp_url:
+			raise ValueError('CDP URL is required')
+
+		logger.info(f'🔌  Connecting to remote browser via CDP {cdp_url}')
+		browser_type = getattr(playwright, self.config.browser_class)
+		return await browser_type.connect_over_cdp(cdp_url)
+
+	async def _setup_remote_wss_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+		"""
+		Attach to a remote browser through the WebSocket endpoint in config.wss_url.
+
+		Raises:
+			ValueError: If wss_url is not set.
+		"""
+		wss_url = self.config.wss_url
+		if not wss_url:
+			raise ValueError('WSS URL is required')
+
+		logger.info(f'🔌  Connecting to remote browser via WSS {wss_url}')
+		browser_type = getattr(playwright, self.config.browser_class)
+		return await browser_type.connect(wss_url)
+
+	async def _setup_user_provided_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+		"""
+		Connect over CDP to the browser at config.browser_binary_path, launching it first if needed.
+
+		Steps:
+			1. If something already answers on localhost:<chrome_remote_debugging_port>, reuse it.
+			2. Otherwise choose a user data dir (from a `--user-data-dir=` extra arg, else
+			   ~/.config/browseruse/profiles/default, else a temp-dir fallback), start the binary as a
+			   subprocess with remote debugging enabled, and poll the port up to 10 times.
+			3. Connect to the debugging port over CDP.
+
+		Raises:
+			ValueError: If browser_binary_path is not configured.
+			AssertionError: If browser_class is not 'chromium'.
+			RuntimeError: If the CDP connection to the newly started browser fails.
+		"""
+		if not self.config.browser_binary_path:
+			raise ValueError('A browser_binary_path is required')
+
+		assert self.config.browser_class == 'chromium', (
+			'browser_binary_path only supports chromium browsers (make sure browser_class=chromium)'
+		)
+
+		debug_endpoint = f'http://localhost:{self.config.chrome_remote_debugging_port}'
+		browser_class = getattr(playwright, self.config.browser_class)
+
+		try:
+			# Check if browser is already running
+			async with httpx.AsyncClient() as client:
+				response = await client.get(f'{debug_endpoint}/json/version', timeout=2)
+				if response.status_code == 200:
+					logger.info(f'🔌  Reusing existing browser found running on {debug_endpoint}')
+					browser = await browser_class.connect_over_cdp(
+						endpoint_url=debug_endpoint,
+						timeout=20000,  # 20 second timeout for connection
+					)
+					return browser
+		except httpx.RequestError:
+			logger.debug('🌎  No existing Chrome instance found, starting a new one')
+
+		user_data_dir_flag = '--user-data-dir='
+		provided_user_data_dir = [arg for arg in self.config.extra_browser_args if user_data_dir_flag in arg]
+
+		if provided_user_data_dir:
+			# Keep everything after the flag: split('=')[-1] would truncate a path that itself contains '='
+			user_data_dir = Path(provided_user_data_dir[0].partition(user_data_dir_flag)[2])
+		else:
+			fallback_user_data_dir = Path(gettempdir()) / 'browseruse' / 'profiles' / 'default'  # /tmp/browseruse
+			try:
+				# ~/.config/browseruse/profiles/default
+				user_data_dir = Path('~/.config') / 'browseruse' / 'profiles' / 'default'
+				user_data_dir = user_data_dir.expanduser()
+				user_data_dir.mkdir(parents=True, exist_ok=True)
+			except Exception as e:
+				logger.error(f'❌  Failed to create ~/.config/browseruse directory: {type(e).__name__}: {e}')
+				user_data_dir = fallback_user_data_dir
+				user_data_dir.mkdir(parents=True, exist_ok=True)
+
+		logger.info(f'🌐  Storing Browser Profile user data dir in: {user_data_dir}')
+		try:
+			# Remove any existing SingletonLock file to allow the browser to start
+			(user_data_dir / 'Default' / 'SingletonLock').unlink()
+			# The config list is mutated in place; skip the append if a previous call already added the flag
+			if '--no-first-run' not in self.config.extra_browser_args:
+				self.config.extra_browser_args.append('--no-first-run')
+		except OSError:
+			# FileNotFoundError / PermissionError are OSError subclasses; a missing or locked file is not fatal
+			pass
+
+		# Start a new Chrome instance.
+		# dict.fromkeys() removes duplicates while keeping first-seen order, so the flag order is
+		# deterministic (a set would reorder the flags arbitrarily from run to run).
+		chrome_launch_args = list(
+			dict.fromkeys(
+				[
+					f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
+					*([f'--user-data-dir={user_data_dir.resolve()}'] if not provided_user_data_dir else []),
+					*CHROME_ARGS,
+					*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
+					*(CHROME_HEADLESS_ARGS if self.config.headless else []),
+					*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
+					*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
+					*self.config.extra_browser_args,
+				]
+			)
+		)
+		chrome_sub_process = await asyncio.create_subprocess_exec(
+			self.config.browser_binary_path,
+			*chrome_launch_args,
+			stdout=subprocess.DEVNULL,
+			stderr=subprocess.DEVNULL,
+			shell=False,
+		)
+		# Keep a psutil handle so close() can kill the process together with its children
+		self._chrome_subprocess = psutil.Process(chrome_sub_process.pid)
+
+		# Wait (up to 10 tries, 1s apart) for the new instance to expose its debugging endpoint
+		for _ in range(10):
+			try:
+				async with httpx.AsyncClient() as client:
+					response = await client.get(f'{debug_endpoint}/json/version', timeout=2)
+					if response.status_code == 200:
+						break
+			except httpx.RequestError:
+				pass
+			await asyncio.sleep(1)
+
+		# Attempt to connect again after starting a new instance
+		try:
+			browser = await browser_class.connect_over_cdp(
+				endpoint_url=debug_endpoint,
+				timeout=20000,  # 20 second timeout for connection
+			)
+			return browser
+		except Exception as e:
+			logger.error(f'❌  Failed to start a new Chrome instance: {str(e)}')
+			# Chain the original error so the real cause stays visible in the traceback
+			raise RuntimeError(
+				'To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
+			) from e
+
+	async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+		"""
+		Launch one of Playwright's bundled browsers (chromium, firefox or webkit) per config.browser_class.
+
+		The window size comes from new_context_config when running headful with a viewport, is fixed at
+		1920x1080 when headless, and otherwise falls back to the detected screen resolution.
+
+		Raises:
+			AssertionError: If browser_binary_path is set (that case belongs to _setup_user_provided_browser).
+		"""
+		assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
+
+		# Use the configured window size from new_context_config if available
+		if (
+			not self.config.headless
+			and hasattr(self.config, 'new_context_config')
+			and hasattr(self.config.new_context_config, 'window_width')
+			and hasattr(self.config.new_context_config, 'window_height')
+			and not self.config.new_context_config.no_viewport
+		):
+			screen_size = {
+				'width': self.config.new_context_config.window_width,
+				'height': self.config.new_context_config.window_height,
+			}
+			offset_x, offset_y = get_window_adjustments()
+		elif self.config.headless:
+			screen_size = {'width': 1920, 'height': 1080}
+			offset_x, offset_y = 0, 0
+		else:
+			screen_size = get_screen_resolution()
+			offset_x, offset_y = get_window_adjustments()
+
+		debug_port = self.config.chrome_remote_debugging_port
+		debug_port_arg = f'--remote-debugging-port={debug_port}'
+
+		# dict.fromkeys() de-duplicates while keeping first-seen order, so the flag order is deterministic
+		# (a set would reorder the flags arbitrarily from run to run)
+		chrome_args = list(
+			dict.fromkeys(
+				[
+					debug_port_arg,
+					*CHROME_ARGS,
+					*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
+					*(CHROME_HEADLESS_ARGS if self.config.headless else []),
+					*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
+					*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
+					f'--window-position={offset_x},{offset_y}',
+					f'--window-size={screen_size["width"]},{screen_size["height"]}',
+					*self.config.extra_browser_args,
+				]
+			)
+		)
+
+		# Drop the remote-debugging-port arg when no port is configured (the field allows None, which
+		# connect_ex() cannot take) or when the port is already taken, to prevent conflicts
+		if debug_port is None:
+			chrome_args.remove(debug_port_arg)
+		else:
+			with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+				if s.connect_ex(('localhost', debug_port)) == 0:
+					chrome_args.remove(debug_port_arg)
+
+		browser_class = getattr(playwright, self.config.browser_class)
+		args = {
+			'chromium': chrome_args,
+			'firefox': list(dict.fromkeys(['-no-remote', *self.config.extra_browser_args])),
+			'webkit': list(dict.fromkeys(['--no-startup-window', *self.config.extra_browser_args])),
+		}
+
+		# `channel` selects a browser distribution; 'chromium' is only meaningful for the chromium
+		# browser type, so it is not forwarded to firefox / webkit launches.
+		# https://github.com/microsoft/playwright/issues/33566
+		launch_kwargs = {'channel': 'chromium'} if self.config.browser_class == 'chromium' else {}
+
+		browser = await browser_class.launch(
+			headless=self.config.headless,
+			args=args[self.config.browser_class],
+			proxy=self.config.proxy.model_dump() if self.config.proxy else None,
+			handle_sigterm=False,
+			handle_sigint=False,
+			**launch_kwargs,
+		)
+		return browser
+
+	async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
+		"""
+		Choose how to obtain a browser from the config and delegate to the matching helper.
+
+		Precedence: cdp_url, then wss_url, then a user-provided binary, and finally a builtin browser.
+		Any failure is logged at error level and re-raised unchanged.
+		"""
+		try:
+			cfg = self.config
+			if cfg.cdp_url:
+				return await self._setup_remote_cdp_browser(playwright)
+			if cfg.wss_url:
+				return await self._setup_remote_wss_browser(playwright)
+
+			# The headless warning only applies to locally launched browsers
+			if cfg.headless:
+				logger.warning('⚠️ Headless mode is not recommended. Many sites will detect and block all headless browsers.')
+
+			if cfg.browser_binary_path:
+				return await self._setup_user_provided_browser(playwright)
+			return await self._setup_builtin_browser(playwright)
+		except Exception as e:
+			logger.error(f'Failed to initialize Playwright browser: {e}')
+			raise
+
+	async def close(self):
+		"""
+		Close the Playwright browser, stop Playwright and kill any Chrome process this instance spawned.
+
+		Does nothing when config.keep_alive is set. Each shutdown step runs independently, so a failure
+		in one (e.g. the browser connection is already gone) no longer prevents the remaining steps;
+		failures are logged at debug level and never raised. The handles are always reset to None.
+		"""
+		if self.config.keep_alive:
+			return
+
+		try:
+			if playwright_browser := getattr(self, 'playwright_browser', None):
+				try:
+					await playwright_browser.close()
+				except Exception as e:
+					# 'OpenAI error' filter kept from the original implementation
+					if 'OpenAI error' not in str(e):
+						logger.debug(f'Failed to close browser properly: {e}')
+
+			if playwright := getattr(self, 'playwright', None):
+				try:
+					await playwright.stop()
+				except Exception as e:
+					if 'OpenAI error' not in str(e):
+						logger.debug(f'Failed to close browser properly: {e}')
+
+			if chrome_proc := getattr(self, '_chrome_subprocess', None):
+				try:
+					# always kill all children processes, otherwise chrome leaves a bunch of zombie processes
+					for proc in chrome_proc.children(recursive=True):
+						proc.kill()
+					chrome_proc.kill()
+				except Exception as e:
+					logger.debug(f'Failed to terminate chrome subprocess: {e}')
+
+		finally:
+			self.playwright_browser = None
+			self.playwright = None
+			self._chrome_subprocess = None
+			gc.collect()
+
+	def __del__(self):
+		"""Best-effort async cleanup when the object is destroyed; any failure is logged at debug level, never raised."""
+		try:
+			# getattr: __del__ can run on an instance whose __init__ never finished
+			if not (getattr(self, 'playwright_browser', None) or getattr(self, 'playwright', None)):
+				return
+			try:
+				loop = asyncio.get_running_loop()
+			except RuntimeError:
+				# get_running_loop() raises when no loop is running in this thread (it never returns a
+				# stopped loop), so the no-loop case has to be handled here to be reachable at all
+				asyncio.run(self.close())
+			else:
+				loop.create_task(self.close())
+		except Exception as e:
+			logger.debug(f'Failed to cleanup browser in destructor: {e}')
--- a/browser-use/browser_use/browser/chrome.py
+++ b/browser-use/browser_use/browser/chrome.py
@ -0,0 +1,177 @@
+# Shared Chrome launch constants for browser_use.browser.
+CHROME_EXTENSIONS = {}  # coming in a separate PR
+CHROME_EXTENSIONS_PATH = 'chrome_extensions'
+CHROME_PROFILE_PATH = 'chrome_profile'
+CHROME_PROFILE_USER = 'Default'
+# Default remote debugging port; used as the default of BrowserConfig.chrome_remote_debugging_port
+CHROME_DEBUG_PORT = 9242
+# Chrome feature names that get joined into a single --disable-features=... flag at the end of CHROME_ARGS
+CHROME_DISABLED_COMPONENTS = [
+	'Translate',
+	'AcceptCHFrame',
+	'OptimizationHints',
+	'ProcessPerSiteUpToMainFrameThreshold',
+	'InterestFeedContentSuggestions',
+	# 'CalculateNativeWinOcclusion',
+	'BackForwardCache',
+	# 'HeavyAdPrivacyMitigations',
+	'LazyFrameLoading',
+	# 'ImprovedCookieControls',
+	'PrivacySandboxSettings4',
+	'AutofillServerCommunication',
+	'CertificateTransparencyComponentUpdater',
+	'DestroyProfileOnBrowserClose',
+	'CrashReporting',
+	'OverscrollHistoryNavigation',
+	'InfiniteSessionRestore',
+	#'LockProfileCookieDatabase',  # disabling allows multiple chrome instances to concurrently modify profile, but might make chrome much slower https://github.com/yt-dlp/yt-dlp/issues/7271  https://issues.chromium.org/issues/40901624
+]  # it's always best to give each chrome instance its own exclusive copy of the user profile
+
+
+# Flags added to the launch arguments only when BrowserConfig.headless is true
+CHROME_HEADLESS_ARGS = [
+	'--headless=new',
+	# '--test-type',
+	# '--test-type=gpu',  # https://github.com/puppeteer/puppeteer/issues/10516
+	# '--enable-automation',                            # <- DONT USE THIS, it makes you easily detectable / blocked by cloudflare
+]
+
+# Flags added to the launch arguments only when the IN_DOCKER environment flag is truthy
+CHROME_DOCKER_ARGS = [
+	# Docker-specific options
+	# https://github.com/GoogleChrome/lighthouse-ci/tree/main/docs/recipes/docker-client#--no-sandbox-issues-explained
+	'--no-sandbox',  # rely on docker sandboxing in docker, otherwise we need cap_add: SYS_ADMIN to use host sandboxing
+	'--disable-gpu-sandbox',
+	'--disable-setuid-sandbox',
+	'--disable-dev-shm-usage',  # docker 75mb default shm size is not big enough, disabling just uses /tmp instead
+	'--no-xshm',
+	# don't try to disable (or install) dbus in docker, it's not needed, chrome can work without dbus despite the errors
+]
+
+# Flags added to the launch arguments only when BrowserConfig.disable_security is true
+CHROME_DISABLE_SECURITY_ARGS = [
+	# DANGER: JS isolation security features (to allow easier tampering with pages during automation)
+	# chrome://net-internals
+	'--disable-web-security',  # <- WARNING, breaks some sites that expect/enforce strict CORS headers (try webflow.com)
+	'--disable-site-isolation-trials',
+	'--disable-features=IsolateOrigins,site-per-process',
+	# '--allow-file-access-from-files',                     # <- WARNING, dangerous, allows JS to read filesystem using file:// URLs
+	# DANGER: Disable HTTPS verification
+	'--allow-running-insecure-content',  # Breaks CORS/CSRF/HSTS etc., useful sometimes but very easy to detect
+	'--ignore-certificate-errors',
+	'--ignore-ssl-errors',
+	'--ignore-certificate-errors-spki-list',
+	# '--allow-insecure-localhost',
+]
+
+# Flags to make chrome behave more deterministically across different OS's.
+# Added to the launch arguments only when BrowserConfig.deterministic_rendering is true.
+CHROME_DETERMINISTIC_RENDERING_ARGS = [
+	'--deterministic-mode',
+	'--js-flags=--random-seed=1157259159',  # make all JS random numbers deterministic by providing a seed
+	'--force-device-scale-factor=1',
+	# GPU, canvas, text, and pdf rendering config
+	# chrome://gpu
+	'--enable-webgl',  # enable web-gl graphics support
+	'--font-render-hinting=none',  # make rendering more deterministic by ignoring OS font hints, may also need css override, try:    * {text-rendering: geometricprecision !important; -webkit-font-smoothing: antialiased;}
+	'--force-color-profile=srgb',  # make rendering more deterministic by using consistent color profile, if browser looks weird, try: generic-rgb
+	# '--disable-partial-raster',  # make rendering more deterministic (TODO: verify if still needed)
+	'--disable-skia-runtime-opts',  # make rendering more deterministic by avoiding Skia hot path runtime optimizations
+	'--disable-2d-canvas-clip-aa',  # make rendering more deterministic by disabling antialiasing on 2d canvas clips
+	# '--disable-gpu',                                  # falls back to more consistent software renderer across all OS's, especially helps linux text rendering look less weird
+	# // '--use-gl=swiftshader',                        <- DO NOT USE, breaks M1 ARM64. it makes rendering more deterministic by using simpler CPU renderer instead of OS GPU renderer  bug: https://groups.google.com/a/chromium.org/g/chromium-dev/c/8eR2GctzGuw
+	# // '--disable-software-rasterizer',               <- DO NOT USE, harmless, used in tandem with --disable-gpu
+	# // '--run-all-compositor-stages-before-draw',     <- DO NOT USE, makes headful chrome hang on startup (tested v121 Google Chrome.app on macOS)
+	# // '--disable-gl-drawing-for-tests',              <- DO NOT USE, disables gl output (makes tests run faster if you dont care about canvas)
+	# // '--blink-settings=imagesEnabled=false',        <- DO NOT USE, disables images entirely (only sometimes useful to speed up loading)
+]
+
+
+# Baseline flags passed to every Chrome/Chromium launch; the mode-specific lists
+# (docker / headless / disable-security / deterministic-rendering) are appended after these.
+CHROME_ARGS = [
+	# Process management & performance tuning
+	# chrome://process-internals
+	# '--disable-lazy-loading',  # make rendering more deterministic by loading all content up-front instead of on-focus
+	# '--disable-renderer-backgrounding',  # dont throttle tab rendering based on focus/visibility
+	# '--disable-background-networking',  # dont throttle tab networking based on focus/visibility
+	# '--disable-background-timer-throttling',  # dont throttle tab timers based on focus/visibility
+	# '--disable-backgrounding-occluded-windows',  # dont throttle tab window based on focus/visibility
+	# '--disable-ipc-flooding-protection',  # dont throttle ipc traffic or accessing big request/response/buffer/etc. objects will fail
+	# '--disable-extensions-http-throttling',  # dont throttle http traffic based on runtime heuristics
+	# '--disable-field-trial-config',  # disable shared field trial state between browser processes
+	# '--disable-back-forward-cache',  # disable browsing navigation cache
+	# Profile data dir setup
+	# chrome://profile-internals
+	# f'--user-data-dir={CHROME_PROFILE_PATH}',     # managed by playwright arg instead
+	# f'--profile-directory={CHROME_PROFILE_USER}',
+	# '--password-store=basic',  # use mock keychain instead of OS-provided keychain (we manage auth.json instead)
+	# '--use-mock-keychain',
+	# '--disable-cookie-encryption',  # we need to be able to write unencrypted cookies to save/load auth.json
+	'--disable-sync',  # don't try to use Google account sync features while automation is active
+	# Extensions
+	# chrome://inspect/#extensions
+	# f'--load-extension={CHROME_EXTENSIONS.map(({unpacked_path}) => unpacked_path).join(',')}',  # not needed when using existing profile that already has extensions installed
+	# f'--allowlisted-extension-id={",".join(CHROME_EXTENSIONS.keys())}',
+	'--allow-legacy-extension-manifests',
+	'--allow-pre-commit-input',  # allow JS mutations before page rendering is complete
+	'--disable-blink-features=AutomationControlled',  # hide the signatures that announce browser is being remote-controlled
+	# '--proxy-server=https://<host>:<port>',  # send all network traffic through a proxy (never commit real proxy credentials here)
+	# f'--proxy-bypass-list=127.0.0.1',
+	# Browser window and viewport setup
+	# chrome://version
+	# f'--user-agent="{DEFAULT_USER_AGENT}"',
+	# f'--window-size={DEFAULT_VIEWPORT.width},{DEFAULT_VIEWPORT.height}',
+	# '--window-position=0,0',
+	# '--start-maximized',
+	'--install-autogenerated-theme=0,0,0',  # black border makes it easier to see which chrome window is browser-use's
+	'--hide-scrollbars',  # stop scrollbars from affecting screenshot width/height
+	#'--virtual-time-budget=60000',  # DONT USE THIS, makes chrome hang forever and doesn't work, used to fast-forward all animations & timers by 60s, dont use this it's unfortunately buggy and breaks screenshot and PDF capture sometimes
+	#'--autoplay-policy=no-user-gesture-required',  # auto-start videos so they trigger network requests + show up in outputs
+	#'--disable-gesture-requirement-for-media-playback',
+	#'--lang=en-US,en;q=0.9',
+	# IO: stdin/stdout, debug port config
+	# chrome://inspect
+	'--log-level=2',  # Chromium minimum log level: 0=INFO 1=WARNING 2=ERROR 3=FATAL
+	'--enable-logging=stderr',
+	# '--remote-debugging-address=127.0.0.1',         <- DONT USE THIS, no longer supported on chrome >100, never expose to non-localhost, would allow attacker to drive your browser from any machine
+	# '--enable-experimental-extension-apis',          # add support for tab groups via chrome.tabs extension API
+	'--disable-focus-on-load',  # prevent browser from hijacking focus
+	'--disable-window-activation',
+	# '--in-process-gpu',                            <- DONT USE THIS, makes headful startup time ~5-10s slower (tested v121 Google Chrome.app on macOS)
+	# '--disable-component-extensions-with-background-pages',  # TODO: check this, disables chrome components that only run in background with no visible UI (could lower startup time)
+	# uncomment to disable hardware camera/mic/speaker access + present fake devices to websites
+	# (faster to disable, but disabling breaks recording browser audio in puppeteer-stream screenrecordings)
+	# '--use-fake-device-for-media-stream',
+	# '--use-fake-ui-for-media-stream',
+	# '--disable-features=GlobalMediaControls,MediaRouter,DialMediaRouteProvider',
+	# Output format options (PDF, screenshot, etc.)
+	'--export-tagged-pdf',  # include table of contents and tags in printed PDFs
+	'--generate-pdf-document-outline',
+	# Suppress first-run features, popups, hints, updates, etc.
+	# chrome://system
+	'--no-pings',
+	'--no-default-browser-check',
+	'--no-startup-window',
+	'--ash-no-nudges',
+	'--disable-infobars',
+	'--disable-search-engine-choice-screen',
+	'--disable-session-crashed-bubble',
+	'--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',  # disable browser self-update while automation is active
+	'--hide-crash-restore-bubble',
+	'--suppress-message-center-popups',
+	'--disable-client-side-phishing-detection',
+	'--disable-domain-reliability',
+	'--disable-datasaver-prompt',
+	'--disable-hang-monitor',
+	# ('--disable-session-crashed-bubble' was listed a second time here; the duplicate entry is removed)
+	'--disable-speech-synthesis-api',
+	'--disable-speech-api',
+	'--disable-print-preview',
+	'--safebrowsing-disable-auto-update',
+	# '--deny-permission-prompts',
+	'--disable-external-intent-requests',
+	# '--disable-notifications',
+	'--disable-desktop-notifications',
+	'--noerrdialogs',
+	'--disable-prompt-on-repost',
+	'--silent-debugger-extension-api',
+	# '--block-new-web-contents',
+	'--metrics-recording-only',
+	'--disable-breakpad',
+	# other feature flags
+	# chrome://flags        chrome://components
+	f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}',
+	'--enable-features=NetworkService',
+]
--- a/browser-use/browser_use/browser/context.py
+++ b/browser-use/browser_use/browser/context.py
--- a/browser-use/browser_use/browser/dolphin_service.py
+++ b/browser-use/browser_use/browser/dolphin_service.py
@ -0,0 +1,348 @@
+import logging
+import os
+
+import aiohttp
+from playwright.async_api import Page, async_playwright
+
+from browser_use.browser.service import Browser
+from browser_use.browser.views import BrowserState, TabInfo
+
+logger = logging.getLogger(__name__)
+
+
+class DolphinBrowser(Browser):
+	"""A class for managing Dolphin Anty browser sessions using Playwright"""
+
+	def __init__(self, headless: bool = False, keep_open: bool = False):
+		"""
+		Initialize the DolphinBrowser instance.
+
+		Args:
+		    headless (bool): Run browser in headless mode (default: False).
+		    keep_open (bool): Keep browser open after finishing tasks (default: False).
+		"""
+		# Retrieve environment variables for API connection
+		self.api_token = os.getenv('DOLPHIN_API_TOKEN')
+		self.api_url = os.getenv('DOLPHIN_API_URL', 'http://localhost:3001/v1.0')
+		self.profile_id = os.getenv('DOLPHIN_PROFILE_ID')
+
+		# Initialize internal attributes
+		self.playwright = None
+		self.browser = None
+		self.context = None
+		self.page = None
+		self.headless = headless
+		self.keep_open = keep_open
+		self._pages: list[Page] = []  # List to store open pages
+		self.session = None
+		self.cached_state = None
+
+	async def get_current_page(self) -> Page:
+		"""
+		Get the currently active page.
+
+		Raises:
+		    Exception: If no active page is available.
+		"""
+		if not self.page:
+			raise Exception('No active page. Browser might not be connected.')
+		return self.page
+
+	async def create_new_tab(self, url: str | None = None) -> None:
+		"""
+		Create a new tab and optionally navigate to a given URL.
+
+		Args:
+		    url (str, optional): URL to navigate to after creating the tab. Defaults to None.
+
+		Raises:
+		    Exception: If browser context is not initialized or navigation fails.
+		"""
+		if not self.context:
+			raise Exception('Browser context not initialized')
+
+		# Create new page (tab) in the current browser context
+		new_page = await self.context.new_page()
+		self._pages.append(new_page)
+		self.page = new_page  # Set as current page
+
+		if url:
+			try:
+				# Navigate to the URL and wait for the page to load
+				await new_page.goto(url, wait_until='networkidle')
+				await self.wait_for_page_load()
+			except Exception as e:
+				logger.error(f'Failed to navigate to URL {url}: {str(e)}')
+				raise
+
+	async def switch_to_tab(self, page_id: int) -> None:
+		"""
+		Switch to a specific tab by its page ID.
+
+		Args:
+		    page_id (int): The index of the tab to switch to.
+
+		Raises:
+		    Exception: If the tab index is out of range or no tabs are available.
+		"""
+		if not self._pages:
+			raise Exception('No tabs available')
+
+		# Handle negative indices (e.g., -1 for last tab)
+		if page_id < 0:
+			page_id = len(self._pages) + page_id
+
+		if page_id >= len(self._pages) or page_id < 0:
+			raise Exception(f'Tab index {page_id} out of range')
+
+		# Set the current page to the selected tab
+		self.page = self._pages[page_id]
+		await self.page.bring_to_front()  # Bring tab to the front
+		await self.wait_for_page_load()
+
+	async def get_tabs_info(self) -> list[TabInfo]:
+		"""
+		Get information about all open tabs.
+
+		Returns:
+		    list: A list of TabInfo objects containing details about each tab.
+		"""
+		tabs_info = []
+		for idx, page in enumerate(self._pages):
+			tab_info = TabInfo(
+				page_id=idx,
+				url=page.url,
+				title=await page.title(),  # Fetch the title of the page
+			)
+			tabs_info.append(tab_info)
+		return tabs_info
+
+	async def wait_for_page_load(self, timeout: int = 30000):
+		"""
+		Wait for the page to load completely.
+
+		Args:
+		    timeout (int): Maximum time to wait for page load in milliseconds (default: 30000ms).
+
+		Raises:
+		    Exception: If the page fails to load within the specified timeout.
+		"""
+		if self.page:
+			try:
+				await self.page.wait_for_load_state('networkidle', timeout=timeout)
+			except Exception as e:
+				logger.warning(f'Wait for page load timeout: {str(e)}')
+
+	async def get_session(self):
+		"""
+		Get the current session.
+
+		Returns:
+		    DolphinBrowser: The current DolphinBrowser instance.
+
+		Raises:
+		    Exception: If the browser is not connected.
+		"""
+		if not self.browser:
+			raise Exception('Browser not connected. Call connect() first.')
+		self.session = self
+		return self
+
+	async def authenticate(self):
+		"""
+		Authenticate with Dolphin Anty API using the API token.
+
+		Raises:
+		    Exception: If authentication fails.
+		"""
+		async with aiohttp.ClientSession() as session:
+			auth_url = f'{self.api_url}/auth/login-with-token'
+			auth_data = {'token': self.api_token}
+			async with session.post(auth_url, json=auth_data) as response:
+				if not response.ok:
+					raise Exception(f'Failed to authenticate with Dolphin Anty: {await response.text()}')
+				return await response.json()
+
+	async def get_browser_profiles(self):
+		"""
+		Get a list of available browser profiles from Dolphin Anty.
+
+		Returns:
+		    list: A list of browser profiles.
+
+		Raises:
+		    Exception: If fetching the browser profiles fails.
+		"""
+		# Authenticate before fetching profiles
+		await self.authenticate()
+
+		async with aiohttp.ClientSession() as session:
+			headers = {'Authorization': f'Bearer {self.api_token}'}
+			async with session.get(f'{self.api_url}/browser_profiles', headers=headers) as response:
+				if not response.ok:
+					raise Exception(f'Failed to get browser profiles: {await response.text()}')
+				data = await response.json()
+				return data.get('data', [])  # Return the profiles array from the response
+
+	async def start_profile(self, profile_id: str | None = None, headless: bool = False) -> dict:
+		"""
+		Start a browser profile on Dolphin Anty.
+
+		Args:
+		    profile_id (str, optional): Profile ID to start (defaults to the one set in the environment).
+		    headless (bool): Run browser in headless mode (default: False).
+
+		Returns:
+		    dict: Information about the started profile.
+
+		Raises:
+		    ValueError: If no profile ID is provided and no default is set.
+		    Exception: If starting the profile fails.
+		"""
+		# Authenticate before starting the profile
+		await self.authenticate()
+
+		profile_id = profile_id or self.profile_id
+		if not profile_id:
+			raise ValueError('No profile ID provided')
+
+		url = f'{self.api_url}/browser_profiles/{profile_id}/start'
+		params = {'automation': 1}
+		if headless:
+			params['headless'] = 1
+
+		async with aiohttp.ClientSession() as session:
+			async with session.get(url, params=params) as response:
+				if not response.ok:
+					raise Exception(f'Failed to start profile: {await response.text()}')
+				return await response.json()
+
+	async def stop_profile(self, profile_id: str | None = None):
+		"""
+		Stop a browser profile on Dolphin Anty.
+
+		Args:
+		    profile_id (str, optional): Profile ID to stop (defaults to the one set in the environment).
+
+		Returns:
+		    dict: Information about the stopped profile.
+
+		Raises:
+		    ValueError: If no profile ID is provided and no default is set.
+		"""
+		# Authenticate before stopping the profile
+		await self.authenticate()
+
+		profile_id = profile_id or self.profile_id
+		if not profile_id:
+			raise ValueError('No profile ID provided')
+
+		url = f'{self.api_url}/browser_profiles/{profile_id}/stop'
+		async with aiohttp.ClientSession() as session:
+			async with session.get(url) as response:
+				return await response.json()
+
+	async def connect(self, profile_id: str | None = None):
+		"""
+		Connect to a running browser profile using Playwright.
+
+		Args:
+		    profile_id (str, optional): Profile ID to connect to (defaults to the one set in the environment).
+
+		Returns:
+		    PlaywrightBrowser: The connected browser instance.
+
+		Raises:
+		    Exception: If authentication or profile connection fails.
+		"""
+		# Authenticate before connecting to the profile
+		await self.authenticate()
+
+		# Start the browser profile
+		profile_data = await self.start_profile(profile_id)
+
+		if not profile_data.get('success'):
+			raise Exception(f'Failed to start profile: {profile_data}')
+
+		automation = profile_data['automation']
+		port = automation['port']
+		ws_endpoint = automation['wsEndpoint']
+		ws_url = f'ws://127.0.0.1:{port}{ws_endpoint}'
+
+		# Use Playwright to connect to the browser's WebSocket endpoint
+		self.playwright = await async_playwright().start()
+		self.browser = await self.playwright.chromium.connect_over_cdp(ws_url)
+
+		# Get or create a browser context and page
+		contexts = self.browser.contexts
+		self.context = contexts[0] if contexts else await self.browser.new_context()
+		pages = self.context.pages
+		self.page = pages[0] if pages else await self.context.new_page()
+
+		self._pages = [self.page]  # Initialize pages list with the first page
+
+		return self.browser
+
+	async def close(self, force: bool = False):
+		"""
+		Close the browser connection and clean up resources.
+
+		Args:
+		    force (bool): If True, forcefully stop the associated profile (default: False).
+		"""
+		try:
+			# Close all open pages
+			if self._pages:
+				for page in self._pages:
+					try:
+						await page.close()
+					except BaseException:
+						pass
+				self._pages = []
+
+			# Close the browser and Playwright instance
+			if self.browser:
+				await self.browser.close()
+
+			if self.playwright:
+				await self.playwright.stop()
+
+			if force:
+				await self.stop_profile()  # Force stop the profile
+		except Exception as e:
+			logger.error(f'Error during browser cleanup: {str(e)}')
+
+	async def get_current_state(self) -> BrowserState:
+		"""
+		Get the current state of the browser (URL, content, viewport size, tabs).
+
+		Returns:
+		    BrowserState: The current state of the browser.
+
+		Raises:
+		    Exception: If no active page is available.
+		"""
+		if not self.page:
+			raise Exception('No active page')
+
+		# Get page content and viewport size
+		content = await self.page.content()
+		viewport_size = await self.page.viewport_size()
+
+		# Create and return the current browser state
+		state = BrowserState(
+			url=self.page.url,
+			content=content,
+			viewport_height=viewport_size['height'] if viewport_size else 0,
+			viewport_width=viewport_size['width'] if viewport_size else 0,
+			tabs=await self.get_tabs_info(),
+		)
+
+		# Cache and return the state
+		self.cached_state = state
+		return state
+
+	def __del__(self):
+		"""Clean up resources when the DolphinBrowser instance is deleted."""
+		# No need to handle session cleanup as we're using self as session
+		pass
--- a/browser-use/browser_use/browser/tests/httpx_client_test.py
+++ b/browser-use/browser_use/browser/tests/httpx_client_test.py
@ -0,0 +1,39 @@
+import httpx
+import pytest
+
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+@pytest.mark.asyncio
+async def test_browser_close_doesnt_affect_external_httpx_clients():
+	"""
+	Test that Browser.close() doesn't close HTTPX clients created outside the Browser instance.
+	This test demonstrates the issue where Browser.close() is closing all HTTPX clients.
+	"""
+	# Create an external HTTPX client that should remain open
+	external_client = httpx.AsyncClient()
+
+	# Create a Browser instance
+	browser = Browser(config=BrowserConfig(headless=True))
+
+	# Close the browser (which should trigger cleanup_httpx_clients)
+	await browser.close()
+
+	# Check if the external client is still usable
+	try:
+		# If the client is closed, this will raise RuntimeError
+		# Using a simple HEAD request to a reliable URL
+		await external_client.head('https://www.example.com', timeout=2.0)
+		client_is_closed = False
+	except RuntimeError as e:
+		# If we get "Cannot send a request, as the client has been closed"
+		client_is_closed = 'client has been closed' in str(e)
+	except Exception:
+		# Any other exception means the client is not closed but request failed
+		client_is_closed = False
+	finally:
+		# Always clean up our test client properly
+		await external_client.aclose()
+
+	# Our external client should not be closed by browser.close()
+	assert not client_is_closed, 'External HTTPX client was incorrectly closed by Browser.close()'
--- a/browser-use/browser_use/browser/tests/screenshot_test.py
+++ b/browser-use/browser_use/browser/tests/screenshot_test.py
@ -0,0 +1,36 @@
+import asyncio
+import base64
+
+import pytest
+
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+async def test_take_full_page_screenshot():
+	browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
+	try:
+		async with await browser.new_context() as context:
+			page = await context.get_current_page()
+			# Go to a test page
+			await page.goto('https://example.com')
+
+			await asyncio.sleep(3)
+			# Take full page screenshot
+			screenshot_b64 = await context.take_screenshot(full_page=True)
+			await asyncio.sleep(3)
+			# Verify screenshot is not empty and is valid base64
+			assert screenshot_b64 is not None
+			assert isinstance(screenshot_b64, str)
+			assert len(screenshot_b64) > 0
+
+			# Test we can decode the base64 string
+			try:
+				base64.b64decode(screenshot_b64)
+			except Exception as e:
+				pytest.fail(f'Failed to decode base64 screenshot: {str(e)}')
+	finally:
+		await browser.close()
+
+
+# Allow running this test directly as a script, without pytest.
+if __name__ == '__main__':
+	asyncio.run(test_take_full_page_screenshot())
--- a/browser-use/browser_use/browser/tests/test_clicks.py
+++ b/browser-use/browser_use/browser/tests/test_clicks.py
@ -0,0 +1,96 @@
+import asyncio
+import json
+
+import anyio
+import pytest
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.dom.views import DOMBaseNode, DOMElementNode, DOMTextNode
+from browser_use.utils import time_execution_sync
+
+
+class ElementTreeSerializer:
+	@staticmethod
+	def dom_element_node_to_json(element_tree: DOMElementNode) -> dict:
+		def node_to_dict(node: DOMBaseNode) -> dict:
+			if isinstance(node, DOMTextNode):
+				return {'type': 'text', 'text': node.text}
+			elif isinstance(node, DOMElementNode):
+				return {
+					'type': 'element',
+					'tag_name': node.tag_name,
+					'attributes': node.attributes,
+					'highlight_index': node.highlight_index,
+					'children': [node_to_dict(child) for child in node.children],
+				}
+			return {}
+
+		return node_to_dict(element_tree)
+
+
+# run with: pytest browser_use/browser/tests/test_clicks.py
+@pytest.mark.asyncio
+async def test_highlight_elements():
+	browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
+
+	async with await browser.new_context() as context:
+		page = await context.get_current_page()
+		# await page.goto('https://immobilienscout24.de')
+		# await page.goto('https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/service-plans')
+		# await page.goto('https://google.com/search?q=elon+musk')
+		# await page.goto('https://kayak.com')
+		# await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
+		# await page.goto('https://dictionary.cambridge.org')
+		# await page.goto('https://github.com')
+		await page.goto('https://huggingface.co/')
+
+		await asyncio.sleep(1)
+
+		while True:
+			try:
+				# await asyncio.sleep(10)
+				state = await context.get_state(True)
+
+				async with await anyio.open_file('./tmp/page.json', 'w') as f:
+					await f.write(
+						json.dumps(
+							ElementTreeSerializer.dom_element_node_to_json(state.element_tree),
+							indent=1,
+						)
+					)
+
+				# await time_execution_sync('highlight_selector_map_elements')(
+				# 	browser.highlight_selector_map_elements
+				# )(state.selector_map)
+
+				# Find and print duplicate XPaths
+				xpath_counts = {}
+				if not state.selector_map:
+					continue
+				for selector in state.selector_map.values():
+					xpath = selector.xpath
+					if xpath in xpath_counts:
+						xpath_counts[xpath] += 1
+					else:
+						xpath_counts[xpath] = 1
+
+				print('\nDuplicate XPaths found:')
+				for xpath, count in xpath_counts.items():
+					if count > 1:
+						print(f'XPath: {xpath}')
+						print(f'Count: {count}\n')
+
+				print(list(state.selector_map.keys()), 'Selector map keys')
+				print(state.element_tree.clickable_elements_to_string())
+				action = input('Select next action: ')
+
+				await time_execution_sync('remove_highlight_elements')(context.remove_highlights)()
+
+				node_element = state.selector_map[int(action)]
+
+				# check if index of selector map are the same as index of items in dom_items
+
+				await context._click_element_node(node_element)
+
+			except Exception as e:
+				print(e)
--- a/browser-use/browser_use/browser/utils/screen_resolution.py
+++ b/browser-use/browser_use/browser/utils/screen_resolution.py
@ -0,0 +1,41 @@
+import sys
+
+
+def get_screen_resolution():
+	if sys.platform == 'darwin':  # macOS
+		try:
+			from AppKit import NSScreen
+
+			screen = NSScreen.mainScreen().frame()
+			return {'width': int(screen.size.width), 'height': int(screen.size.height)}
+		except ImportError:
+			print('AppKit is not available. Make sure you are running this on macOS with pyobjc installed.')
+		except Exception as e:
+			print(f'Error retrieving macOS screen resolution: {e}')
+		return {'width': 2560, 'height': 1664}
+
+	else:  # Windows & Linux
+		try:
+			from screeninfo import get_monitors
+
+			monitors = get_monitors()
+			if not monitors:
+				raise Exception('No monitors detected.')
+			monitor = monitors[0]
+			return {'width': monitor.width, 'height': monitor.height}
+		except ImportError:
+			print("screeninfo package not found. Install it using 'pip install screeninfo'.")
+		except Exception as e:
+			print(f'Error retrieving screen resolution: {e}')
+
+		return {'width': 1920, 'height': 1080}
+
+
+def get_window_adjustments():
+	"""Returns recommended x, y offsets for window positioning"""
+	if sys.platform == 'darwin':  # macOS
+		return -4, 24  # macOS has a small title bar, no border
+	elif sys.platform == 'win32':  # Windows
+		return -8, 0  # Windows has a border on the left
+	else:  # Linux
+		return 0, 0
--- a/browser-use/browser_use/browser/views.py
+++ b/browser-use/browser_use/browser/views.py
@ -0,0 +1,54 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+from pydantic import BaseModel
+
+from browser_use.dom.history_tree_processor.service import DOMHistoryElement
+from browser_use.dom.views import DOMState
+
+
+# Pydantic
+class TabInfo(BaseModel):
+	"""Represents information about a browser tab"""
+
+	# Numeric identifier of the tab (DolphinBrowser.get_tabs_info uses the tab's list index)
+	page_id: int
+	# Address currently loaded in the tab
+	url: str
+	# Title of the page shown in the tab
+	title: str
+	parent_page_id: int | None = None  # parent page that contains this popup or cross-origin iframe
+
+
+@dataclass
+class BrowserState(DOMState):
+	"""Snapshot of the browser: the inherited DOMState fields plus page and tab metadata."""
+
+	# Required fields (no defaults); their order is part of the generated constructor signature
+	url: str
+	title: str
+	tabs: list[TabInfo]
+	# Optional screenshot; presumably base64-encoded image data - TODO confirm
+	screenshot: str | None = None
+	# presumably the scrollable distance above / below the viewport - confirm against producer
+	pixels_above: int = 0
+	pixels_below: int = 0
+	# default_factory gives every instance its own list
+	browser_errors: list[str] = field(default_factory=list)
+
+
+@dataclass
+class BrowserStateHistory:
+	url: str
+	title: str
+	tabs: list[TabInfo]
+	interacted_element: list[DOMHistoryElement | None] | list[None]
+	screenshot: str | None = None
+
+	def to_dict(self) -> dict[str, Any]:
+		data = {}
+		data['tabs'] = [tab.model_dump() for tab in self.tabs]
+		data['screenshot'] = self.screenshot
+		data['interacted_element'] = [el.to_dict() if el else None for el in self.interacted_element]
+		data['url'] = self.url
+		data['title'] = self.title
+		return data
+
+
+class BrowserError(Exception):
+	"""Base class for all browser errors; catch this to handle any browser-specific failure."""
+
+
+class URLNotAllowedError(BrowserError):
+	"""Error raised when a URL is not allowed"""
+
+	# Adds no behaviour of its own; exists so callers can catch this case specifically.