[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
421
browser-use/browser_use/browser/browser.py
Normal file
421
browser-use/browser_use/browser/browser.py
Normal file
|
|
@ -0,0 +1,421 @@
|
|||
"""
|
||||
Playwright browser on steroids.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import gc
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from tempfile import gettempdir
|
||||
from typing import Literal
|
||||
|
||||
import httpx
|
||||
import psutil
|
||||
from dotenv import load_dotenv
|
||||
from playwright.async_api import Browser as PlaywrightBrowser
|
||||
from playwright.async_api import Playwright, async_playwright
|
||||
from pydantic import AliasChoices, BaseModel, ConfigDict, Field
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
from browser_use.browser.chrome import (
|
||||
CHROME_ARGS,
|
||||
CHROME_DEBUG_PORT,
|
||||
CHROME_DETERMINISTIC_RENDERING_ARGS,
|
||||
CHROME_DISABLE_SECURITY_ARGS,
|
||||
CHROME_DOCKER_ARGS,
|
||||
CHROME_HEADLESS_ARGS,
|
||||
)
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
|
||||
from browser_use.utils import time_execution_async
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# True when the IN_DOCKER environment variable starts with 't', 'y' or '1' (e.g. "true", "Yes", "1").
# Slicing with [:1] instead of indexing with [0] keeps an empty value (IN_DOCKER="") from raising
# IndexError at import time; the tuple membership test makes the empty string evaluate to False.
IN_DOCKER = os.environ.get('IN_DOCKER', 'false').lower()[:1] in ('t', 'y', '1')
|
||||
|
||||
|
||||
class ProxySettings(BaseModel):
	"""Pydantic version of playwright.sync_api.ProxySettings, so proxy options can be validated by pydantic."""

	model_config = ConfigDict(populate_by_name=True, from_attributes=True)

	server: str
	bypass: str | None = None
	username: str | None = None
	password: str | None = None

	# Dict-style accessors, kept for compatibility with Playwright's ProxySettings

	def __getitem__(self, key):
		"""Return the attribute named ``key`` (``settings['server']`` style access)."""
		return getattr(self, key)

	def get(self, key, default=None):
		"""Return the attribute named ``key``, or ``default`` when no such attribute exists."""
		return getattr(self, key, default)
|
||||
|
||||
|
||||
class BrowserConfig(BaseModel):
	r"""
	Configuration for the Browser.

	Default values:
		headless: False
			Whether to run browser in headless mode (not recommended)

		disable_security: False
			Disable browser security features (required for cross-origin iframe support)

		extra_browser_args: []
			Extra arguments to pass to the browser

		wss_url: None
			Connect to a browser instance via WebSocket

		cdp_url: None
			Connect to a browser instance via CDP

		browser_class: 'chromium'
			Which Playwright browser type to use: 'chromium', 'firefox' or 'webkit'

		browser_binary_path: None
			Path to a Browser instance to use to connect to your normal browser
			e.g. '/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome'
			Also accepted under the validation aliases browser_instance_path and chrome_instance_path.

		chrome_remote_debugging_port: CHROME_DEBUG_PORT (imported from browser_use.browser.chrome)
			Chrome remote debugging port to use when browser_binary_path is supplied.
			This allows running multiple chrome browsers with the same browser_binary_path but on different ports.
			It also makes it possible to launch a new user-provided chrome browser without closing already
			opened chrome instances, by providing a non-default chrome debugging port.

		keep_alive: False
			Keep the browser alive after the agent has finished running

		deterministic_rendering: False
			Enable deterministic rendering (makes GPU/font rendering consistent across different OS's and docker)

		proxy: None
			Optional ProxySettings forwarded to the browser launch

		new_context_config: BrowserContextConfig()
			Default configuration for newly created browser contexts
	"""

	model_config = ConfigDict(
		arbitrary_types_allowed=True,
		extra='ignore',
		populate_by_name=True,
		from_attributes=True,
		validate_assignment=True,
		revalidate_instances='subclass-instances',
	)

	# Remote connection endpoints; when set they take precedence over launching a local browser
	wss_url: str | None = None
	cdp_url: str | None = None

	browser_class: Literal['chromium', 'firefox', 'webkit'] = 'chromium'
	browser_binary_path: str | None = Field(
		default=None, validation_alias=AliasChoices('browser_instance_path', 'chrome_instance_path')
	)
	chrome_remote_debugging_port: int | None = CHROME_DEBUG_PORT
	extra_browser_args: list[str] = Field(default_factory=list)

	headless: bool = False
	disable_security: bool = False  # disable_security=True is dangerous as any malicious URL visited could embed an iframe for the user's bank, and use their cookies to steal money
	deterministic_rendering: bool = False
	keep_alive: bool = Field(default=False, alias='_force_keep_browser_alive')  # used to be called _force_keep_browser_alive

	proxy: ProxySettings | None = None
	new_context_config: BrowserContextConfig = Field(default_factory=BrowserContextConfig)
|
||||
|
||||
|
||||
# @singleton: TODO - think about whether a singleton makes sense here
|
||||
# @dev By default this is a singleton, but you can create multiple instances if you need to.
|
||||
class Browser:
	"""
	Playwright browser on steroids.

	This is a persistent browser factory that can spawn multiple browser contexts.
	It is recommended to use only one instance of Browser per application (RAM usage will grow otherwise).
	"""

	def __init__(
		self,
		config: BrowserConfig | None = None,
	):
		"""Store the configuration; Playwright itself is started lazily by get_playwright_browser()."""
		logger.debug('🌎 Initializing new browser')
		self.config = config or BrowserConfig()
		self.playwright: Playwright | None = None
		self.playwright_browser: PlaywrightBrowser | None = None
		# psutil handle of a Chrome process started by _setup_user_provided_browser(), if any
		self._chrome_subprocess: psutil.Process | None = None

	async def new_context(self, config: BrowserContextConfig | None = None) -> BrowserContext:
		"""Create a browser context; values from `config` override the browser-level configuration."""
		browser_config = self.config.model_dump() if self.config else {}
		context_config = config.model_dump() if config else {}
		merged_config = {**browser_config, **context_config}
		return BrowserContext(config=BrowserContextConfig(**merged_config), browser=self)

	async def get_playwright_browser(self) -> PlaywrightBrowser:
		"""Return the underlying Playwright browser, initializing it on first use."""
		if self.playwright_browser is None:
			return await self._init()

		return self.playwright_browser

	@time_execution_async('--init (browser)')
	async def _init(self):
		"""Start Playwright and set up the browser session."""
		playwright = await async_playwright().start()
		self.playwright = playwright

		browser = await self._setup_browser(playwright)
		self.playwright_browser = browser

		return self.playwright_browser

	@staticmethod
	def _unique_args(args) -> list[str]:
		"""Drop duplicate command-line flags, keeping the first occurrence of each in its original position."""
		return list(dict.fromkeys(args))

	async def _cdp_endpoint_is_up(self) -> bool:
		"""Return True when http://localhost:<debug port>/json/version answers with HTTP 200."""
		url = f'http://localhost:{self.config.chrome_remote_debugging_port}/json/version'
		try:
			async with httpx.AsyncClient() as client:
				response = await client.get(url, timeout=2)
		except httpx.RequestError:
			return False
		return response.status_code == 200

	async def _connect_to_local_cdp(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Attach Playwright over CDP to the browser listening on the configured local debugging port."""
		browser_class = getattr(playwright, self.config.browser_class)
		return await browser_class.connect_over_cdp(
			endpoint_url=f'http://localhost:{self.config.chrome_remote_debugging_port}',
			timeout=20000,  # 20 second timeout for connection
		)

	async def _setup_remote_cdp_browser(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Connect to an already running browser through `config.cdp_url`. Firefox no longer supports CDP.

		Raises:
			ValueError: if the configured binary path looks like Firefox, or no CDP URL is configured.
		"""
		if 'firefox' in (self.config.browser_binary_path or '').lower():
			raise ValueError(
				'CDP has been deprecated for firefox, check: https://fxdx.dev/deprecating-cdp-support-in-firefox-embracing-the-future-with-webdriver-bidi/'
			)
		if not self.config.cdp_url:
			raise ValueError('CDP URL is required')
		logger.info(f'🔌 Connecting to remote browser via CDP {self.config.cdp_url}')
		browser_class = getattr(playwright, self.config.browser_class)
		browser = await browser_class.connect_over_cdp(self.config.cdp_url)
		return browser

	async def _setup_remote_wss_browser(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Connect to an already running browser through `config.wss_url`.

		Raises:
			ValueError: if no WSS URL is configured.
		"""
		if not self.config.wss_url:
			raise ValueError('WSS URL is required')
		logger.info(f'🔌 Connecting to remote browser via WSS {self.config.wss_url}')
		browser_class = getattr(playwright, self.config.browser_class)
		browser = await browser_class.connect(self.config.wss_url)
		return browser

	async def _setup_user_provided_browser(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Reuse or launch the browser binary at `config.browser_binary_path` and attach to it over CDP.

		If something already answers on the configured debugging port it is reused; otherwise the binary
		is started as a subprocess and polled (up to 10 times, one second apart) until the port answers.

		Raises:
			ValueError: if no browser_binary_path is configured.
			RuntimeError: if Playwright cannot connect to the freshly started browser.
		"""
		if not self.config.browser_binary_path:
			raise ValueError('A browser_binary_path is required')

		assert self.config.browser_class == 'chromium', (
			'browser_binary_path only supports chromium browsers (make sure browser_class=chromium)'
		)

		# Check if browser is already running
		if await self._cdp_endpoint_is_up():
			logger.info(
				f'🔌 Reusing existing browser found running on http://localhost:{self.config.chrome_remote_debugging_port}'
			)
			return await self._connect_to_local_cdp(playwright)
		logger.debug('🌎 No existing Chrome instance found, starting a new one')

		provided_user_data_dir = [arg for arg in self.config.extra_browser_args if '--user-data-dir=' in arg]

		if provided_user_data_dir:
			# split on the first '=' only, so a profile path that itself contains '=' is kept intact
			user_data_dir = Path(provided_user_data_dir[0].split('=', 1)[1])
		else:
			fallback_user_data_dir = Path(gettempdir()) / 'browseruse' / 'profiles' / 'default'  # /tmp/browseruse
			try:
				# ~/.config/browseruse/profiles/default
				user_data_dir = Path('~/.config') / 'browseruse' / 'profiles' / 'default'
				user_data_dir = user_data_dir.expanduser()
				user_data_dir.mkdir(parents=True, exist_ok=True)
			except Exception as e:
				logger.error(f'❌ Failed to create ~/.config/browseruse directory: {type(e).__name__}: {e}')
				user_data_dir = fallback_user_data_dir
				user_data_dir.mkdir(parents=True, exist_ok=True)

		logger.info(f'🌐 Storing Browser Profile user data dir in: {user_data_dir}')
		try:
			# Remove any existing SingletonLock file to allow the browser to start
			(user_data_dir / 'Default' / 'SingletonLock').unlink()
		except OSError:
			# best effort: covers a missing lock file as well as permission problems
			pass
		else:
			self.config.extra_browser_args.append('--no-first-run')

		# Start a new Chrome instance.
		# Duplicates are removed with an order-preserving helper (not a set) so the flag order, and
		# therefore how repeated switches such as --disable-features resolve, is the same on every launch.
		chrome_launch_args = self._unique_args(
			[
				f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
				*([f'--user-data-dir={user_data_dir.resolve()}'] if not provided_user_data_dir else []),
				*CHROME_ARGS,
				*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
				*(CHROME_HEADLESS_ARGS if self.config.headless else []),
				*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
				*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
				*self.config.extra_browser_args,
			]
		)
		chrome_sub_process = await asyncio.create_subprocess_exec(
			self.config.browser_binary_path,
			*chrome_launch_args,
			stdout=subprocess.DEVNULL,
			stderr=subprocess.DEVNULL,
			shell=False,
		)
		self._chrome_subprocess = psutil.Process(chrome_sub_process.pid)

		# Wait for the new instance to open its debugging port
		for _ in range(10):
			if await self._cdp_endpoint_is_up():
				break
			await asyncio.sleep(1)

		# Attempt to connect again after starting a new instance
		try:
			return await self._connect_to_local_cdp(playwright)
		except Exception as e:
			logger.error(f'❌ Failed to start a new Chrome instance: {str(e)}')
			raise RuntimeError(
				'To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
			) from e

	async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Launch the Playwright-managed browser selected by `config.browser_class` and return it."""
		assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'

		# Use the configured window size from new_context_config if available
		if (
			not self.config.headless
			and hasattr(self.config, 'new_context_config')
			and hasattr(self.config.new_context_config, 'window_width')
			and hasattr(self.config.new_context_config, 'window_height')
			and not self.config.new_context_config.no_viewport
		):
			screen_size = {
				'width': self.config.new_context_config.window_width,
				'height': self.config.new_context_config.window_height,
			}
			offset_x, offset_y = get_window_adjustments()
		elif self.config.headless:
			screen_size = {'width': 1920, 'height': 1080}
			offset_x, offset_y = 0, 0
		else:
			screen_size = get_screen_resolution()
			offset_x, offset_y = get_window_adjustments()

		debug_port = self.config.chrome_remote_debugging_port
		# the field type allows None: in that case no debugging-port flag is passed at all
		debug_port_args = [f'--remote-debugging-port={debug_port}'] if debug_port is not None else []

		# order-preserving de-duplication keeps the flag order stable between launches
		chrome_args = self._unique_args(
			[
				*debug_port_args,
				*CHROME_ARGS,
				*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
				*(CHROME_HEADLESS_ARGS if self.config.headless else []),
				*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
				*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
				f'--window-position={offset_x},{offset_y}',
				f'--window-size={screen_size["width"]},{screen_size["height"]}',
				*self.config.extra_browser_args,
			]
		)

		# check if chrome remote debugging port is already taken,
		# if so remove the remote-debugging-port arg to prevent conflicts
		if debug_port is not None:
			with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
				if s.connect_ex(('localhost', debug_port)) == 0:
					chrome_args.remove(f'--remote-debugging-port={debug_port}')

		browser_class = getattr(playwright, self.config.browser_class)
		args = {
			'chromium': chrome_args,
			'firefox': self._unique_args(['-no-remote', *self.config.extra_browser_args]),
			'webkit': self._unique_args(['--no-startup-window', *self.config.extra_browser_args]),
		}

		browser = await browser_class.launch(
			channel='chromium',  # https://github.com/microsoft/playwright/issues/33566
			headless=self.config.headless,
			args=args[self.config.browser_class],
			proxy=self.config.proxy.model_dump() if self.config.proxy else None,
			handle_sigterm=False,
			handle_sigint=False,
		)
		return browser

	async def _setup_browser(self, playwright: Playwright) -> PlaywrightBrowser:
		"""Pick the setup strategy from the config: remote CDP, remote WSS, user-provided binary, or builtin.

		Any failure is logged and re-raised unchanged.
		"""
		try:
			if self.config.cdp_url:
				return await self._setup_remote_cdp_browser(playwright)
			if self.config.wss_url:
				return await self._setup_remote_wss_browser(playwright)

			if self.config.headless:
				logger.warning('⚠️ Headless mode is not recommended. Many sites will detect and block all headless browsers.')

			if self.config.browser_binary_path:
				return await self._setup_user_provided_browser(playwright)
			else:
				return await self._setup_builtin_browser(playwright)
		except Exception as e:
			logger.error(f'Failed to initialize Playwright browser: {e}')
			raise

	async def close(self):
		"""Close the browser instance (no-op when `config.keep_alive` is set).

		Errors are logged at debug level and never raised.
		"""
		if self.config.keep_alive:
			return

		try:
			try:
				if self.playwright_browser:
					await self.playwright_browser.close()
				if self.playwright:
					await self.playwright.stop()
			except Exception as e:
				if 'OpenAI error' not in str(e):
					logger.debug(f'Failed to close browser properly: {e}')

			# Runs even when the Playwright teardown above failed, so the Chrome process is not leaked
			if chrome_proc := getattr(self, '_chrome_subprocess', None):
				try:
					# always kill all children processes, otherwise chrome leaves a bunch of zombie processes
					for proc in chrome_proc.children(recursive=True):
						proc.kill()
					chrome_proc.kill()
				except Exception as e:
					logger.debug(f'Failed to terminate chrome subprocess: {e}')
		finally:
			self.playwright_browser = None
			self.playwright = None
			self._chrome_subprocess = None
			gc.collect()

	def __del__(self):
		"""Best-effort async cleanup when the object is destroyed."""
		try:
			if not (self.playwright_browser or self.playwright):
				return
			try:
				loop = asyncio.get_running_loop()
			except RuntimeError:
				# get_running_loop() raises when no loop is running in this thread,
				# so the synchronous fallback has to live in this handler
				asyncio.run(self.close())
			else:
				loop.create_task(self.close())
		except Exception as e:
			logger.debug(f'Failed to cleanup browser in destructor: {e}')
|
||||
177
browser-use/browser_use/browser/chrome.py
Normal file
177
browser-use/browser_use/browser/chrome.py
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
# Chrome launch constants: profile/extension paths, the default debugging port,
# and the groups of command-line flags that the browser launcher combines.

CHROME_EXTENSIONS = {}  # coming in a separate PR
CHROME_EXTENSIONS_PATH = 'chrome_extensions'
CHROME_PROFILE_PATH = 'chrome_profile'
CHROME_PROFILE_USER = 'Default'
CHROME_DEBUG_PORT = 9242  # default value for --remote-debugging-port
# Chrome feature names that are joined into a single --disable-features=... flag at the end of CHROME_ARGS
CHROME_DISABLED_COMPONENTS = [
	'Translate',
	'AcceptCHFrame',
	'OptimizationHints',
	'ProcessPerSiteUpToMainFrameThreshold',
	'InterestFeedContentSuggestions',
	# 'CalculateNativeWinOcclusion',
	'BackForwardCache',
	# 'HeavyAdPrivacyMitigations',
	'LazyFrameLoading',
	# 'ImprovedCookieControls',
	'PrivacySandboxSettings4',
	'AutofillServerCommunication',
	'CertificateTransparencyComponentUpdater',
	'DestroyProfileOnBrowserClose',
	'CrashReporting',
	'OverscrollHistoryNavigation',
	'InfiniteSessionRestore',
	#'LockProfileCookieDatabase',  # disabling allows multiple chrome instances to concurrently modify profile, but might make chrome much slower https://github.com/yt-dlp/yt-dlp/issues/7271  https://issues.chromium.org/issues/40901624
]  # it's always best to give each chrome instance its own exclusive copy of the user profile


# flags added only when running headless
CHROME_HEADLESS_ARGS = [
	'--headless=new',
	# '--test-type',
	# '--test-type=gpu',  # https://github.com/puppeteer/puppeteer/issues/10516
	# '--enable-automation',  # <- DONT USE THIS, it makes you easily detectable / blocked by cloudflare
]

CHROME_DOCKER_ARGS = [
	# Docker-specific options
	# https://github.com/GoogleChrome/lighthouse-ci/tree/main/docs/recipes/docker-client#--no-sandbox-issues-explained
	'--no-sandbox',  # rely on docker sandboxing in docker, otherwise we need cap_add: SYS_ADMIN to use host sandboxing
	'--disable-gpu-sandbox',
	'--disable-setuid-sandbox',
	'--disable-dev-shm-usage',  # docker's 64MB default shm size is not big enough, disabling just uses /tmp instead
	'--no-xshm',
	# dont try to disable (or install) dbus in docker, its not needed, chrome can work without dbus despite the errors
]

CHROME_DISABLE_SECURITY_ARGS = [
	# DANGER: JS isolation security features (to allow easier tampering with pages during automation)
	# chrome://net-internals
	'--disable-web-security',  # <- WARNING, breaks some sites that expect/enforce strict CORS headers (try webflow.com)
	'--disable-site-isolation-trials',
	# NOTE(review): CHROME_ARGS also carries a --disable-features=... switch; confirm how Chrome combines repeated switches
	'--disable-features=IsolateOrigins,site-per-process',
	# '--allow-file-access-from-files',  # <- WARNING, dangerous, allows JS to read filesystem using file:// URLs
	# DANGER: Disable HTTPS verification
	'--allow-running-insecure-content',  # Breaks CORS/CSRF/HSTS etc., useful sometimes but very easy to detect
	'--ignore-certificate-errors',
	'--ignore-ssl-errors',
	'--ignore-certificate-errors-spki-list',
	# '--allow-insecure-localhost',
]

# flags to make chrome behave more deterministically across different OS's
CHROME_DETERMINISTIC_RENDERING_ARGS = [
	'--deterministic-mode',
	'--js-flags=--random-seed=1157259159',  # make all JS random numbers deterministic by providing a seed
	'--force-device-scale-factor=1',
	# GPU, canvas, text, and pdf rendering config
	# chrome://gpu
	'--enable-webgl',  # enable web-gl graphics support
	'--font-render-hinting=none',  # make rendering more deterministic by ignoring OS font hints, may also need css override, try:    * {text-rendering: geometricprecision !important; -webkit-font-smoothing: antialiased;}
	'--force-color-profile=srgb',  # make rendering more deterministic by using consistent color profile, if browser looks weird, try: generic-rgb
	# '--disable-partial-raster',  # make rendering more deterministic (TODO: verify if still needed)
	'--disable-skia-runtime-opts',  # make rendering more deterministic by avoiding Skia hot path runtime optimizations
	'--disable-2d-canvas-clip-aa',  # make rendering more deterministic by disabling antialiasing on 2d canvas clips
	# '--disable-gpu',  # falls back to more consistent software renderer across all OS's, especially helps linux text rendering look less weird
	# // '--use-gl=swiftshader',  <- DO NOT USE, breaks M1 ARM64. it makes rendering more deterministic by using simpler CPU renderer instead of OS GPU renderer  bug: https://groups.google.com/a/chromium.org/g/chromium-dev/c/8eR2GctzGuw
	# // '--disable-software-rasterizer',  <- DO NOT USE, harmless, used in tandem with --disable-gpu
	# // '--run-all-compositor-stages-before-draw',  <- DO NOT USE, makes headful chrome hang on startup (tested v121 Google Chrome.app on macOS)
	# // '--disable-gl-drawing-for-tests',  <- DO NOT USE, disables gl output (makes tests run faster if you dont care about canvas)
	# // '--blink-settings=imagesEnabled=false',  <- DO NOT USE, disables images entirely (only sometimes useful to speed up loading)
]


# base flags applied to every Chrome launch
CHROME_ARGS = [
	# Process management & performance tuning
	# chrome://process-internals
	# '--disable-lazy-loading',  # make rendering more deterministic by loading all content up-front instead of on-focus
	# '--disable-renderer-backgrounding',  # dont throttle tab rendering based on focus/visibility
	# '--disable-background-networking',  # dont throttle tab networking based on focus/visibility
	# '--disable-background-timer-throttling',  # dont throttle tab timers based on focus/visibility
	# '--disable-backgrounding-occluded-windows',  # dont throttle tab window based on focus/visibility
	# '--disable-ipc-flooding-protection',  # dont throttle ipc traffic or accessing big request/response/buffer/etc. objects will fail
	# '--disable-extensions-http-throttling',  # dont throttle http traffic based on runtime heuristics
	# '--disable-field-trial-config',  # disable shared field trial state between browser processes
	# '--disable-back-forward-cache',  # disable browsing navigation cache
	# Profile data dir setup
	# chrome://profile-internals
	# f'--user-data-dir={CHROME_PROFILE_PATH}',  # managed by playwright arg instead
	# f'--profile-directory={CHROME_PROFILE_USER}',
	# '--password-store=basic',  # use mock keychain instead of OS-provided keychain (we manage auth.json instead)
	# '--use-mock-keychain',
	# '--disable-cookie-encryption',  # we need to be able to write unencrypted cookies to save/load auth.json
	'--disable-sync',  # don't try to use Google account sync features while automation is active
	# Extensions
	# chrome://inspect/#extensions
	# f'--load-extension={CHROME_EXTENSIONS.map(({unpacked_path}) => unpacked_path).join(',')}',  # not needed when using existing profile that already has extensions installed
	# f'--allowlisted-extension-id={",".join(CHROME_EXTENSIONS.keys())}',
	'--allow-legacy-extension-manifests',
	'--allow-pre-commit-input',  # allow JS mutations before page rendering is complete
	'--disable-blink-features=AutomationControlled',  # hide the signatures that announce browser is being remote-controlled
	# f'--proxy-server=https://<proxy-host>:<port>',  # send all network traffic through a proxy https://2captcha.com/proxy  (never commit real proxy credentials here)
	# f'--proxy-bypass-list=127.0.0.1',
	# Browser window and viewport setup
	# chrome://version
	# f'--user-agent="{DEFAULT_USER_AGENT}"',
	# f'--window-size={DEFAULT_VIEWPORT.width},{DEFAULT_VIEWPORT.height}',
	# '--window-position=0,0',
	# '--start-maximized',
	'--install-autogenerated-theme=0,0,0',  # black border makes it easier to see which chrome window is browser-use's
	'--hide-scrollbars',  # stop scrollbars from affecting screenshot width/height
	#'--virtual-time-budget=60000',  # DONT USE THIS, makes chrome hang forever and doesn't work, used to fast-forward all animations & timers by 60s, dont use this it's unfortunately buggy and breaks screenshot and PDF capture sometimes
	#'--autoplay-policy=no-user-gesture-required',  # auto-start videos so they trigger network requests + show up in outputs
	#'--disable-gesture-requirement-for-media-playback',
	#'--lang=en-US,en;q=0.9',
	# IO: stdin/stdout, debug port config
	# chrome://inspect
	'--log-level=2',  # 0=INFO 1=WARNING 2=ERROR 3=FATAL
	'--enable-logging=stderr',
	# '--remote-debugging-address=127.0.0.1',         <- DONT USE THIS, no longer supported on chrome >100, never expose to non-localhost, would allow attacker to drive your browser from any machine
	# '--enable-experimental-extension-apis',  # add support for tab groups via chrome.tabs extension API
	'--disable-focus-on-load',  # prevent browser from hijacking focus
	'--disable-window-activation',
	# '--in-process-gpu',                            <- DONT USE THIS, makes headful startup time ~5-10s slower (tested v121 Google Chrome.app on macOS)
	# '--disable-component-extensions-with-background-pages',  # TODO: check this, disables chrome components that only run in background with no visible UI (could lower startup time)
	# uncomment to disable hardware camera/mic/speaker access + present fake devices to websites
	# (faster to disable, but disabling breaks recording browser audio in puppeteer-stream screenrecordings)
	# '--use-fake-device-for-media-stream',
	# '--use-fake-ui-for-media-stream',
	# '--disable-features=GlobalMediaControls,MediaRouter,DialMediaRouteProvider',
	# Output format options (PDF, screenshot, etc.)
	'--export-tagged-pdf',  # include table of contents and tags in printed PDFs
	'--generate-pdf-document-outline',
	# Suppress first-run features, popups, hints, updates, etc.
	# chrome://system
	'--no-pings',
	'--no-default-browser-check',
	'--no-startup-window',
	'--ash-no-nudges',
	'--disable-infobars',
	'--disable-search-engine-choice-screen',
	'--disable-session-crashed-bubble',
	'--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',  # disable browser self-update while automation is active
	'--hide-crash-restore-bubble',
	'--suppress-message-center-popups',
	'--disable-client-side-phishing-detection',
	'--disable-domain-reliability',
	'--disable-datasaver-prompt',
	'--disable-hang-monitor',
	'--disable-speech-synthesis-api',
	'--disable-speech-api',
	'--disable-print-preview',
	'--safebrowsing-disable-auto-update',
	# '--deny-permission-prompts',
	'--disable-external-intent-requests',
	# '--disable-notifications',
	'--disable-desktop-notifications',
	'--noerrdialogs',
	'--disable-prompt-on-repost',
	'--silent-debugger-extension-api',
	# '--block-new-web-contents',
	'--metrics-recording-only',
	'--disable-breakpad',
	# other feature flags
	# chrome://flags        chrome://components
	f'--disable-features={",".join(CHROME_DISABLED_COMPONENTS)}',
	'--enable-features=NetworkService',
]
|
||||
2027
browser-use/browser_use/browser/context.py
Normal file
2027
browser-use/browser_use/browser/context.py
Normal file
File diff suppressed because it is too large
Load diff
348
browser-use/browser_use/browser/dolphin_service.py
Normal file
348
browser-use/browser_use/browser/dolphin_service.py
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
import logging
|
||||
import os
|
||||
|
||||
import aiohttp
|
||||
from playwright.async_api import Page, async_playwright
|
||||
|
||||
from browser_use.browser.service import Browser
|
||||
from browser_use.browser.views import BrowserState, TabInfo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DolphinBrowser(Browser):
|
||||
"""A class for managing Dolphin Anty browser sessions using Playwright"""
|
||||
|
||||
def __init__(self, headless: bool = False, keep_open: bool = False):
	"""
	Set up a DolphinBrowser without connecting to anything yet.

	Args:
		headless (bool): Run browser in headless mode (default: False).
		keep_open (bool): Keep browser open after finishing tasks (default: False).
	"""
	# Caller-supplied options
	self.headless = headless
	self.keep_open = keep_open

	# API connection settings come from the environment
	self.api_token = os.getenv('DOLPHIN_API_TOKEN')
	self.api_url = os.getenv('DOLPHIN_API_URL', 'http://localhost:3001/v1.0')
	self.profile_id = os.getenv('DOLPHIN_PROFILE_ID')

	# Runtime handles, all empty until a connection is made
	self.playwright = None
	self.browser = None
	self.context = None
	self.page = None
	self.session = None
	self.cached_state = None
	self._pages: list[Page] = []  # open tabs, in creation order
|
||||
|
||||
async def get_current_page(self) -> Page:
	"""
	Return the page that is currently active.

	Raises:
		Exception: If there is no active page.
	"""
	current = self.page
	if current:
		return current
	raise Exception('No active page. Browser might not be connected.')
|
||||
|
||||
async def create_new_tab(self, url: str | None = None) -> None:
	"""
	Open a new tab, make it the current page, and optionally load a URL in it.

	Args:
		url (str, optional): URL to navigate to after creating the tab. Defaults to None.

	Raises:
		Exception: If browser context is not initialized or navigation fails.
	"""
	if not self.context:
		raise Exception('Browser context not initialized')

	# The new tab is tracked and becomes the current page before any navigation is attempted
	tab = await self.context.new_page()
	self._pages.append(tab)
	self.page = tab

	if not url:
		return

	try:
		await tab.goto(url, wait_until='networkidle')
		await self.wait_for_page_load()
	except Exception as e:
		logger.error(f'Failed to navigate to URL {url}: {str(e)}')
		raise
|
||||
|
||||
async def switch_to_tab(self, page_id: int) -> None:
    """
    Switch to a specific tab by its index in the list of tracked pages.

    Args:
        page_id (int): Index of the tab to switch to. Negative values count from the end
            (``-1`` is the last tab).

    Raises:
        Exception: If no tabs are available or the index is out of range.
    """
    if not self._pages:
        raise Exception('No tabs available')

    tab_count = len(self._pages)
    # Resolve negative indices (e.g. -1 for the last tab) without overwriting the
    # caller's value, so the error message below reports what was actually passed in.
    index = page_id + tab_count if page_id < 0 else page_id

    if not 0 <= index < tab_count:
        raise Exception(f'Tab index {page_id} out of range')

    # Make the selected tab current, focus it, and wait for it to settle
    self.page = self._pages[index]
    await self.page.bring_to_front()
    await self.wait_for_page_load()
|
||||
|
||||
async def get_tabs_info(self) -> list[TabInfo]:
    """
    Describe every tracked tab.

    Returns:
        list: One TabInfo per entry in ``self._pages``, in list order; ``page_id`` is the
        entry's index, ``url`` and ``title`` are read from the page.
    """
    return [
        TabInfo(
            page_id=position,
            url=tab.url,
            title=await tab.title(),  # title() is a coroutine on the page object
        )
        for position, tab in enumerate(self._pages)
    ]
|
||||
|
||||
async def wait_for_page_load(self, timeout: int = 30000):
    """
    Wait for the current page to reach the 'networkidle' load state.

    Does nothing when there is no current page. Any error raised while waiting
    (including a timeout) is logged as a warning and NOT propagated.

    Args:
        timeout (int): Maximum time to wait in milliseconds (default: 30000ms).
    """
    current = self.page
    if not current:
        return

    try:
        await current.wait_for_load_state('networkidle', timeout=timeout)
    except Exception as e:
        # Best effort: a slow page should not abort the calling operation
        logger.warning(f'Wait for page load timeout: {str(e)}')
|
||||
|
||||
async def get_session(self):
    """
    Return the current session, which is this instance itself.

    Also records the instance in ``self.session``.

    Returns:
        The object the method was called on.

    Raises:
        Exception: If ``self.browser`` is unset, i.e. the browser is not connected.
    """
    if self.browser:
        self.session = self
        return self
    raise Exception('Browser not connected. Call connect() first.')
|
||||
|
||||
async def authenticate(self):
    """
    Authenticate with the Dolphin Anty API using the configured API token.

    Posts ``{'token': <api_token>}`` as JSON to ``<api_url>/auth/login-with-token``.

    Returns:
        The decoded JSON body of a successful response.

    Raises:
        Exception: If the response status is not OK; the message carries the response text.
    """
    endpoint = f'{self.api_url}/auth/login-with-token'
    payload = {'token': self.api_token}

    async with aiohttp.ClientSession() as http:
        async with http.post(endpoint, json=payload) as reply:
            if reply.ok:
                return await reply.json()
            raise Exception(f'Failed to authenticate with Dolphin Anty: {await reply.text()}')
|
||||
|
||||
async def get_browser_profiles(self):
    """
    Fetch the list of browser profiles from Dolphin Anty.

    Authenticates first, then requests ``<api_url>/browser_profiles`` with the API token
    sent as a Bearer authorization header.

    Returns:
        list: The value under the ``'data'`` key of the response, or an empty list if absent.

    Raises:
        Exception: If authentication fails or the profiles request returns a non-OK status.
    """
    await self.authenticate()

    endpoint = f'{self.api_url}/browser_profiles'
    auth_headers = {'Authorization': f'Bearer {self.api_token}'}

    async with aiohttp.ClientSession() as http:
        async with http.get(endpoint, headers=auth_headers) as reply:
            if not reply.ok:
                raise Exception(f'Failed to get browser profiles: {await reply.text()}')
            payload = await reply.json()
            # The profiles array lives under 'data' in the response envelope
            return payload.get('data', [])
|
||||
|
||||
async def start_profile(self, profile_id: str | None = None, headless: bool = False) -> dict:
|
||||
"""
|
||||
Start a browser profile on Dolphin Anty.
|
||||
|
||||
Args:
|
||||
profile_id (str, optional): Profile ID to start (defaults to the one set in the environment).
|
||||
headless (bool): Run browser in headless mode (default: False).
|
||||
|
||||
Returns:
|
||||
dict: Information about the started profile.
|
||||
|
||||
Raises:
|
||||
ValueError: If no profile ID is provided and no default is set.
|
||||
Exception: If starting the profile fails.
|
||||
"""
|
||||
# Authenticate before starting the profile
|
||||
await self.authenticate()
|
||||
|
||||
profile_id = profile_id or self.profile_id
|
||||
if not profile_id:
|
||||
raise ValueError('No profile ID provided')
|
||||
|
||||
url = f'{self.api_url}/browser_profiles/{profile_id}/start'
|
||||
params = {'automation': 1}
|
||||
if headless:
|
||||
params['headless'] = 1
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url, params=params) as response:
|
||||
if not response.ok:
|
||||
raise Exception(f'Failed to start profile: {await response.text()}')
|
||||
return await response.json()
|
||||
|
||||
async def stop_profile(self, profile_id: str | None = None):
|
||||
"""
|
||||
Stop a browser profile on Dolphin Anty.
|
||||
|
||||
Args:
|
||||
profile_id (str, optional): Profile ID to stop (defaults to the one set in the environment).
|
||||
|
||||
Returns:
|
||||
dict: Information about the stopped profile.
|
||||
|
||||
Raises:
|
||||
ValueError: If no profile ID is provided and no default is set.
|
||||
"""
|
||||
# Authenticate before stopping the profile
|
||||
await self.authenticate()
|
||||
|
||||
profile_id = profile_id or self.profile_id
|
||||
if not profile_id:
|
||||
raise ValueError('No profile ID provided')
|
||||
|
||||
url = f'{self.api_url}/browser_profiles/{profile_id}/stop'
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.get(url) as response:
|
||||
return await response.json()
|
||||
|
||||
async def connect(self, profile_id: str | None = None):
    """
    Start a browser profile and connect to it with Playwright over CDP.

    Args:
        profile_id (str, optional): Profile ID to connect to (defaults to ``self.profile_id``).

    Returns:
        PlaywrightBrowser: The connected browser instance.

    Raises:
        ValueError: If no profile ID is available.
        Exception: If authentication, starting the profile, or the connection fails.
    """
    # start_profile() authenticates on its own, so no separate authenticate() call is needed.
    # Forward the headless preference captured by the constructor; it was previously ignored.
    profile_data = await self.start_profile(profile_id, headless=self.headless)

    if not profile_data.get('success'):
        raise Exception(f'Failed to start profile: {profile_data}')

    automation = profile_data['automation']
    port = automation['port']
    ws_endpoint = automation['wsEndpoint']
    ws_url = f'ws://127.0.0.1:{port}{ws_endpoint}'

    # Use Playwright to connect to the browser's WebSocket endpoint
    playwright = await async_playwright().start()
    try:
        browser = await playwright.chromium.connect_over_cdp(ws_url)

        # Reuse the profile's existing context/page when present, otherwise create them
        contexts = browser.contexts
        context = contexts[0] if contexts else await browser.new_context()
        pages = context.pages
        page = pages[0] if pages else await context.new_page()
    except BaseException:
        # Do not leak the Playwright driver when the connection cannot be established
        await playwright.stop()
        raise

    # Publish state only once everything succeeded, so a failed connect leaves no half-set handles
    self.playwright = playwright
    self.browser = browser
    self.context = context
    self.page = page
    self._pages = [page]  # Initialize pages list with the first page

    return self.browser
|
||||
|
||||
async def close(self, force: bool = False):
    """
    Close the browser connection and clean up resources.

    Every cleanup step is attempted independently: a failure in one step is logged and
    does not prevent the following steps from running. Handles are reset afterwards, so
    calling ``close()`` again is harmless. Errors are never propagated to the caller,
    but cancellation and interpreter-exit signals are no longer swallowed.

    Args:
        force (bool): If True, also stop the associated profile via ``stop_profile()``
            (default: False).
    """
    # Detach the page list first so a repeated close() never re-closes the same pages
    pages, self._pages = self._pages, []
    for page in pages or []:
        try:
            await page.close()
        except Exception as e:
            # Best effort: a page may already be gone
            logger.debug(f'Ignoring error while closing page: {str(e)}')
    self.page = None
    self.context = None
    self.session = None

    # Close the browser and Playwright instance
    try:
        if self.browser:
            await self.browser.close()
    except Exception as e:
        logger.error(f'Error during browser cleanup: {str(e)}')
    finally:
        self.browser = None

    try:
        if self.playwright:
            await self.playwright.stop()
    except Exception as e:
        logger.error(f'Error during browser cleanup: {str(e)}')
    finally:
        self.playwright = None

    if force:
        try:
            await self.stop_profile()  # Force stop the profile
        except Exception as e:
            logger.error(f'Error during browser cleanup: {str(e)}')
|
||||
|
||||
async def get_current_state(self) -> BrowserState:
    """
    Get the current state of the browser (URL, content, viewport size, tabs).

    The resulting state is also stored in ``self.cached_state``.

    Returns:
        BrowserState: The current state of the browser.

    Raises:
        Exception: If no active page is available.
    """
    if not self.page:
        raise Exception('No active page')

    content = await self.page.content()
    # In Playwright's Python API viewport_size is a synchronous property (a dict or None),
    # not a coroutine method, so it must be neither called nor awaited.
    viewport_size = self.page.viewport_size

    # NOTE(review): the keyword names below must match the BrowserState class imported by
    # this module — confirm against its definition.
    state = BrowserState(
        url=self.page.url,
        content=content,
        viewport_height=viewport_size['height'] if viewport_size else 0,
        viewport_width=viewport_size['width'] if viewport_size else 0,
        tabs=await self.get_tabs_info(),
    )

    # Cache and return the state
    self.cached_state = state
    return state
|
||||
|
||||
def __del__(self):
    """
    Finalizer hook; intentionally a no-op.

    The instance acts as its own session, so there is no separate session object to release.
    """
    return None
|
||||
39
browser-use/browser_use/browser/tests/httpx_client_test.py
Normal file
39
browser-use/browser_use/browser/tests/httpx_client_test.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import httpx
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_browser_close_doesnt_affect_external_httpx_clients():
    """
    Test that Browser.close() doesn't close HTTPX clients created outside the Browser instance.
    This test demonstrates the issue where Browser.close() is closing all HTTPX clients.
    """
    # Create an external HTTPX client that should remain open
    external_client = httpx.AsyncClient()
    try:
        # Create a Browser instance
        browser = Browser(config=BrowserConfig(headless=True))

        # Close the browser (which should trigger cleanup_httpx_clients)
        await browser.close()

        # Inspect the client's own state flag instead of issuing a real request: it is the
        # same condition that makes requests fail with "client has been closed", but the
        # check is deterministic and needs no network access.
        assert not external_client.is_closed, 'External HTTPX client was incorrectly closed by Browser.close()'
    finally:
        # Always clean up our test client properly, even if Browser setup/teardown raised
        await external_client.aclose()
|
||||
36
browser-use/browser_use/browser/tests/screenshot_test.py
Normal file
36
browser-use/browser_use/browser/tests/screenshot_test.py
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
import asyncio
|
||||
import base64
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_take_full_page_screenshot():
    """Take a full-page screenshot of example.com and check it is a non-empty, valid base64 string."""
    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
    try:
        async with await browser.new_context() as context:
            page = await context.get_current_page()
            # Go to a test page
            await page.goto('https://example.com')

            await asyncio.sleep(3)
            # Take full page screenshot
            screenshot_b64 = await context.take_screenshot(full_page=True)
            await asyncio.sleep(3)

            # Verify screenshot is not empty and is valid base64
            assert screenshot_b64 is not None
            assert isinstance(screenshot_b64, str)
            assert len(screenshot_b64) > 0

            # Test we can decode the base64 string. validate=True rejects characters outside
            # the base64 alphabet; the default mode silently discards them.
            try:
                base64.b64decode(screenshot_b64, validate=True)
            except Exception as e:
                pytest.fail(f'Failed to decode base64 screenshot: {str(e)}')
    finally:
        await browser.close()
|
||||
|
||||
|
||||
# Script entry point: lets the screenshot check be run directly, without pytest.
if __name__ == '__main__':
    asyncio.run(test_take_full_page_screenshot())
|
||||
96
browser-use/browser_use/browser/tests/test_clicks.py
Normal file
96
browser-use/browser_use/browser/tests/test_clicks.py
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
import asyncio
|
||||
import json
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.dom.views import DOMBaseNode, DOMElementNode, DOMTextNode
|
||||
from browser_use.utils import time_execution_sync
|
||||
|
||||
|
||||
class ElementTreeSerializer:
    """Converts a DOM element tree into plain, JSON-serializable dictionaries."""

    @staticmethod
    def dom_element_node_to_json(element_tree: DOMElementNode) -> dict:
        """
        Recursively convert ``element_tree`` into nested dictionaries.

        Text nodes become ``{'type': 'text', 'text': ...}``; element nodes become a dict
        with their tag name, attributes, highlight index and converted children. Any
        other node kind is rendered as an empty dict.
        """

        def serialize(node: DOMBaseNode) -> dict:
            if isinstance(node, DOMTextNode):
                return {'type': 'text', 'text': node.text}
            if not isinstance(node, DOMElementNode):
                return {}
            return {
                'type': 'element',
                'tag_name': node.tag_name,
                'attributes': node.attributes,
                'highlight_index': node.highlight_index,
                'children': [serialize(descendant) for descendant in node.children],
            }

        return serialize(element_tree)
|
||||
|
||||
|
||||
# run with: pytest -s browser_use/browser/tests/test_clicks.py
# (interactive: -s is needed so the prompt can read from stdin)
@pytest.mark.asyncio
async def test_highlight_elements():
    """
    Interactive debugging aid: dump the DOM tree of a page, report duplicate XPaths and
    click the element whose index is typed at the prompt.

    The loop ends when the user enters an empty line or ``q``, or when stdin cannot be read.
    """
    # Function-scope imports keep this block self-contained
    import os
    from collections import Counter

    # The page dump below is written to ./tmp; create it up front, otherwise every
    # iteration would fail on the file write and the loop would never reach the prompt
    os.makedirs('./tmp', exist_ok=True)

    browser = Browser(config=BrowserConfig(headless=False, disable_security=True))

    try:
        async with await browser.new_context() as context:
            page = await context.get_current_page()
            # await page.goto('https://immobilienscout24.de')
            # await page.goto('https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/service-plans')
            # await page.goto('https://google.com/search?q=elon+musk')
            # await page.goto('https://kayak.com')
            # await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
            # await page.goto('https://dictionary.cambridge.org')
            # await page.goto('https://github.com')
            await page.goto('https://huggingface.co/')

            await asyncio.sleep(1)

            while True:
                try:
                    # await asyncio.sleep(10)
                    state = await context.get_state(True)

                    async with await anyio.open_file('./tmp/page.json', 'w') as f:
                        await f.write(
                            json.dumps(
                                ElementTreeSerializer.dom_element_node_to_json(state.element_tree),
                                indent=1,
                            )
                        )

                    # await time_execution_sync('highlight_selector_map_elements')(
                    # 	browser.highlight_selector_map_elements
                    # )(state.selector_map)

                    if not state.selector_map:
                        # Nothing interactive yet; pause briefly instead of spinning at full speed
                        await asyncio.sleep(1)
                        continue

                    # Find and print duplicate XPaths
                    xpath_counts = Counter(selector.xpath for selector in state.selector_map.values())

                    print('\nDuplicate XPaths found:')
                    for xpath, count in xpath_counts.items():
                        if count > 1:
                            print(f'XPath: {xpath}')
                            print(f'Count: {count}\n')

                    print(list(state.selector_map.keys()), 'Selector map keys')
                    print(state.element_tree.clickable_elements_to_string())

                    try:
                        action = input('Select next action: ')
                    except (EOFError, OSError):
                        # stdin is closed or captured (e.g. pytest without -s): stop instead of
                        # looping forever on the same error
                        break
                    if action.strip().lower() in ('', 'q'):
                        break

                    await time_execution_sync('remove_highlight_elements')(context.remove_highlights)()

                    node_element = state.selector_map[int(action)]

                    # check if index of selector map are the same as index of items in dom_items

                    await context._click_element_node(node_element)

                except Exception as e:
                    # Keep the session alive on bad input or page errors; just report them
                    print(e)
    finally:
        await browser.close()
|
||||
41
browser-use/browser_use/browser/utils/screen_resolution.py
Normal file
41
browser-use/browser_use/browser/utils/screen_resolution.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
import sys
|
||||
|
||||
|
||||
def get_screen_resolution():
    """
    Return the primary screen size as ``{'width': ..., 'height': ...}``.

    On macOS the size is read through AppKit; on every other platform through the
    ``screeninfo`` package. If detection fails for any reason, a message is printed and a
    fixed fallback is returned (2560x1664 on macOS, 1920x1080 elsewhere).
    """
    if sys.platform != 'darwin':  # Windows & Linux
        try:
            from screeninfo import get_monitors

            detected = get_monitors()
            if not detected:
                raise Exception('No monitors detected.')
            primary = detected[0]
            return {'width': primary.width, 'height': primary.height}
        except ImportError:
            print("screeninfo package not found. Install it using 'pip install screeninfo'.")
        except Exception as e:
            print(f'Error retrieving screen resolution: {e}')

        return {'width': 1920, 'height': 1080}

    # macOS
    try:
        from AppKit import NSScreen

        frame = NSScreen.mainScreen().frame()
        return {'width': int(frame.size.width), 'height': int(frame.size.height)}
    except ImportError:
        print('AppKit is not available. Make sure you are running this on macOS with pyobjc installed.')
    except Exception as e:
        print(f'Error retrieving macOS screen resolution: {e}')
    return {'width': 2560, 'height': 1664}
|
||||
|
||||
|
||||
def get_window_adjustments():
    """Returns recommended x, y offsets for window positioning on the current platform"""
    offsets_by_platform = {
        'darwin': (-4, 24),  # macOS has a small title bar, no border
        'win32': (-8, 0),  # Windows has a border on the left
    }
    # Any other platform (Linux) needs no adjustment
    return offsets_by_platform.get(sys.platform, (0, 0))
|
||||
54
browser-use/browser_use/browser/views.py
Normal file
54
browser-use/browser_use/browser/views.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.dom.history_tree_processor.service import DOMHistoryElement
|
||||
from browser_use.dom.views import DOMState
|
||||
|
||||
|
||||
# Pydantic
class TabInfo(BaseModel):
    """Describes a single browser tab."""

    # Identifier of the tab
    page_id: int
    # Address the tab is showing
    url: str
    # Title of the tab's page
    title: str
    # parent page that contains this popup or cross-origin iframe
    parent_page_id: int | None = None
|
||||
|
||||
|
||||
@dataclass
class BrowserState(DOMState):
    """Snapshot of the browser: the inherited DOMState fields plus page and tab details."""

    # Address and title of the page
    url: str
    title: str
    # Information about the open tabs
    tabs: list[TabInfo]
    # Optional screenshot, stored as a string
    screenshot: str | None = None
    # Pixel counts above / below (default 0)
    pixels_above: int = 0
    pixels_below: int = 0
    # Browser error messages; each instance gets its own fresh list
    browser_errors: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
class BrowserStateHistory:
    """Serializable record of a browser state together with the interacted elements."""

    url: str
    title: str
    tabs: list[TabInfo]
    # One entry per interaction; None where there is no element
    interacted_element: list[DOMHistoryElement | None] | list[None]
    screenshot: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict, converting tabs and elements to dicts as well."""
        return {
            'tabs': [tab.model_dump() for tab in self.tabs],
            'screenshot': self.screenshot,
            'interacted_element': [
                element.to_dict() if element else None for element in self.interacted_element
            ],
            'url': self.url,
            'title': self.title,
        }
|
||||
|
||||
|
||||
class BrowserError(Exception):
    """Root of the browser exception hierarchy; catch this to handle any browser error."""
|
||||
|
||||
|
||||
class URLNotAllowedError(BrowserError):
    """Raised when a URL is not allowed."""
|
||||
Loading…
Add table
Add a link
Reference in a new issue