[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/examples/features/click_fallback_options.py
+++ b/browser-use/examples/features/click_fallback_options.py
@ -0,0 +1,210 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from aiohttp import web  # make sure to install aiohttp: pip install aiohttp
+from langchain_openai import ChatOpenAI
+
+# from langchain_google_genai import ChatGoogleGenerativeAI
+from browser_use import Agent, Controller
+
+# Define a simple HTML page
+HTML_CONTENT = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Custom Select Div</title>
+  <style>
+    .custom-select {
+      position: relative;
+      width: 200px;
+      font-family: Arial, sans-serif;
+      margin-bottom: 20px;
+    }
+
+    .select-display {
+      padding: 10px;
+      border: 1px solid #ccc;
+      background-color: #fff;
+      cursor: pointer;
+    }
+
+    .select-options {
+      position: absolute;
+      top: 100%;
+      left: 0;
+      right: 0;
+      border: 1px solid #ccc;
+      border-top: none;
+      background-color: #fff;
+      display: none;
+      max-height: 150px;
+      overflow-y: auto;
+      z-index: 100;
+    }
+
+    .select-option {
+      padding: 10px;
+      cursor: pointer;
+    }
+
+    .select-option:hover {
+      background-color: #f0f0f0;
+    }
+  </style>
+</head>
+<body>
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+  
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+  
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+
+  <label for="cars">Choose a car:</label>
+  <select name="cars" id="cars">
+    <option value="volvo">Volvo</option>
+    <option value="bmw">BMW</option>
+    <option value="mercedes">Mercedes</option>
+    <option value="audi">Audi</option>
+  </select>
+
+  <button onclick="alert('I told you!')">Don't click me</button>
+
+  <script>
+    document.querySelectorAll('.custom-select').forEach(customSelect => {
+      const selectDisplay = customSelect.querySelector('.select-display');
+      const selectOptions = customSelect.querySelector('.select-options');
+      const options = customSelect.querySelectorAll('.select-option');
+
+      selectDisplay.addEventListener('click', (e) => {
+        // Close all other dropdowns
+        document.querySelectorAll('.select-options').forEach(opt => {
+          if (opt !== selectOptions) opt.style.display = 'none';
+        });
+
+        // Toggle current dropdown
+        const isVisible = selectOptions.style.display === 'block';
+        selectOptions.style.display = isVisible ? 'none' : 'block';
+
+        e.stopPropagation();
+      });
+
+      options.forEach(option => {
+        option.addEventListener('click', () => {
+          selectDisplay.textContent = option.textContent;
+          selectDisplay.dataset.value = option.getAttribute('data-value');
+          selectOptions.style.display = 'none';
+        });
+      });
+    });
+
+    // Close all dropdowns if clicking outside
+    document.addEventListener('click', () => {
+      document.querySelectorAll('.select-options').forEach(opt => {
+        opt.style.display = 'none';
+      });
+    });
+  </script>
+</body>
+</html>
+
+"""
+
+
+# aiohttp request handler to serve the HTML content
+async def handle_root(request):
+	return web.Response(text=HTML_CONTENT, content_type='text/html')
+
+
+# Function to run the HTTP server
+async def run_http_server():
+	app = web.Application()
+	app.router.add_get('/', handle_root)
+	runner = web.AppRunner(app)
+	await runner.setup()
+	site = web.TCPSite(runner, 'localhost', 8000)
+	await site.start()
+	print('HTTP server running on http://localhost:8000')
+	# Keep the server running indefinitely.
+	await asyncio.Event().wait()
+
+
+# Your agent tasks and other logic
+controller = Controller()
+
+
+async def main():
+	# Start the HTTP server in the background.
+	server_task = asyncio.create_task(run_http_server())
+
+	# Example tasks for the agent.
+	xpath_task = 'Open http://localhost:8000/, click element with the xpath "/html/body/div/div[1]" and then click on Oranges'
+	css_selector_task = 'Open http://localhost:8000/, click element with the selector div.select-display and then click on apples'
+	text_task = 'Open http://localhost:8000/, click the third element with the text "Select a fruit" and then click on Apples, then click the second element with the text "Select a fruit" and then click on Oranges'
+	select_task = 'Open http://localhost:8000/, choose the car BMW'
+	button_task = 'Open http://localhost:8000/, click on the button'
+
+	llm = ChatOpenAI(model='gpt-4o')
+	# llm = ChatGoogleGenerativeAI(
+	#     model="gemini-2.0-flash-lite",
+	# )
+
+	# Run different agent tasks.
+	for task in [xpath_task, css_selector_task, text_task, select_task, button_task]:
+		agent = Agent(
+			task=task,
+			llm=llm,
+			controller=controller,
+		)
+		await agent.run()
+
+	# Wait for user input before shutting down.
+	input('Press Enter to close...')
+	# Cancel the server task once finished.
+	server_task.cancel()
+	try:
+		await server_task
+	except asyncio.CancelledError:
+		print('HTTP server stopped.')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/cross_origin_iframes.py
+++ b/browser-use/examples/features/cross_origin_iframes.py
@ -0,0 +1,52 @@
+"""
+Example of how it supports cross-origin iframes.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+controller = Controller()
+
+
+async def main():
+	agent = Agent(
+		task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
+		llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
+		controller=controller,
+		browser=browser,
+	)
+
+	await agent.run()
+	await browser.close()
+
+	input('Press Enter to close...')
+
+
+if __name__ == '__main__':
+	try:
+		asyncio.run(main())
+	except Exception as e:
+		print(e)
--- a/browser-use/examples/features/custom_output.py
+++ b/browser-use/examples/features/custom_output.py
@ -0,0 +1,59 @@
+"""
+Show how to use custom outputs.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Controller
+
+
+# One post entry of the structured output; ``Posts.posts`` is a list of these.
+# NOTE(review): documented with comments rather than a class docstring because
+# pydantic may surface docstrings in the generated schema — confirm before converting.
+class Post(BaseModel):
+	post_title: str
+	post_url: str
+	num_comments: int
+	hours_since_post: int
+
+
+# Top-level output model: passed to ``Controller(output_model=Posts)`` and used in
+# ``main`` to validate the agent's final JSON result.
+class Posts(BaseModel):
+	posts: list[Post]
+
+
+controller = Controller(output_model=Posts)
+
+
+async def main():
+	task = 'Go to hackernews show hn and give me the first  5 posts'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller)
+
+	history = await agent.run()
+
+	result = history.final_result()
+	if result:
+		parsed: Posts = Posts.model_validate_json(result)
+
+		for post in parsed.posts:
+			print('\n--------------------------------')
+			print(f'Title:            {post.post_title}')
+			print(f'URL:              {post.post_url}')
+			print(f'Comments:         {post.num_comments}')
+			print(f'Hours since post: {post.hours_since_post}')
+	else:
+		print('No result')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/custom_system_prompt.py
+++ b/browser-use/examples/features/custom_system_prompt.py
@ -0,0 +1,39 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+extend_system_message = (
+	'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!'
+)
+
+# or use override_system_message to completely override the system prompt
+
+
+async def main():
+	task = "do google search to find images of Elon Musk's wife"
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, extend_system_message=extend_system_message)
+
+	print(
+		json.dumps(
+			agent.message_manager.system_prompt.model_dump(exclude_unset=True),
+			indent=4,
+		)
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/custom_user_agent.py
+++ b/browser-use/examples/features/custom_user_agent.py
@ -0,0 +1,79 @@
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_anthropic import ChatAnthropic
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from browser_use.controller.service import Controller
+
+
+def get_llm(provider: str):
+	if provider == 'anthropic':
+		return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
+	elif provider == 'openai':
+		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+
+	else:
+		raise ValueError(f'Unsupported provider: {provider}')
+
+
+# NOTE: This example is to find your current user agent string to use it in the browser_context
+task = 'go to https://whatismyuseragent.com and find the current user agent string '
+
+
+controller = Controller()
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--query', type=str, help='The query to process', default=task)
+parser.add_argument(
+	'--provider',
+	type=str,
+	choices=['openai', 'anthropic'],
+	default='openai',
+	help='The model provider to use (default: openai)',
+)
+
+args = parser.parse_args()
+
+llm = get_llm(args.provider)
+
+
+browser = Browser(
+	config=BrowserConfig(
+		# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+
+browser_context = BrowserContext(config=BrowserContextConfig(user_agent='foobarfoo'), browser=browser)
+
+agent = Agent(
+	task=args.query,
+	llm=llm,
+	controller=controller,
+	# browser=browser,
+	browser_context=browser_context,
+	use_vision=True,
+	max_actions_per_step=1,
+)
+
+
+async def main():
+	"""Run the module-level agent with ``max_steps=25``, then close the browser context on Enter."""
+	await agent.run(max_steps=25)
+
+	# Block until the user confirms, so the browser stays open until Enter is pressed.
+	input('Press Enter to close the browser...')
+	await browser_context.close()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/download_file.py
+++ b/browser-use/examples/features/download_file.py
@ -0,0 +1,42 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+browser = Browser(
+	config=BrowserConfig(
+		new_context_config=BrowserContextConfig(save_downloads_path=os.path.join(os.path.expanduser('~'), 'downloads'))
+	)
+)
+
+
+async def run_download():
+	agent = Agent(
+		task=('Go to "https://file-examples.com/" and download the smallest doc file.'),
+		llm=llm,
+		max_actions_per_step=8,
+		use_vision=True,
+		browser=browser,
+	)
+	await agent.run(max_steps=25)
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_download())
--- a/browser-use/examples/features/drag_drop.py
+++ b/browser-use/examples/features/drag_drop.py
@ -0,0 +1,51 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+
+task_1 = """
+Navigate to: https://sortablejs.github.io/Sortable/. 
+Then scroll down to the first examplw with title "Simple list example". 
+Drag the element with name "item 1" to below the element with name "item 3".
+"""
+
+
+task_2 = """
+Navigate to: https://excalidraw.com/.
+Click on the pencil icon (with index 40).
+Then draw a triangle in the canvas.
+Draw the triangle starting from coordinate (400,400).
+You can use the drag and drop action to draw the triangle.
+"""
+
+
+async def run_search():
+	agent = Agent(
+		task=task_1,
+		llm=llm,
+		max_actions_per_step=1,
+		use_vision=True,
+	)
+
+	await agent.run(max_steps=25)
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/features/follow_up_tasks.py
+++ b/browser-use/examples/features/follow_up_tasks.py
@ -0,0 +1,50 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig, BrowserContextConfig, Controller
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+# Get your chrome path
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		new_context_config=BrowserContextConfig(
+			keep_alive=True,
+		),
+	),
+)
+
+controller = Controller()
+
+
+task = 'Find the founders of browser-use and draft them a short personalized message'
+
+agent = Agent(task=task, llm=llm, controller=controller, browser=browser)
+
+
+async def main():
+	await agent.run()
+
+	# new_task = input('Type in a new task: ')
+	new_task = 'Find an image of the founders'
+
+	agent.add_new_task(new_task)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/initial_actions.py
+++ b/browser-use/examples/features/initial_actions.py
@ -0,0 +1,34 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+llm = ChatOpenAI(model='gpt-4o')
+
+initial_actions = [
+	{'open_tab': {'url': 'https://www.google.com'}},
+	{'open_tab': {'url': 'https://en.wikipedia.org/wiki/Randomness'}},
+	{'scroll_down': {'amount': 1000}},
+]
+agent = Agent(
+	task='What theories are displayed on the page?',
+	initial_actions=initial_actions,
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the module-level agent (configured with ``initial_actions``) with ``max_steps=10``."""
+	await agent.run(max_steps=10)
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/multi-tab_handling.py
+++ b/browser-use/examples/features/multi-tab_handling.py
@ -0,0 +1,33 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# video: https://preview.screen.studio/share/clenCmS6
+llm = ChatOpenAI(model='gpt-4o')
+agent = Agent(
+	task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the module-level multi-tab agent."""
+	await agent.run()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/multiple_agents_same_browser.py
+++ b/browser-use/examples/features/multiple_agents_same_browser.py
@ -0,0 +1,67 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+
+# Video: https://preview.screen.studio/share/8Elaq9sm
+async def main():
+	# Persist the browser state across agents
+
+	browser = Browser()
+	async with await browser.new_context() as context:
+		model = ChatOpenAI(model='gpt-4o')
+		current_agent = None
+
+		async def get_input():
+			return await asyncio.get_event_loop().run_in_executor(
+				None, lambda: input('Enter task (p: pause current agent, r: resume, b: break): ')
+			)
+
+		while True:
+			task = await get_input()
+
+			if task.lower() == 'p':
+				# Pause the current agent if one exists
+				if current_agent:
+					current_agent.pause()
+				continue
+			elif task.lower() == 'r':
+				# Resume the current agent if one exists
+				if current_agent:
+					current_agent.resume()
+				continue
+			elif task.lower() == 'b':
+				# Break the current agent's execution if one exists
+				if current_agent:
+					current_agent.stop()
+					current_agent = None
+				continue
+
+			# If there's a current agent running, pause it before starting new one
+			if current_agent:
+				current_agent.pause()
+
+			# Create and run new agent with the task
+			current_agent = Agent(
+				task=task,
+				llm=model,
+				browser_context=context,
+			)
+
+			# Run the agent asynchronously without blocking
+			asyncio.create_task(current_agent.run())
+
+
+asyncio.run(main())
+
+# Now add the cheapest to the cart
--- a/browser-use/examples/features/outsource_state.py
+++ b/browser-use/examples/features/outsource_state.py
@ -0,0 +1,70 @@
+"""
+Show how to keep the agent state outside the agent: inject it, save it to a file and load it back.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import anyio
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.agent.views import AgentState
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+async def main():
+	"""Take a single agent step with an injected ``AgentState`` and round-trip that state through a file.
+
+	A fresh ``Agent`` is built around the shared ``agent_state``, one step is taken,
+	and, unless the step reports done and valid, the state (without history) is
+	written to ``agent_state.json`` and read back.
+	"""
+	task = 'Go to hackernews show hn and give me the first  5 posts'
+
+	browser = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+
+	browser_context = await browser.new_context()
+
+	# State object that lives outside the agent and is injected into each new Agent.
+	agent_state = AgentState()
+
+	for i in range(10):
+		agent = Agent(
+			task=task,
+			llm=ChatOpenAI(model='gpt-4o'),
+			browser=browser,
+			browser_context=browser_context,
+			injected_agent_state=agent_state,
+			page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
+		)
+
+		done, valid = await agent.take_step()
+		print(f'Step {i}: Done: {done}, Valid: {valid}')
+
+		if done and valid:
+			break
+
+		# History is cleared here and also excluded from the serialized state below.
+		agent_state.history.history = []
+
+		# Save state to file
+		async with await anyio.open_file('agent_state.json', 'w') as f:
+			serialized = agent_state.model_dump_json(exclude={'history'})
+			await f.write(serialized)
+
+		# Load state back from file
+		async with await anyio.open_file('agent_state.json', 'r') as f:
+			loaded_json = await f.read()
+			agent_state = AgentState.model_validate_json(loaded_json)
+
+		# NOTE(review): this unconditional break ends the loop after the first
+		# iteration, so range(10) never advances — confirm whether that is intended.
+		break
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/parallel_agents.py
+++ b/browser-use/examples/features/parallel_agents.py
@ -0,0 +1,59 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+browser = Browser(
+	config=BrowserConfig(
+		disable_security=True,
+		headless=False,
+		new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
+	)
+)
+llm = ChatOpenAI(model='gpt-4o')
+
+
+async def main():
+	agents = [
+		Agent(task=task, llm=llm, browser=browser)
+		for task in [
+			'Search Google for weather in Tokyo',
+			'Check Reddit front page title',
+			'Look up Bitcoin price on Coinbase',
+			'Find NASA image of the day',
+			# 'Check top story on CNN',
+			# 'Search latest SpaceX launch date',
+			# 'Look up population of Paris',
+			# 'Find current time in Sydney',
+			# 'Check who won last Super Bowl',
+			# 'Search trending topics on Twitter',
+		]
+	]
+
+	await asyncio.gather(*[agent.run() for agent in agents])
+
+	# async with await browser.new_context() as context:
+	agentX = Agent(
+		task='Go to apple.com and return the title of the page',
+		llm=llm,
+		browser=browser,
+		# browser_context=context,
+	)
+	await agentX.run()
+
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/pause_agent.py
+++ b/browser-use/examples/features/pause_agent.py
@ -0,0 +1,103 @@
+import asyncio
+import os
+import sys
+import threading
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+
+class AgentController:
+	def __init__(self):
+		llm = ChatOpenAI(model='gpt-4o')
+		self.agent = Agent(
+			task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
+			llm=llm,
+		)
+		self.running = False
+
+	async def run_agent(self):
+		"""Run the agent"""
+		self.running = True
+		await self.agent.run()
+
+	def start(self):
+		"""Start the agent in a separate thread"""
+		loop = asyncio.new_event_loop()
+		asyncio.set_event_loop(loop)
+		loop.run_until_complete(self.run_agent())
+
+	def pause(self):
+		"""Pause the agent"""
+		self.agent.pause()
+
+	def resume(self):
+		"""Resume the agent"""
+		self.agent.resume()
+
+	def stop(self):
+		"""Stop the agent"""
+		self.agent.stop()
+		self.running = False
+
+
+def print_menu():
+	print('\nAgent Control Menu:')
+	print('1. Start')
+	print('2. Pause')
+	print('3. Resume')
+	print('4. Stop')
+	print('5. Exit')
+
+
+async def main():
+	"""Interactive menu loop that starts, pauses, resumes and stops the agent.
+
+	The agent runs on a separate thread (``AgentController.start`` creates its own
+	event loop there), so the blocking ``input()`` call here does not stall it.
+	"""
+	controller = AgentController()
+	# Thread currently running the agent, or None when no run has been started / it was stopped.
+	agent_thread = None
+
+	while True:
+		print_menu()
+		try:
+			choice = input('Enter your choice (1-5): ')
+		except KeyboardInterrupt:
+			# Ctrl+C at the prompt is handled like choosing "Exit".
+			choice = '5'
+
+		# Start is honoured only while no agent thread exists; otherwise this
+		# condition is false and '1' matches none of the branches below.
+		if choice == '1' and not agent_thread:
+			print('Starting agent...')
+			agent_thread = threading.Thread(target=controller.start)
+			agent_thread.start()
+
+		elif choice == '2':
+			print('Pausing agent...')
+			controller.pause()
+
+		elif choice == '3':
+			print('Resuming agent...')
+			controller.resume()
+
+		elif choice == '4':
+			print('Stopping agent...')
+			controller.stop()
+			# Wait for the worker thread to finish, then allow a new start.
+			if agent_thread:
+				agent_thread.join()
+				agent_thread = None
+
+		elif choice == '5':
+			print('Exiting...')
+			# Stop and join only when the controller still reports a run in progress.
+			if controller.running:
+				controller.stop()
+				if agent_thread:
+					agent_thread.join()
+			break
+
+		await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/planner.py
+++ b/browser-use/examples/features/planner.py
@ -0,0 +1,30 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+planner_llm = ChatOpenAI(
+	model='o3-mini',
+)
+task = 'your task'
+
+
+agent = Agent(task=task, llm=llm, planner_llm=planner_llm, use_vision_for_planner=False, planner_interval=1)
+
+
+async def main():
+	"""Run the module-level agent, which is configured with a separate ``planner_llm``."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/playwright_script_generation.py
+++ b/browser-use/examples/features/playwright_script_generation.py
@ -0,0 +1,136 @@
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Ensure the project root is in the Python path if running directly
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig
+
+# Define the task for the agent
+TASK_DESCRIPTION = """
+1. Go to amazon.com
+2. Search for 'i7 14700k'
+4. If there is an 'Add to Cart' button, open the product page and then click add to cart.
+5. the open the shopping cart page /cart button/ go to cart button.
+6. Scroll down to the bottom of the cart page.
+7. Scroll up to the top of the cart page.
+8. Finish the task.
+"""
+
+# Define the path where the Playwright script will be saved
+SCRIPT_DIR = Path('./playwright_scripts')
+SCRIPT_PATH = SCRIPT_DIR / 'playwright_amazon_cart_script.py'
+
+
+# Helper function to stream output from the subprocess
+async def stream_output(stream, prefix):
+	if stream is None:
+		print(f'{prefix}: (No stream available)')
+		return
+	while True:
+		line = await stream.readline()
+		if not line:
+			break
+		print(f'{prefix}: {line.decode().rstrip()}', flush=True)
+
+
+async def main():
+	# Initialize the language model
+	llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
+
+	# Configure the browser
+	# Use headless=False if you want to watch the agent visually
+	browser_config = BrowserConfig(headless=False)
+	browser = Browser(config=browser_config)
+
+	# Configure the agent
+	# The 'save_playwright_script_path' argument tells the agent where to save the script
+	agent = Agent(
+		task=TASK_DESCRIPTION,
+		llm=llm,
+		browser=browser,
+		save_playwright_script_path=str(SCRIPT_PATH),  # Pass the path as a string
+	)
+
+	print('Running the agent to generate the Playwright script...')
+	history = None  # Initialize history to None
+	try:
+		history = await agent.run()
+		print('Agent finished running.')
+
+		if history and history.is_successful():
+			print(f'Agent completed the task successfully. Final result: {history.final_result()}')
+		elif history:
+			print('Agent finished, but the task might not be fully successful.')
+			if history.has_errors():
+				print(f'Errors encountered: {history.errors()}')
+		else:
+			print('Agent run did not return a history object.')
+
+	except Exception as e:
+		print(f'An error occurred during the agent run: {e}')
+		# Ensure browser is closed even if agent run fails
+		if browser:
+			await browser.close()
+		return  # Exit if agent failed
+
+	# --- Execute the Generated Playwright Script ---
+	print(f'\nChecking if Playwright script was generated at: {SCRIPT_PATH}')
+	if SCRIPT_PATH.exists():
+		print('Playwright script found. Attempting to execute...')
+		try:
+			# Ensure the script directory exists before running
+			SCRIPT_DIR.mkdir(parents=True, exist_ok=True)
+
+			# Execute the generated script using asyncio.create_subprocess_exec
+			process = await asyncio.create_subprocess_exec(
+				sys.executable,
+				str(SCRIPT_PATH),
+				stdout=asyncio.subprocess.PIPE,
+				stderr=asyncio.subprocess.PIPE,
+				cwd=Path.cwd(),  # Run from the current working directory
+			)
+
+			print('\n--- Playwright Script Execution ---')
+			# Create tasks to stream stdout and stderr concurrently
+			stdout_task = asyncio.create_task(stream_output(process.stdout, 'stdout'))
+			stderr_task = asyncio.create_task(stream_output(process.stderr, 'stderr'))
+
+			# Wait for both stream tasks and the process to finish
+			await asyncio.gather(stdout_task, stderr_task)
+			returncode = await process.wait()
+			print('-------------------------------------')
+
+			if returncode == 0:
+				print('\n✅ Playwright script executed successfully!')
+			else:
+				print(f'\n⚠️ Playwright script finished with exit code {returncode}.')
+
+		except Exception as e:
+			print(f'\n❌ An error occurred while executing the Playwright script: {e}')
+	else:
+		print(f'\n❌ Playwright script not found at {SCRIPT_PATH}. Generation might have failed.')
+
+	# Close the browser used by the agent (if not already closed by agent.run error handling)
+	# Note: The generated script manages its own browser instance.
+	if browser:
+		await browser.close()
+		print("Agent's browser closed.")
+
+
+if __name__ == '__main__':
+	# Ensure the script directory is clean before running (optional)
+	if SCRIPT_PATH.exists():
+		SCRIPT_PATH.unlink()
+		print(f'Removed existing script: {SCRIPT_PATH}')
+
+	# Run the main async function
+	asyncio.run(main())
--- a/browser-use/examples/features/restrict_urls.py
+++ b/browser-use/examples/features/restrict_urls.py
@ -0,0 +1,47 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+task = (
+	"go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"
+)
+
+allowed_domains = ['google.com']
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		new_context_config=BrowserContextConfig(
+			allowed_domains=allowed_domains,
+		),
+	),
+)
+
+agent = Agent(
+	task=task,
+	llm=llm,
+	browser=browser,
+)
+
+
+async def main():
+	"""Run the domain-restricted agent with ``max_steps=25``, then close the browser on Enter."""
+	await agent.run(max_steps=25)
+
+	# Block until the user confirms, so the browser stays open until Enter is pressed.
+	input('Press Enter to close the browser...')
+	await browser.close()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/result_processing.py
+++ b/browser-use/examples/features/result_processing.py
@ -0,0 +1,60 @@
+import asyncio
+import os
+import sys
+from pprint import pprint
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+# LLM and browser shared by main() below.
+llm = ChatOpenAI(model='gpt-4o')
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+		disable_security=True,
+	)
+)
+
+
+async def main():
+	async with await browser.new_context(
+		config=BrowserContextConfig(
+			trace_path='./tmp/result_processing',
+			no_viewport=False,
+			window_width=1280,
+			window_height=1000,
+		)
+	) as browser_context:
+		agent = Agent(
+			task="go to google.com and type 'OpenAI' click search and give me the first url",
+			llm=llm,
+			browser_context=browser_context,
+		)
+		history: AgentHistoryList = await agent.run(max_steps=3)
+
+		print('Final Result:')
+		pprint(history.final_result(), indent=4)
+
+		print('\nErrors:')
+		pprint(history.errors(), indent=4)
+
+		# e.g. xPaths the model clicked on
+		print('\nModel Outputs:')
+		pprint(history.model_actions(), indent=4)
+
+		print('\nThoughts:')
+		pprint(history.model_thoughts(), indent=4)
+	# close browser
+	await browser.close()
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/save_trace.py
+++ b/browser-use/examples/features/save_trace.py
@ -0,0 +1,34 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser
+from browser_use.browser.context import BrowserContextConfig
+
+# LLM used by the agent created in main().
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+
+
+async def main():
+	browser = Browser()
+
+	async with await browser.new_context(config=BrowserContextConfig(trace_path='./tmp/traces/')) as context:
+		agent = Agent(
+			task='Go to hackernews, then go to apple.com and return all titles of open tabs',
+			llm=llm,
+			browser_context=context,
+		)
+		await agent.run()
+
+	await browser.close()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/sensitive_data.py
+++ b/browser-use/examples/features/sensitive_data.py
@ -0,0 +1,32 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+# The model will see x_name and x_password, but never the actual values.
+# The values below look like placeholders; presumably they are to be replaced with real credentials.
+sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'}
+# The task refers to the credentials by their dictionary keys, not by their values.
+task = 'go to x.com and login with x_name and x_password then find interesting posts and like them'
+
+agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data)
+
+
+async def main() -> None:
+	"""Run the module-level agent with its default run settings."""
+	await agent.run()
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/small_model_for_extraction.py
+++ b/browser-use/examples/features/small_model_for_extraction.py
@ -0,0 +1,26 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Main model that drives the agent.
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+# Smaller model passed to the agent as page_extraction_llm.
+small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
+task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
+agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm)
+
+
+async def main() -> None:
+	"""Run the module-level agent with its default run settings."""
+	await agent.run()
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/task_with_memory.py
+++ b/browser-use/examples/features/task_with_memory.py
@ -0,0 +1,102 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import anyio
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Browser, BrowserConfig, Controller
+
+# Pages the agent is asked to visit and summarize; the list and its length are
+# interpolated into task_description below.
+links = [
+	'https://docs.mem0.ai/components/llms/models/litellm',
+	'https://docs.mem0.ai/components/llms/models/mistral_AI',
+	'https://docs.mem0.ai/components/llms/models/ollama',
+	'https://docs.mem0.ai/components/llms/models/openai',
+	'https://docs.mem0.ai/components/llms/models/together',
+	'https://docs.mem0.ai/components/llms/models/xAI',
+	'https://docs.mem0.ai/components/llms/overview',
+	'https://docs.mem0.ai/components/vectordbs/config',
+	'https://docs.mem0.ai/components/vectordbs/dbs/azure_ai_search',
+	'https://docs.mem0.ai/components/vectordbs/dbs/chroma',
+	'https://docs.mem0.ai/components/vectordbs/dbs/elasticsearch',
+	'https://docs.mem0.ai/components/vectordbs/dbs/milvus',
+	'https://docs.mem0.ai/components/vectordbs/dbs/opensearch',
+	'https://docs.mem0.ai/components/vectordbs/dbs/pgvector',
+	'https://docs.mem0.ai/components/vectordbs/dbs/pinecone',
+	'https://docs.mem0.ai/components/vectordbs/dbs/qdrant',
+	'https://docs.mem0.ai/components/vectordbs/dbs/redis',
+	'https://docs.mem0.ai/components/vectordbs/dbs/supabase',
+	'https://docs.mem0.ai/components/vectordbs/dbs/vertex_ai_vector_search',
+	'https://docs.mem0.ai/components/vectordbs/dbs/weaviate',
+	'https://docs.mem0.ai/components/vectordbs/overview',
+	'https://docs.mem0.ai/contributing/development',
+	'https://docs.mem0.ai/contributing/documentation',
+	'https://docs.mem0.ai/core-concepts/memory-operations',
+	'https://docs.mem0.ai/core-concepts/memory-types',
+]
+
+
+class Link(BaseModel):
+	"""One summarized page: its URL, its title and a summary of its content."""
+
+	url: str
+	title: str
+	summary: str
+
+
+class Links(BaseModel):
+	"""Container for a list of Link entries; used as the controller's output model and to parse the final result."""
+
+	links: list[Link]
+
+
+# Actions passed to the Agent as initial_actions: a single open_tab action for the docs landing page.
+initial_actions = [
+	{'open_tab': {'url': 'https://docs.mem0.ai/'}},
+]
+# Controller configured with Links as its output model; main() parses the final result with the same model.
+controller = Controller(output_model=Links)
+# Prompt for the agent; the f-string embeds the full link list and its length.
+task_description = f"""
+Visit all the links provided in {links} and summarize the content of the page with url and title. There are {len(links)} links to visit. Make sure to visit all the links. Return a json with the following format: [{{url: <url>, title: <title>, summary: <summary>}}].
+
+Guidelines:
+1. Strictly stay on the domain https://docs.mem0.ai
+2. Do not visit any other websites.
+3. Ignore the links that are hashed (#) or javascript (:), or mailto, or tel, or other protocols
+4. Don't visit any other url other than the ones provided above.
+5. Capture the unique urls which are not already visited.
+6. If you visit any page that doesn't have host name docs.mem0.ai, then do not visit it and come back to the page with host name docs.mem0.ai.
+"""
+
+
+async def main(max_steps=500):
+	config = BrowserConfig(headless=True)
+	browser = Browser(config=config)
+
+	agent = Agent(
+		task=task_description,
+		llm=ChatOpenAI(model='gpt-4o-mini'),
+		controller=controller,
+		initial_actions=initial_actions,
+		enable_memory=True,
+		browser=browser,
+	)
+	history = await agent.run(max_steps=max_steps)
+	result = history.final_result()
+	parsed_result = []
+	if result:
+		parsed: Links = Links.model_validate_json(result)
+		print(f'Total parsed links: {len(parsed.links)}')
+		for link in parsed.links:
+			parsed_result.append({'title': link.title, 'url': link.url, 'summary': link.summary})
+	else:
+		print('No result')
+
+	async with await anyio.open_file('result.json', 'w+') as f:
+		await f.write(json.dumps(parsed_result, indent=4))
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/validate_output.py
+++ b/browser-use/examples/features/validate_output.py
@ -0,0 +1,49 @@
+"""
+Demonstrate output validator.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import ActionResult, Agent, Controller
+
+# Controller on whose registry the custom done() action below is registered.
+controller = Controller()
+
+
+class DoneResult(BaseModel):
+	"""Parameter model for the custom done() action registered on the controller."""
+
+	title: str
+	comments: str
+	hours_since_start: int
+
+
+# we overwrite done() in this example to demonstrate the validator
+@controller.registry.action('Done with task', param_model=DoneResult)
+async def done(params: DoneResult):
+	result = ActionResult(is_done=True, extracted_content=params.model_dump_json())
+	print(result)
+	# NOTE: this is clearly wrong - to demonstrate the validator
+	return 'blablabla'
+
+
+async def main():
+	task = 'Go to hackernews hn and give me the top 1 post'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller, validate_output=True)
+	# NOTE: this should fail to demonstrate the validator
+	await agent.run(max_steps=5)
+
+
+# Run the example only when this file is executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())