[Add] browser-use and main.py

This commit is contained in:
tv0924@icloud.com 2025-05-18 21:57:54 +09:00
commit 96914d44ac
221 changed files with 30952 additions and 1 deletions

View file

@ -0,0 +1,210 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from aiohttp import web # make sure to install aiohttp: pip install aiohttp
from langchain_openai import ChatOpenAI
# from langchain_google_genai import ChatGoogleGenerativeAI
from browser_use import Agent, Controller
# Define a simple HTML page
HTML_CONTENT = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Custom Select Div</title>
<style>
.custom-select {
position: relative;
width: 200px;
font-family: Arial, sans-serif;
margin-bottom: 20px;
}
.select-display {
padding: 10px;
border: 1px solid #ccc;
background-color: #fff;
cursor: pointer;
}
.select-options {
position: absolute;
top: 100%;
left: 0;
right: 0;
border: 1px solid #ccc;
border-top: none;
background-color: #fff;
display: none;
max-height: 150px;
overflow-y: auto;
z-index: 100;
}
.select-option {
padding: 10px;
cursor: pointer;
}
.select-option:hover {
background-color: #f0f0f0;
}
</style>
</head>
<body>
<div class="custom-select">
<div class="select-display">Select a fruit</div>
<div class="select-options">
<div class="select-option" data-value="option1">Apples</div>
<div class="select-option" data-value="option2">Oranges</div>
<div class="select-option" data-value="option3">Pineapples</div>
</div>
</div>
<div class="custom-select">
<div class="select-display">Select a fruit</div>
<div class="select-options">
<div class="select-option" data-value="option1">Apples</div>
<div class="select-option" data-value="option2">Oranges</div>
<div class="select-option" data-value="option3">Pineapples</div>
</div>
</div>
<div class="custom-select">
<div class="select-display">Select a fruit</div>
<div class="select-options">
<div class="select-option" data-value="option1">Apples</div>
<div class="select-option" data-value="option2">Oranges</div>
<div class="select-option" data-value="option3">Pineapples</div>
</div>
</div>
<div class="custom-select">
<div class="select-display">Select a fruit</div>
<div class="select-options">
<div class="select-option" data-value="option1">Apples</div>
<div class="select-option" data-value="option2">Oranges</div>
<div class="select-option" data-value="option3">Pineapples</div>
</div>
</div>
<label for="cars">Choose a car:</label>
<select name="cars" id="cars">
<option value="volvo">Volvo</option>
<option value="bmw">BMW</option>
<option value="mercedes">Mercedes</option>
<option value="audi">Audi</option>
</select>
<button onclick="alert('I told you!')">Don't click me</button>
<script>
document.querySelectorAll('.custom-select').forEach(customSelect => {
const selectDisplay = customSelect.querySelector('.select-display');
const selectOptions = customSelect.querySelector('.select-options');
const options = customSelect.querySelectorAll('.select-option');
selectDisplay.addEventListener('click', (e) => {
// Close all other dropdowns
document.querySelectorAll('.select-options').forEach(opt => {
if (opt !== selectOptions) opt.style.display = 'none';
});
// Toggle current dropdown
const isVisible = selectOptions.style.display === 'block';
selectOptions.style.display = isVisible ? 'none' : 'block';
e.stopPropagation();
});
options.forEach(option => {
option.addEventListener('click', () => {
selectDisplay.textContent = option.textContent;
selectDisplay.dataset.value = option.getAttribute('data-value');
selectOptions.style.display = 'none';
});
});
});
// Close all dropdowns if clicking outside
document.addEventListener('click', () => {
document.querySelectorAll('.select-options').forEach(opt => {
opt.style.display = 'none';
});
});
</script>
</body>
</html>
"""
# aiohttp request handler to serve the HTML content
async def handle_root(request):
    """Answer a request for the site root with the static demo page.

    The incoming ``request`` is not inspected; every call returns the same
    ``HTML_CONTENT`` document with a ``text/html`` content type.
    """
    page = web.Response(text=HTML_CONTENT, content_type='text/html')
    return page
# Function to run the HTTP server
async def run_http_server():
    """Serve the demo page on http://localhost:8000 until the task is cancelled.

    The coroutine never finishes on its own: once the site is started it waits
    on an event that is never set. Callers are expected to run it as a task
    and cancel that task to shut the server down.
    """
    app = web.Application()
    app.router.add_get('/', handle_root)
    runner = web.AppRunner(app)
    await runner.setup()
    try:
        site = web.TCPSite(runner, 'localhost', 8000)
        await site.start()
        print('HTTP server running on http://localhost:8000')
        # Keep the server running indefinitely.
        await asyncio.Event().wait()
    finally:
        # Cancellation (or a failed bind) unwinds through here; without this
        # call the runner and its listening socket are never released.
        await runner.cleanup()
# Your agent tasks and other logic
controller = Controller()
async def main():
    """Run every demo task against the local test page, then stop the server.

    The HTTP server runs as a background task for the whole duration and is
    cancelled on the way out, including when an agent run raises.
    """
    # Start the HTTP server in the background.
    server_task = asyncio.create_task(run_http_server())
    try:
        # Example tasks for the agent.
        xpath_task = 'Open http://localhost:8000/, click element with the xpath "/html/body/div/div[1]" and then click on Oranges'
        css_selector_task = 'Open http://localhost:8000/, click element with the selector div.select-display and then click on apples'
        text_task = 'Open http://localhost:8000/, click the third element with the text "Select a fruit" and then click on Apples, then click the second element with the text "Select a fruit" and then click on Oranges'
        select_task = 'Open http://localhost:8000/, choose the car BMW'
        button_task = 'Open http://localhost:8000/, click on the button'
        llm = ChatOpenAI(model='gpt-4o')
        # llm = ChatGoogleGenerativeAI(
        #     model="gemini-2.0-flash-lite",
        # )
        # Run the agent tasks one after another, each with a fresh Agent.
        for task in [xpath_task, css_selector_task, text_task, select_task, button_task]:
            agent = Agent(
                task=task,
                llm=llm,
                controller=controller,
            )
            await agent.run()
        # Wait for user input before shutting down.
        input('Press Enter to close...')
    finally:
        # Cancel the server task once finished (or on failure), and wait for
        # it to actually unwind before returning.
        server_task.cancel()
        try:
            await server_task
        except asyncio.CancelledError:
            print('HTTP server stopped.')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,52 @@
"""
Example of how it supports cross-origin iframes.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Controller
from browser_use.browser.browser import Browser, BrowserConfig
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
browser = Browser(
config=BrowserConfig(
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
)
)
controller = Controller()
async def main():
    """Run the cross-origin iframe task, wait for the user, then close the browser.

    The prompt is shown while the browser is still open so the final page can
    be inspected; the browser is closed afterwards even if the run fails.
    """
    agent = Agent(
        task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
        llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
        controller=controller,
        browser=browser,
    )
    try:
        await agent.run()
        input('Press Enter to close...')
    finally:
        await browser.close()
if __name__ == '__main__':
try:
asyncio.run(main())
except Exception as e:
print(e)

View file

@ -0,0 +1,59 @@
"""
Show how to use custom outputs.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import Agent, Controller
class Post(BaseModel):
    # One entry of the structured result; field names and types define the
    # shape the agent's final answer is validated against.
    post_title: str
    post_url: str
    num_comments: int
    hours_since_post: int
class Posts(BaseModel):
    # Top-level output model: a list wrapper around Post entries.
    posts: list[Post]
controller = Controller(output_model=Posts)
async def main():
    """Run the Show HN task and print every post parsed from the final result.

    Prints ``No result`` when the agent history carries no final result.
    """
    agent = Agent(
        task='Go to hackernews show hn and give me the first 5 posts',
        llm=ChatOpenAI(model='gpt-4o'),
        controller=controller,
    )
    history = await agent.run()
    raw = history.final_result()
    if not raw:
        print('No result')
        return
    # The final result is JSON text matching the Posts model.
    listing: Posts = Posts.model_validate_json(raw)
    for entry in listing.posts:
        print('\n--------------------------------')
        print(f'Title: {entry.post_title}')
        print(f'URL: {entry.post_url}')
        print(f'Comments: {entry.num_comments}')
        print(f'Hours since post: {entry.hours_since_post}')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,39 @@
import asyncio
import json
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
extend_system_message = (
'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!'
)
# or use override_system_message to completely override the system prompt
async def main():
    """Build an agent with the extended system message, print its prompt, then run it."""
    agent = Agent(
        task="do google search to find images of Elon Musk's wife",
        llm=ChatOpenAI(model='gpt-4o'),
        extend_system_message=extend_system_message,
    )
    # Dump only the explicitly-set fields of the system prompt so the effect
    # of the extension is visible before the run starts.
    prompt_fields = agent.message_manager.system_prompt.model_dump(exclude_unset=True)
    print(json.dumps(prompt_fields, indent=4))
    await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,79 @@
import argparse
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from browser_use.controller.service import Controller
def get_llm(provider: str):
    """Return the chat model for ``provider``.

    Args:
        provider: Either ``'anthropic'`` or ``'openai'``.

    Raises:
        ValueError: If ``provider`` is any other value.
    """
    if provider == 'anthropic':
        return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
    if provider == 'openai':
        return ChatOpenAI(model='gpt-4o', temperature=0.0)
    raise ValueError(f'Unsupported provider: {provider}')
# NOTE: This example is to find your current user agent string to use it in the browser_context
task = 'go to https://whatismyuseragent.com and find the current user agent string '
controller = Controller()
parser = argparse.ArgumentParser()
parser.add_argument('--query', type=str, help='The query to process', default=task)
parser.add_argument(
'--provider',
type=str,
choices=['openai', 'anthropic'],
default='openai',
help='The model provider to use (default: openai)',
)
args = parser.parse_args()
llm = get_llm(args.provider)
browser = Browser(
config=BrowserConfig(
# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
)
)
browser_context = BrowserContext(config=BrowserContextConfig(user_agent='foobarfoo'), browser=browser)
agent = Agent(
task=args.query,
llm=llm,
controller=controller,
# browser=browser,
browser_context=browser_context,
use_vision=True,
max_actions_per_step=1,
)
async def main():
    """Run the agent for at most 25 steps, wait for the user, then release the browser.

    Both the browser context and the browser that owns it are closed, even
    when the run raises.
    """
    try:
        await agent.run(max_steps=25)
        input('Press Enter to close the browser...')
    finally:
        await browser_context.close()
        # Closing the context alone leaves the browser itself open.
        await browser.close()


if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,42 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
raise ValueError('GOOGLE_API_KEY is not set')
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
browser = Browser(
config=BrowserConfig(
new_context_config=BrowserContextConfig(save_downloads_path=os.path.join(os.path.expanduser('~'), 'downloads'))
)
)
async def run_download():
    """Run the file-download task for at most 25 steps and always close the browser."""
    agent = Agent(
        task=('Go to "https://file-examples.com/" and download the smallest doc file.'),
        llm=llm,
        max_actions_per_step=8,
        use_vision=True,
        browser=browser,
    )
    try:
        await agent.run(max_steps=25)
    finally:
        # Release the browser even when the run fails part-way.
        await browser.close()
if __name__ == '__main__':
asyncio.run(run_download())

View file

@ -0,0 +1,51 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import Agent
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
raise ValueError('GOOGLE_API_KEY is not set')
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
# Prompt for the drag-and-drop demo on the SortableJS example page.
task_1 = """
Navigate to: https://sortablejs.github.io/Sortable/.
Then scroll down to the first example with title "Simple list example".
Drag the element with name "item 1" to below the element with name "item 3".
"""
task_2 = """
Navigate to: https://excalidraw.com/.
Click on the pencil icon (with index 40).
Then draw a triangle in the canvas.
Draw the triangle starting from coordinate (400,400).
You can use the drag and drop action to draw the triangle.
"""
async def run_search():
    """Run ``task_1`` with vision enabled, one action per step, for at most 25 steps."""
    agent_options = {
        'task': task_1,
        'llm': llm,
        'max_actions_per_step': 1,
        'use_vision': True,
    }
    agent = Agent(**agent_options)
    await agent.run(max_steps=25)
if __name__ == '__main__':
asyncio.run(run_search())

View file

@ -0,0 +1,50 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser, BrowserConfig, BrowserContextConfig, Controller
# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
temperature=0.0,
)
# Get your chrome path
browser = Browser(
config=BrowserConfig(
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
new_context_config=BrowserContextConfig(
keep_alive=True,
),
),
)
controller = Controller()
task = 'Find the founders of browser-use and draft them a short personalized message'
agent = Agent(task=task, llm=llm, controller=controller, browser=browser)
async def main():
    """Run the initial task, then hand the same agent a follow-up task and run again."""
    await agent.run()
    # The follow-up could also be read interactively:
    # follow_up = input('Type in a new task: ')
    follow_up = 'Find an image of the founders'
    agent.add_new_task(follow_up)
    await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,34 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
llm = ChatOpenAI(model='gpt-4o')
initial_actions = [
{'open_tab': {'url': 'https://www.google.com'}},
{'open_tab': {'url': 'https://en.wikipedia.org/wiki/Randomness'}},
{'scroll_down': {'amount': 1000}},
]
agent = Agent(
task='What theories are displayed on the page?',
initial_actions=initial_actions,
llm=llm,
)
async def main():
    """Run the module-level agent with a budget of ten steps."""
    step_budget = 10
    await agent.run(max_steps=step_budget)
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,33 @@
"""
Simple try of the agent.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
# video: https://preview.screen.studio/share/clenCmS6
llm = ChatOpenAI(model='gpt-4o')
agent = Agent(
task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
llm=llm,
)
async def main():
    """Run the module-level agent until it reports completion."""
    await agent.run()


# Guarded so that importing this module does not start an agent run.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,67 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser
# Video: https://preview.screen.studio/share/8Elaq9sm
async def main():
    """Interactive loop: every entered line becomes a new agent task in one shared context.

    Single-letter commands act on the most recently started agent instead of
    starting a new one: ``p`` pauses it, ``r`` resumes it, ``b`` stops it.
    """
    # Persist the browser state across agents
    browser = Browser()
    # Strong references to in-flight agent runs. The event loop only keeps
    # weak references to tasks, so a fire-and-forget task could otherwise be
    # garbage-collected before it completes.
    background_runs = set()
    async with await browser.new_context() as context:
        model = ChatOpenAI(model='gpt-4o')
        current_agent = None

        async def get_input():
            # input() blocks, so it runs in the default executor to keep the
            # event loop (and any running agent) responsive.
            loop = asyncio.get_running_loop()
            return await loop.run_in_executor(
                None, lambda: input('Enter task (p: pause current agent, r: resume, b: break): ')
            )

        while True:
            task = await get_input()
            command = task.lower()
            if command == 'p':
                # Pause the current agent if one exists
                if current_agent:
                    current_agent.pause()
                continue
            if command == 'r':
                # Resume the current agent if one exists
                if current_agent:
                    current_agent.resume()
                continue
            if command == 'b':
                # Break the current agent's execution if one exists
                if current_agent:
                    current_agent.stop()
                    current_agent = None
                continue
            # If there's a current agent running, pause it before starting new one
            if current_agent:
                current_agent.pause()
            # Create and run new agent with the task
            current_agent = Agent(
                task=task,
                llm=model,
                browser_context=context,
            )
            # Run the agent asynchronously without blocking
            run = asyncio.create_task(current_agent.run())
            background_runs.add(run)
            run.add_done_callback(background_runs.discard)


if __name__ == '__main__':
    asyncio.run(main())
# Now add the cheapest to the cart

View file

@ -0,0 +1,70 @@
"""
Show how to run an agent step by step with an injected agent state that is saved to and reloaded from a JSON file.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
import anyio
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.agent.views import AgentState
from browser_use.browser.browser import Browser, BrowserConfig
async def main():
    """Take one agent step with an injected state, then round-trip that state through a file.

    The loop body ends with an unconditional ``break``, so at most one step is
    taken per invocation. The browser context and browser are closed on every
    exit path.
    """
    task = 'Go to hackernews show hn and give me the first 5 posts'
    browser = Browser(
        config=BrowserConfig(
            headless=True,
        )
    )
    browser_context = await browser.new_context()
    try:
        agent_state = AgentState()
        for i in range(10):
            agent = Agent(
                task=task,
                llm=ChatOpenAI(model='gpt-4o'),
                browser=browser,
                browser_context=browser_context,
                injected_agent_state=agent_state,
                page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
            )
            done, valid = await agent.take_step()
            print(f'Step {i}: Done: {done}, Valid: {valid}')
            if done and valid:
                break
            # History is cleared and also excluded from the dump below.
            agent_state.history.history = []
            # Save state to file
            async with await anyio.open_file('agent_state.json', 'w') as f:
                serialized = agent_state.model_dump_json(exclude={'history'})
                await f.write(serialized)
            # Load state back from file
            async with await anyio.open_file('agent_state.json', 'r') as f:
                loaded_json = await f.read()
                agent_state = AgentState.model_validate_json(loaded_json)
            break
    finally:
        # Neither the context nor the headless browser closes on its own.
        await browser_context.close()
        await browser.close()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,59 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use.agent.service import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
browser = Browser(
config=BrowserConfig(
disable_security=True,
headless=False,
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
)
)
llm = ChatOpenAI(model='gpt-4o')
async def main():
    """Run several agents concurrently on one browser, then one more agent, then close it."""
    parallel_tasks = [
        'Search Google for weather in Tokyo',
        'Check Reddit front page title',
        'Look up Bitcoin price on Coinbase',
        'Find NASA image of the day',
        # 'Check top story on CNN',
        # 'Search latest SpaceX launch date',
        # 'Look up population of Paris',
        # 'Find current time in Sydney',
        # 'Check who won last Super Bowl',
        # 'Search trending topics on Twitter',
    ]
    agents = [Agent(task=prompt, llm=llm, browser=browser) for prompt in parallel_tasks]
    # All agents share the module-level browser and run at the same time.
    await asyncio.gather(*(worker.run() for worker in agents))
    # async with await browser.new_context() as context:
    agentX = Agent(
        task='Go to apple.com and return the title of the page',
        llm=llm,
        browser=browser,
        # browser_context=context,
    )
    await agentX.run()
    await browser.close()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,103 @@
import asyncio
import os
import sys
import threading
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
class AgentController:
    """Run one agent on a private event loop and forward pause/resume/stop to it.

    ``running`` is True only while ``run_agent`` is executing; it is reset when
    the run finishes, fails, or ``stop`` is called.
    """

    def __init__(self):
        llm = ChatOpenAI(model='gpt-4o')
        self.agent = Agent(
            task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
            llm=llm,
        )
        self.running = False

    async def run_agent(self):
        """Run the agent"""
        self.running = True
        try:
            await self.agent.run()
        finally:
            # Without this the flag stayed True forever after a run that
            # ended on its own or raised.
            self.running = False

    def start(self):
        """Run the agent to completion on a new event loop (intended as a thread target)"""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.run_agent())
        finally:
            # Release the loop's resources and do not leave a closed loop
            # registered as the current one for this thread.
            asyncio.set_event_loop(None)
            loop.close()

    def pause(self):
        """Pause the agent"""
        self.agent.pause()

    def resume(self):
        """Resume the agent"""
        self.agent.resume()

    def stop(self):
        """Stop the agent"""
        self.agent.stop()
        self.running = False
def print_menu():
    """Print the numbered agent control menu, preceded by a blank line."""
    entries = ('Start', 'Pause', 'Resume', 'Stop', 'Exit')
    print('\nAgent Control Menu:')
    for number, label in enumerate(entries, start=1):
        print(f'{number}. {label}')
async def main():
    """Drive the agent from a text menu until the user chooses Exit.

    The agent runs on its own thread (see ``AgentController.start``); this
    loop only reads menu choices and forwards them to the controller.
    """
    controller = AgentController()
    agent_thread = None
    while True:
        print_menu()
        try:
            choice = input('Enter your choice (1-5): ')
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C and a closed stdin both mean "leave". Letting EOFError
            # escape would skip the stop/join below and could leave the
            # agent thread running.
            choice = '5'
        if choice == '1' and not agent_thread:
            print('Starting agent...')
            agent_thread = threading.Thread(target=controller.start)
            agent_thread.start()
        elif choice == '2':
            print('Pausing agent...')
            controller.pause()
        elif choice == '3':
            print('Resuming agent...')
            controller.resume()
        elif choice == '4':
            print('Stopping agent...')
            controller.stop()
            if agent_thread:
                agent_thread.join()
                agent_thread = None
        elif choice == '5':
            print('Exiting...')
            if controller.running:
                controller.stop()
            if agent_thread:
                agent_thread.join()
            break
        await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,30 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
planner_llm = ChatOpenAI(
model='o3-mini',
)
task = 'your task'
agent = Agent(task=task, llm=llm, planner_llm=planner_llm, use_vision_for_planner=False, planner_interval=1)
async def main():
    """Run the module-level agent, which was built with a separate planner model."""
    await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,136 @@
import asyncio
import os
import sys
from pathlib import Path
# Ensure the project root is in the Python path if running directly
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser, BrowserConfig
# Define the task for the agent
TASK_DESCRIPTION = """
1. Go to amazon.com
2. Search for 'i7 14700k'
4. If there is an 'Add to Cart' button, open the product page and then click add to cart.
5. the open the shopping cart page /cart button/ go to cart button.
6. Scroll down to the bottom of the cart page.
7. Scroll up to the top of the cart page.
8. Finish the task.
"""
# Define the path where the Playwright script will be saved
SCRIPT_DIR = Path('./playwright_scripts')
SCRIPT_PATH = SCRIPT_DIR / 'playwright_amazon_cart_script.py'
# Helper function to stream output from the subprocess
async def stream_output(stream, prefix):
    """Echo ``stream`` line by line to stdout, each line tagged with ``prefix``.

    Args:
        stream: An object with an awaitable ``readline()`` that returns bytes
            and an empty value at end of stream, or ``None``.
        prefix: Label printed before every line.

    Bytes that are not valid UTF-8 are replaced rather than raising, so an
    unexpected encoding in the child's output cannot abort the relay.
    """
    if stream is None:
        print(f'{prefix}: (No stream available)')
        return
    while True:
        line = await stream.readline()
        if not line:
            # An empty read marks end of stream.
            break
        text = line.decode(errors='replace').rstrip()
        print(f'{prefix}: {text}', flush=True)
async def _run_generated_script(script_path):
    """Execute the generated Playwright script in a subprocess and relay its output."""
    try:
        # Execute the generated script using asyncio.create_subprocess_exec
        process = await asyncio.create_subprocess_exec(
            sys.executable,
            str(script_path),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=Path.cwd(),  # Run from the current working directory
        )
        print('\n--- Playwright Script Execution ---')
        # Stream stdout and stderr concurrently, then collect the exit code.
        await asyncio.gather(
            stream_output(process.stdout, 'stdout'),
            stream_output(process.stderr, 'stderr'),
        )
        returncode = await process.wait()
        print('-------------------------------------')
        if returncode == 0:
            print('\n✅ Playwright script executed successfully!')
        else:
            print(f'\n⚠️ Playwright script finished with exit code {returncode}.')
    except Exception as e:
        print(f'\n❌ An error occurred while executing the Playwright script: {e}')


async def main():
    """Let the agent perform the task, then execute the Playwright script it saved.

    The agent's browser is closed on every exit path. The generated script
    manages its own browser instance.
    """
    # Initialize the language model
    llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
    # Configure the browser
    # Use headless=False if you want to watch the agent visually
    browser = Browser(config=BrowserConfig(headless=False))
    # Make sure the output directory exists before the agent tries to save
    # into it (previously this only happened after the script already existed).
    SCRIPT_DIR.mkdir(parents=True, exist_ok=True)
    # The 'save_playwright_script_path' argument tells the agent where to save the script
    agent = Agent(
        task=TASK_DESCRIPTION,
        llm=llm,
        browser=browser,
        save_playwright_script_path=str(SCRIPT_PATH),  # Pass the path as a string
    )
    print('Running the agent to generate the Playwright script...')
    try:
        try:
            history = await agent.run()
            print('Agent finished running.')
            if history and history.is_successful():
                print(f'Agent completed the task successfully. Final result: {history.final_result()}')
            elif history:
                print('Agent finished, but the task might not be fully successful.')
                if history.has_errors():
                    print(f'Errors encountered: {history.errors()}')
            else:
                print('Agent run did not return a history object.')
        except Exception as e:
            print(f'An error occurred during the agent run: {e}')
            return  # Exit if agent failed; the finally below still closes the browser
        # --- Execute the Generated Playwright Script ---
        print(f'\nChecking if Playwright script was generated at: {SCRIPT_PATH}')
        if SCRIPT_PATH.exists():
            print('Playwright script found. Attempting to execute...')
            await _run_generated_script(SCRIPT_PATH)
        else:
            print(f'\n❌ Playwright script not found at {SCRIPT_PATH}. Generation might have failed.')
    finally:
        # Close the browser used by the agent.
        await browser.close()
    print("Agent's browser closed.")
if __name__ == '__main__':
# Ensure the script directory is clean before running (optional)
if SCRIPT_PATH.exists():
SCRIPT_PATH.unlink()
print(f'Removed existing script: {SCRIPT_PATH}')
# Run the main async function
asyncio.run(main())

View file

@ -0,0 +1,47 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
task = (
"go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"
)
allowed_domains = ['google.com']
browser = Browser(
config=BrowserConfig(
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
new_context_config=BrowserContextConfig(
allowed_domains=allowed_domains,
),
),
)
agent = Agent(
task=task,
llm=llm,
browser=browser,
)
async def main():
    """Run the domain-restricted agent for at most 25 steps, then close the browser.

    The browser is closed even when the run raises.
    """
    try:
        await agent.run(max_steps=25)
        input('Press Enter to close the browser...')
    finally:
        await browser.close()


# Guarded so that importing this module does not start an agent run.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,60 @@
import asyncio
import os
import sys
from pprint import pprint
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
llm = ChatOpenAI(model='gpt-4o')
browser = Browser(
config=BrowserConfig(
headless=False,
disable_security=True,
)
)
async def main():
    """Run a short search task and pretty-print several views of the agent history."""
    context_config = BrowserContextConfig(
        trace_path='./tmp/result_processing',
        no_viewport=False,
        window_width=1280,
        window_height=1000,
    )
    async with await browser.new_context(config=context_config) as browser_context:
        agent = Agent(
            task="go to google.com and type 'OpenAI' click search and give me the first url",
            llm=llm,
            browser_context=browser_context,
        )
        history: AgentHistoryList = await agent.run(max_steps=3)
        # Each accessor is called right before its section is printed, in
        # this order. model_actions() covers e.g. xPaths the model clicked on.
        sections = (
            ('Final Result:', history.final_result),
            ('\nErrors:', history.errors),
            ('\nModel Outputs:', history.model_actions),
            ('\nThoughts:', history.model_thoughts),
        )
        for heading, accessor in sections:
            print(heading)
            pprint(accessor(), indent=4)
    # close browser
    await browser.close()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,34 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use.agent.service import Agent
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContextConfig
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
async def main():
    """Run the multi-tab task inside a context with a trace path, then close the browser.

    The browser is closed even when the run raises.
    """
    browser = Browser()
    try:
        async with await browser.new_context(config=BrowserContextConfig(trace_path='./tmp/traces/')) as context:
            agent = Agent(
                task='Go to hackernews, then go to apple.com and return all titles of open tabs',
                llm=llm,
                browser_context=context,
            )
            await agent.run()
    finally:
        await browser.close()


# Guarded so that importing this module does not start an agent run.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,32 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
temperature=0.0,
)
# the model will see x_name and x_password, but never the actual values.
sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'}
task = 'go to x.com and login with x_name and x_password then find interesting posts and like them'
agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data)
async def main():
    """Run the module-level agent, which was built with the ``sensitive_data`` mapping."""
    await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,26 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm)
async def main():
    """Run the module-level agent, which uses ``small_llm`` as its page-extraction model."""
    await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,102 @@
import asyncio
import json
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
import anyio
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import Agent, Browser, BrowserConfig, Controller
links = [
'https://docs.mem0.ai/components/llms/models/litellm',
'https://docs.mem0.ai/components/llms/models/mistral_AI',
'https://docs.mem0.ai/components/llms/models/ollama',
'https://docs.mem0.ai/components/llms/models/openai',
'https://docs.mem0.ai/components/llms/models/together',
'https://docs.mem0.ai/components/llms/models/xAI',
'https://docs.mem0.ai/components/llms/overview',
'https://docs.mem0.ai/components/vectordbs/config',
'https://docs.mem0.ai/components/vectordbs/dbs/azure_ai_search',
'https://docs.mem0.ai/components/vectordbs/dbs/chroma',
'https://docs.mem0.ai/components/vectordbs/dbs/elasticsearch',
'https://docs.mem0.ai/components/vectordbs/dbs/milvus',
'https://docs.mem0.ai/components/vectordbs/dbs/opensearch',
'https://docs.mem0.ai/components/vectordbs/dbs/pgvector',
'https://docs.mem0.ai/components/vectordbs/dbs/pinecone',
'https://docs.mem0.ai/components/vectordbs/dbs/qdrant',
'https://docs.mem0.ai/components/vectordbs/dbs/redis',
'https://docs.mem0.ai/components/vectordbs/dbs/supabase',
'https://docs.mem0.ai/components/vectordbs/dbs/vertex_ai_vector_search',
'https://docs.mem0.ai/components/vectordbs/dbs/weaviate',
'https://docs.mem0.ai/components/vectordbs/overview',
'https://docs.mem0.ai/contributing/development',
'https://docs.mem0.ai/contributing/documentation',
'https://docs.mem0.ai/core-concepts/memory-operations',
'https://docs.mem0.ai/core-concepts/memory-types',
]
class Link(BaseModel):
    # One visited page in the structured result: its address, title, and the
    # agent's summary of the content.
    url: str
    title: str
    summary: str
class Links(BaseModel):
    # Top-level output model: a list wrapper around Link entries.
    links: list[Link]
initial_actions = [
{'open_tab': {'url': 'https://docs.mem0.ai/'}},
]
controller = Controller(output_model=Links)
task_description = f"""
Visit all the links provided in {links} and summarize the content of the page with url and title. There are {len(links)} links to visit. Make sure to visit all the links. Return a json with the following format: [{{url: <url>, title: <title>, summary: <summary>}}].
Guidelines:
1. Strictly stay on the domain https://docs.mem0.ai
2. Do not visit any other websites.
3. Ignore the links that are hashed (#) or javascript (:), or mailto, or tel, or other protocols
4. Don't visit any other url other than the ones provided above.
5. Capture the unique urls which are not already visited.
6. If you visit any page that doesn't have host name docs.mem0.ai, then do not visit it and come back to the page with host name docs.mem0.ai.
"""
async def main(max_steps=500):
    """Visit the listed documentation pages and write the parsed summaries to ``result.json``.

    Args:
        max_steps: Upper bound on agent steps for the run.

    An empty JSON list is written when the agent produces no final result.
    The headless browser is closed once the run ends, whether or not it
    succeeded.
    """
    config = BrowserConfig(headless=True)
    browser = Browser(config=config)
    try:
        agent = Agent(
            task=task_description,
            llm=ChatOpenAI(model='gpt-4o-mini'),
            controller=controller,
            initial_actions=initial_actions,
            enable_memory=True,
            browser=browser,
        )
        history = await agent.run(max_steps=max_steps)
    finally:
        # The browser was never released before, leaving a headless process behind.
        await browser.close()
    result = history.final_result()
    parsed_result = []
    if result:
        parsed: Links = Links.model_validate_json(result)
        print(f'Total parsed links: {len(parsed.links)}')
        parsed_result = [
            {'title': link.title, 'url': link.url, 'summary': link.summary}
            for link in parsed.links
        ]
    else:
        print('No result')
    async with await anyio.open_file('result.json', 'w+') as f:
        await f.write(json.dumps(parsed_result, indent=4))
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,49 @@
"""
Demonstrate output validator.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import ActionResult, Agent, Controller
controller = Controller()
class DoneResult(BaseModel):
    # Parameter model for the custom 'Done with task' action below.
    title: str
    comments: str
    hours_since_start: int
# we overwrite done() in this example to demonstrate the validator
@controller.registry.action('Done with task', param_model=DoneResult)
async def done(params: DoneResult):
    # Build and print the result a correct implementation would hand back...
    payload = params.model_dump_json()
    result = ActionResult(is_done=True, extracted_content=payload)
    print(result)
    # NOTE: this is clearly wrong - to demonstrate the validator
    # ...but return an unrelated string instead of ``result``.
    return 'blablabla'
async def main():
    """Run the top-post task with output validation enabled, for at most five steps."""
    agent = Agent(
        task='Go to hackernews hn and give me the top 1 post',
        llm=ChatOpenAI(model='gpt-4o'),
        controller=controller,
        validate_output=True,
    )
    # NOTE: this should fail to demonstrate the validator
    await agent.run(max_steps=5)
if __name__ == '__main__':
asyncio.run(main())