[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
210
browser-use/examples/features/click_fallback_options.py
Normal file
210
browser-use/examples/features/click_fallback_options.py
Normal file
|
|
@ -0,0 +1,210 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from aiohttp import web # make sure to install aiohttp: pip install aiohttp
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
# from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from browser_use import Agent, Controller
|
||||
|
||||
# Define a simple HTML page
|
||||
HTML_CONTENT = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Custom Select Div</title>
|
||||
<style>
|
||||
.custom-select {
|
||||
position: relative;
|
||||
width: 200px;
|
||||
font-family: Arial, sans-serif;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.select-display {
|
||||
padding: 10px;
|
||||
border: 1px solid #ccc;
|
||||
background-color: #fff;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.select-options {
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
left: 0;
|
||||
right: 0;
|
||||
border: 1px solid #ccc;
|
||||
border-top: none;
|
||||
background-color: #fff;
|
||||
display: none;
|
||||
max-height: 150px;
|
||||
overflow-y: auto;
|
||||
z-index: 100;
|
||||
}
|
||||
|
||||
.select-option {
|
||||
padding: 10px;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.select-option:hover {
|
||||
background-color: #f0f0f0;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="custom-select">
|
||||
<div class="select-display">Select a fruit</div>
|
||||
<div class="select-options">
|
||||
<div class="select-option" data-value="option1">Apples</div>
|
||||
<div class="select-option" data-value="option2">Oranges</div>
|
||||
<div class="select-option" data-value="option3">Pineapples</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="custom-select">
|
||||
<div class="select-display">Select a fruit</div>
|
||||
<div class="select-options">
|
||||
<div class="select-option" data-value="option1">Apples</div>
|
||||
<div class="select-option" data-value="option2">Oranges</div>
|
||||
<div class="select-option" data-value="option3">Pineapples</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="custom-select">
|
||||
<div class="select-display">Select a fruit</div>
|
||||
<div class="select-options">
|
||||
<div class="select-option" data-value="option1">Apples</div>
|
||||
<div class="select-option" data-value="option2">Oranges</div>
|
||||
<div class="select-option" data-value="option3">Pineapples</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="custom-select">
|
||||
<div class="select-display">Select a fruit</div>
|
||||
<div class="select-options">
|
||||
<div class="select-option" data-value="option1">Apples</div>
|
||||
<div class="select-option" data-value="option2">Oranges</div>
|
||||
<div class="select-option" data-value="option3">Pineapples</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<label for="cars">Choose a car:</label>
|
||||
<select name="cars" id="cars">
|
||||
<option value="volvo">Volvo</option>
|
||||
<option value="bmw">BMW</option>
|
||||
<option value="mercedes">Mercedes</option>
|
||||
<option value="audi">Audi</option>
|
||||
</select>
|
||||
|
||||
<button onclick="alert('I told you!')">Don't click me</button>
|
||||
|
||||
<script>
|
||||
document.querySelectorAll('.custom-select').forEach(customSelect => {
|
||||
const selectDisplay = customSelect.querySelector('.select-display');
|
||||
const selectOptions = customSelect.querySelector('.select-options');
|
||||
const options = customSelect.querySelectorAll('.select-option');
|
||||
|
||||
selectDisplay.addEventListener('click', (e) => {
|
||||
// Close all other dropdowns
|
||||
document.querySelectorAll('.select-options').forEach(opt => {
|
||||
if (opt !== selectOptions) opt.style.display = 'none';
|
||||
});
|
||||
|
||||
// Toggle current dropdown
|
||||
const isVisible = selectOptions.style.display === 'block';
|
||||
selectOptions.style.display = isVisible ? 'none' : 'block';
|
||||
|
||||
e.stopPropagation();
|
||||
});
|
||||
|
||||
options.forEach(option => {
|
||||
option.addEventListener('click', () => {
|
||||
selectDisplay.textContent = option.textContent;
|
||||
selectDisplay.dataset.value = option.getAttribute('data-value');
|
||||
selectOptions.style.display = 'none';
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Close all dropdowns if clicking outside
|
||||
document.addEventListener('click', () => {
|
||||
document.querySelectorAll('.select-options').forEach(opt => {
|
||||
opt.style.display = 'none';
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
"""
|
||||
|
||||
|
||||
# aiohttp request handler to serve the HTML content
|
||||
async def handle_root(request):
    """Answer a request for the site root with the embedded demo page."""
    page = web.Response(text=HTML_CONTENT, content_type='text/html')
    return page
|
||||
|
||||
|
||||
# Function to run the HTTP server
|
||||
async def run_http_server():
    """Serve the demo page on http://localhost:8000 until the task is cancelled.

    The coroutine never returns on its own: it parks on an event that is never
    set. Cancelling the surrounding task is the way to stop it, and the
    ``finally`` clause then tears the runner down so the listening socket is
    released.
    """
    app = web.Application()
    app.router.add_get('/', handle_root)
    runner = web.AppRunner(app)
    await runner.setup()
    try:
        site = web.TCPSite(runner, 'localhost', 8000)
        await site.start()
        print('HTTP server running on http://localhost:8000')
        # Keep the server running indefinitely.
        await asyncio.Event().wait()
    finally:
        # Runs on cancellation as well as on startup errors (e.g. port in use).
        await runner.cleanup()
|
||||
|
||||
|
||||
# Your agent tasks and other logic
|
||||
controller = Controller()
|
||||
|
||||
|
||||
async def main():
    """Serve the demo page locally and run every example click task against it.

    The HTTP server runs as a background task while the agents work. It is
    cancelled once the user presses Enter, or as soon as an agent raises.
    """
    # Start the HTTP server in the background.
    server_task = asyncio.create_task(run_http_server())

    try:
        # Example tasks for the agent.
        xpath_task = 'Open http://localhost:8000/, click element with the xpath "/html/body/div/div[1]" and then click on Oranges'
        css_selector_task = 'Open http://localhost:8000/, click element with the selector div.select-display and then click on apples'
        text_task = 'Open http://localhost:8000/, click the third element with the text "Select a fruit" and then click on Apples, then click the second element with the text "Select a fruit" and then click on Oranges'
        select_task = 'Open http://localhost:8000/, choose the car BMW'
        button_task = 'Open http://localhost:8000/, click on the button'

        llm = ChatOpenAI(model='gpt-4o')
        # llm = ChatGoogleGenerativeAI(
        #     model="gemini-2.0-flash-lite",
        # )

        # Run different agent tasks.
        for task in [xpath_task, css_selector_task, text_task, select_task, button_task]:
            agent = Agent(
                task=task,
                llm=llm,
                controller=controller,
            )
            await agent.run()

        # Wait for user input before shutting down. input() blocks, so it runs
        # in a worker thread; otherwise the event loop (and with it the demo
        # server) would stop responding while the prompt is open.
        await asyncio.get_running_loop().run_in_executor(None, input, 'Press Enter to close...')
    finally:
        # Cancel the server task once finished, including when an agent failed.
        server_task.cancel()
        try:
            await server_task
        except asyncio.CancelledError:
            print('HTTP server stopped.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
52
browser-use/examples/features/cross_origin_iframes.py
Normal file
52
browser-use/examples/features/cross_origin_iframes.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""
|
||||
Example of how it supports cross-origin iframes.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
)
|
||||
)
|
||||
controller = Controller()
|
||||
|
||||
|
||||
async def main():
    """Run the cross-origin iframe task, then close the browser.

    The browser is closed in a ``finally`` clause so a failing agent run does
    not leave the Chrome process behind.
    """
    agent = Agent(
        task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
        llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
        controller=controller,
        browser=browser,
    )

    try:
        await agent.run()
    finally:
        await browser.close()

    input('Press Enter to close...')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
asyncio.run(main())
|
||||
except Exception as e:
|
||||
print(e)
|
||||
59
browser-use/examples/features/custom_output.py
Normal file
59
browser-use/examples/features/custom_output.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
Show how to use custom outputs.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
|
||||
|
||||
class Post(BaseModel):
    """One post entry of the structured output requested from the agent."""

    # Field names are part of the JSON schema handed to the controller.
    post_title: str
    post_url: str
    num_comments: int
    # presumably whole hours elapsed since the post was submitted; verify against the prompt
    hours_since_post: int
|
||||
|
||||
|
||||
class Posts(BaseModel):
    """Top-level output model: wraps the list of posts in a single object."""

    posts: list[Post]
|
||||
|
||||
|
||||
controller = Controller(output_model=Posts)
|
||||
|
||||
|
||||
async def main():
    """Run the agent and print every post parsed from its final result."""
    agent = Agent(
        task='Go to hackernews show hn and give me the first 5 posts',
        llm=ChatOpenAI(model='gpt-4o'),
        controller=controller,
    )

    history = await agent.run()
    final_json = history.final_result()

    # Nothing to parse when the agent produced no final result.
    if not final_json:
        print('No result')
        return

    parsed: Posts = Posts.model_validate_json(final_json)
    for entry in parsed.posts:
        print('\n--------------------------------')
        print(f'Title: {entry.post_title}')
        print(f'URL: {entry.post_url}')
        print(f'Comments: {entry.num_comments}')
        print(f'Hours since post: {entry.hours_since_post}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
39
browser-use/examples/features/custom_system_prompt.py
Normal file
39
browser-use/examples/features/custom_system_prompt.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
extend_system_message = (
|
||||
'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!'
|
||||
)
|
||||
|
||||
# or use override_system_message to completely override the system prompt
|
||||
|
||||
|
||||
async def main():
    """Print the agent's system prompt as indented JSON, then run the agent."""
    agent = Agent(
        task="do google search to find images of Elon Musk's wife",
        llm=ChatOpenAI(model='gpt-4o'),
        extend_system_message=extend_system_message,
    )

    # Dump only the fields that were explicitly set on the prompt message.
    prompt_fields = agent.message_manager.system_prompt.model_dump(exclude_unset=True)
    print(json.dumps(prompt_fields, indent=4))

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
79
browser-use/examples/features/custom_user_agent.py
Normal file
79
browser-use/examples/features/custom_user_agent.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
|
||||
def get_llm(provider: str):
    """Build the chat model for the given provider name.

    Args:
        provider: Either ``'anthropic'`` or ``'openai'``.

    Returns:
        A ``ChatAnthropic`` or ``ChatOpenAI`` instance with temperature 0.

    Raises:
        ValueError: If ``provider`` is any other value.
    """
    if provider == 'anthropic':
        return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
    if provider == 'openai':
        return ChatOpenAI(model='gpt-4o', temperature=0.0)
    raise ValueError(f'Unsupported provider: {provider}')
|
||||
|
||||
|
||||
# NOTE: This example is to find your current user agent string to use it in the browser_context
|
||||
task = 'go to https://whatismyuseragent.com and find the current user agent string '
|
||||
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--query', type=str, help='The query to process', default=task)
|
||||
parser.add_argument(
|
||||
'--provider',
|
||||
type=str,
|
||||
choices=['openai', 'anthropic'],
|
||||
default='openai',
|
||||
help='The model provider to use (default: openai)',
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
llm = get_llm(args.provider)
|
||||
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
)
|
||||
)
|
||||
|
||||
browser_context = BrowserContext(config=BrowserContextConfig(user_agent='foobarfoo'), browser=browser)
|
||||
|
||||
agent = Agent(
|
||||
task=args.query,
|
||||
llm=llm,
|
||||
controller=controller,
|
||||
# browser=browser,
|
||||
browser_context=browser_context,
|
||||
use_vision=True,
|
||||
max_actions_per_step=1,
|
||||
)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level agent for up to 25 steps, then close its context.

    The context is closed in a ``finally`` clause so it is released even when
    the agent run or the prompt is interrupted.
    """
    try:
        await agent.run(max_steps=25)

        input('Press Enter to close the browser...')
    finally:
        await browser_context.close()
|
||||
|
||||
|
||||
asyncio.run(main())
|
||||
42
browser-use/examples/features/download_file.py
Normal file
42
browser-use/examples/features/download_file.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
api_key = os.getenv('GOOGLE_API_KEY')
|
||||
if not api_key:
|
||||
raise ValueError('GOOGLE_API_KEY is not set')
|
||||
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
new_context_config=BrowserContextConfig(save_downloads_path=os.path.join(os.path.expanduser('~'), 'downloads'))
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def run_download():
    """Ask the agent to download a file, then close the browser.

    The browser is created at module level with ``save_downloads_path`` set,
    so downloads go to that directory. It is closed in a ``finally`` clause so
    a failing run does not leave it open.
    """
    agent = Agent(
        task='Go to "https://file-examples.com/" and download the smallest doc file.',
        llm=llm,
        max_actions_per_step=8,
        use_vision=True,
        browser=browser,
    )
    try:
        await agent.run(max_steps=25)
    finally:
        await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(run_download())
|
||||
51
browser-use/examples/features/drag_drop.py
Normal file
51
browser-use/examples/features/drag_drop.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
api_key = os.getenv('GOOGLE_API_KEY')
|
||||
if not api_key:
|
||||
raise ValueError('GOOGLE_API_KEY is not set')
|
||||
|
||||
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
|
||||
|
||||
|
||||
task_1 = """
|
||||
Navigate to: https://sortablejs.github.io/Sortable/.
|
||||
Then scroll down to the first examplw with title "Simple list example".
|
||||
Drag the element with name "item 1" to below the element with name "item 3".
|
||||
"""
|
||||
|
||||
|
||||
task_2 = """
|
||||
Navigate to: https://excalidraw.com/.
|
||||
Click on the pencil icon (with index 40).
|
||||
Then draw a triangle in the canvas.
|
||||
Draw the triangle starting from coordinate (400,400).
|
||||
You can use the drag and drop action to draw the triangle.
|
||||
"""
|
||||
|
||||
|
||||
async def run_search():
    """Run the drag-and-drop task ``task_1`` for at most 25 steps."""
    agent_options = {
        'task': task_1,
        'llm': llm,
        'max_actions_per_step': 1,
        'use_vision': True,
    }
    drag_agent = Agent(**agent_options)
    await drag_agent.run(max_steps=25)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(run_search())
|
||||
50
browser-use/examples/features/follow_up_tasks.py
Normal file
50
browser-use/examples/features/follow_up_tasks.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Browser, BrowserConfig, BrowserContextConfig, Controller
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatOpenAI(
|
||||
model='gpt-4o',
|
||||
temperature=0.0,
|
||||
)
|
||||
# Get your chrome path
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
new_context_config=BrowserContextConfig(
|
||||
keep_alive=True,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
task = 'Find the founders of browser-use and draft them a short personalized message'
|
||||
|
||||
agent = Agent(task=task, llm=llm, controller=controller, browser=browser)
|
||||
|
||||
|
||||
async def main():
    """Run the initial task, queue a follow-up task on the same agent, run again."""
    await agent.run()

    # new_task = input('Type in a new task: ')
    follow_up = 'Find an image of the founders'
    agent.add_new_task(follow_up)

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
34
browser-use/examples/features/initial_actions.py
Normal file
34
browser-use/examples/features/initial_actions.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
|
||||
initial_actions = [
|
||||
{'open_tab': {'url': 'https://www.google.com'}},
|
||||
{'open_tab': {'url': 'https://en.wikipedia.org/wiki/Randomness'}},
|
||||
{'scroll_down': {'amount': 1000}},
|
||||
]
|
||||
agent = Agent(
|
||||
task='What theories are displayed on the page?',
|
||||
initial_actions=initial_actions,
|
||||
llm=llm,
|
||||
)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level agent, which is configured with ``initial_actions``, for at most 10 steps."""
    step_limit = 10
    await agent.run(max_steps=step_limit)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
33
browser-use/examples/features/multi-tab_handling.py
Normal file
33
browser-use/examples/features/multi-tab_handling.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
# video: https://preview.screen.studio/share/clenCmS6
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
agent = Agent(
|
||||
task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
|
||||
llm=llm,
|
||||
)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level multi-tab agent until it finishes."""
    await agent.run()
|
||||
|
||||
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Browser
|
||||
|
||||
|
||||
# Video: https://preview.screen.studio/share/8Elaq9sm
|
||||
async def main():
    """Interactively launch agents that all share one browser context.

    Each line typed at the prompt is either a control command (``p`` pause,
    ``r`` resume, ``b`` stop the current agent) or the task for a new agent.
    A new agent is started in the background, so the prompt stays usable while
    it runs. The loop never exits on its own; interrupt the program to quit.
    """
    # Persist the browser state across agents
    browser = Browser()
    async with await browser.new_context() as context:
        model = ChatOpenAI(model='gpt-4o')
        current_agent = None
        # The event loop only keeps weak references to tasks. Hold strong ones
        # here so a running agent cannot be garbage-collected mid-execution.
        background_runs = set()

        async def get_input():
            """Read one line from stdin in a worker thread so running agents are not blocked."""
            return await asyncio.get_running_loop().run_in_executor(
                None, lambda: input('Enter task (p: pause current agent, r: resume, b: break): ')
            )

        while True:
            task = await get_input()
            command = task.lower()

            if command == 'p':
                # Pause the current agent if one exists
                if current_agent:
                    current_agent.pause()
                continue
            elif command == 'r':
                # Resume the current agent if one exists
                if current_agent:
                    current_agent.resume()
                continue
            elif command == 'b':
                # Break the current agent's execution if one exists
                if current_agent:
                    current_agent.stop()
                    current_agent = None
                continue

            # If there's a current agent running, pause it before starting new one
            if current_agent:
                current_agent.pause()

            # Create and run new agent with the task
            current_agent = Agent(
                task=task,
                llm=model,
                browser_context=context,
            )

            # Run the agent asynchronously without blocking
            run_task = asyncio.create_task(current_agent.run())
            background_runs.add(run_task)
            run_task.add_done_callback(background_runs.discard)
|
||||
|
||||
|
||||
asyncio.run(main())
|
||||
|
||||
# Now add the cheapest to the cart
|
||||
70
browser-use/examples/features/outsource_state.py
Normal file
70
browser-use/examples/features/outsource_state.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
"""
|
||||
Show how to outsource the agent state: save it to a JSON file and load it back.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import anyio
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.agent.views import AgentState
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
|
||||
async def main():
    """Take an agent step with injected state, then round-trip that state through a JSON file."""
    task = 'Go to hackernews show hn and give me the first 5 posts'

    browser = Browser(config=BrowserConfig(headless=True))
    browser_context = await browser.new_context()

    agent_state = AgentState()

    for step_index in range(10):
        # A fresh Agent is built each iteration; continuity comes from the injected state.
        agent = Agent(
            task=task,
            llm=ChatOpenAI(model='gpt-4o'),
            browser=browser,
            browser_context=browser_context,
            injected_agent_state=agent_state,
            page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
        )

        done, valid = await agent.take_step()
        print(f'Step {step_index}: Done: {done}, Valid: {valid}')

        if done and valid:
            break

        # The history is cleared and also excluded from the serialized state below.
        agent_state.history.history = []

        # Save state to file
        async with await anyio.open_file('agent_state.json', 'w') as state_file:
            await state_file.write(agent_state.model_dump_json(exclude={'history'}))

        # Load state back from file
        async with await anyio.open_file('agent_state.json', 'r') as state_file:
            loaded_json = await state_file.read()
        agent_state = AgentState.model_validate_json(loaded_json)

        # NOTE(review): this unconditional break ends the loop after the first
        # iteration, so the reloaded state is never fed into a second step.
        # Confirm whether that is intended.
        break
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
59
browser-use/examples/features/parallel_agents.py
Normal file
59
browser-use/examples/features/parallel_agents.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
disable_security=True,
|
||||
headless=False,
|
||||
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
|
||||
)
|
||||
)
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
|
||||
|
||||
async def main():
    """Run several agents concurrently on one browser, then one more afterwards.

    The shared module-level browser is closed in a ``finally`` clause, so it is
    shut down even when one of the agents raises.
    """
    try:
        agents = [
            Agent(task=task, llm=llm, browser=browser)
            for task in [
                'Search Google for weather in Tokyo',
                'Check Reddit front page title',
                'Look up Bitcoin price on Coinbase',
                'Find NASA image of the day',
                # 'Check top story on CNN',
                # 'Search latest SpaceX launch date',
                # 'Look up population of Paris',
                # 'Find current time in Sydney',
                # 'Check who won last Super Bowl',
                # 'Search trending topics on Twitter',
            ]
        ]

        # gather() schedules every agent.run() coroutine concurrently.
        await asyncio.gather(*[agent.run() for agent in agents])

        # async with await browser.new_context() as context:
        agentX = Agent(
            task='Go to apple.com and return the title of the page',
            llm=llm,
            browser=browser,
            # browser_context=context,
        )
        await agentX.run()
    finally:
        await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
103
browser-use/examples/features/pause_agent.py
Normal file
103
browser-use/examples/features/pause_agent.py
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
|
||||
class AgentController:
    """Wrap a single Agent so a menu can start, pause, resume and stop it."""

    def __init__(self):
        llm = ChatOpenAI(model='gpt-4o')
        self.agent = Agent(
            task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
            llm=llm,
        )
        # True while run_agent() is executing; reset when it returns or stop() is called.
        self.running = False

    async def run_agent(self):
        """Run the agent to completion, keeping ``running`` in sync."""
        self.running = True
        try:
            await self.agent.run()
        finally:
            # Also reset when the agent finishes on its own or raises.
            self.running = False

    def start(self):
        """Run the agent on a fresh event loop, blocking until it finishes.

        Intended as a ``threading.Thread`` target. ``asyncio.run`` creates the
        loop for the calling thread and closes it afterwards.
        """
        asyncio.run(self.run_agent())

    def pause(self):
        """Pause the agent"""
        self.agent.pause()

    def resume(self):
        """Resume the agent"""
        self.agent.resume()

    def stop(self):
        """Stop the agent"""
        self.agent.stop()
        self.running = False
|
||||
|
||||
|
||||
def print_menu():
    """Print the numbered agent control menu, preceded by a blank line."""
    menu_lines = (
        '\nAgent Control Menu:',
        '1. Start',
        '2. Pause',
        '3. Resume',
        '4. Stop',
        '5. Exit',
    )
    for entry in menu_lines:
        print(entry)
|
||||
|
||||
|
||||
async def main():
    """Drive an AgentController from a simple text menu.

    The agent itself runs in a separate thread, started by choice 1. This
    coroutine only reads menu choices and forwards them to the controller.
    Ctrl-C or end-of-input at the prompt is treated as "Exit".
    """
    controller = AgentController()
    agent_thread = None

    while True:
        print_menu()
        try:
            choice = input('Enter your choice (1-5): ')
        except (KeyboardInterrupt, EOFError):
            # A closed stdin would otherwise crash here and leave the agent
            # thread running; treat it like an explicit exit request.
            choice = '5'

        if choice == '1' and not agent_thread:
            print('Starting agent...')
            agent_thread = threading.Thread(target=controller.start)
            agent_thread.start()

        elif choice == '2':
            print('Pausing agent...')
            controller.pause()

        elif choice == '3':
            print('Resuming agent...')
            controller.resume()

        elif choice == '4':
            print('Stopping agent...')
            controller.stop()
            if agent_thread:
                agent_thread.join()
                agent_thread = None

        elif choice == '5':
            print('Exiting...')
            if controller.running:
                controller.stop()
            # Always wait for a started thread so the process exits cleanly.
            if agent_thread:
                agent_thread.join()
            break

        await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
30
browser-use/examples/features/planner.py
Normal file
30
browser-use/examples/features/planner.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
|
||||
planner_llm = ChatOpenAI(
|
||||
model='o3-mini',
|
||||
)
|
||||
task = 'your task'
|
||||
|
||||
|
||||
agent = Agent(task=task, llm=llm, planner_llm=planner_llm, use_vision_for_planner=False, planner_interval=1)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level agent, which is configured with a separate planner LLM."""
    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
136
browser-use/examples/features/playwright_script_generation.py
Normal file
136
browser-use/examples/features/playwright_script_generation.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Ensure the project root is in the Python path if running directly
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Browser, BrowserConfig
|
||||
|
||||
# Define the task for the agent
|
||||
# Numbered instructions handed verbatim to the agent as its task.
TASK_DESCRIPTION = """
1. Go to amazon.com
2. Search for 'i7 14700k'
4. If there is an 'Add to Cart' button, open the product page and then click add to cart.
5. the open the shopping cart page /cart button/ go to cart button.
6. Scroll down to the bottom of the cart page.
7. Scroll up to the top of the cart page.
8. Finish the task.
"""

# Location (relative to the working directory) of the generated Playwright script.
SCRIPT_DIR = Path('./playwright_scripts')
SCRIPT_PATH = SCRIPT_DIR.joinpath('playwright_amazon_cart_script.py')
|
||||
|
||||
|
||||
# Helper function to stream output from the subprocess
|
||||
async def stream_output(stream, prefix):
    """Echo an asyncio byte stream to stdout line by line.

    Args:
        stream: Object with an awaitable ``readline()`` returning ``bytes``
            (empty at end of stream), e.g. ``process.stdout``; ``None`` when
            no pipe is available.
        prefix: Label printed in front of every line.
    """
    if stream is None:
        print(f'{prefix}: (No stream available)')
        return

    while True:
        line = await stream.readline()
        if not line:
            # An empty read marks end of stream.
            break
        # The child's output is not guaranteed to be valid UTF-8. Replace
        # undecodable bytes instead of letting UnicodeDecodeError abort the
        # reader and drop the remaining output.
        text = line.decode(errors='replace').rstrip()
        print(f'{prefix}: {text}', flush=True)
|
||||
|
||||
|
||||
def _report_history(history):
    """Print a short summary of the agent run described by ``history``."""
    if not history:
        print('Agent run did not return a history object.')
    elif history.is_successful():
        print(f'Agent completed the task successfully. Final result: {history.final_result()}')
    else:
        print('Agent finished, but the task might not be fully successful.')
        if history.has_errors():
            print(f'Errors encountered: {history.errors()}')


async def _run_generated_script(script_path):
    """Run the generated script in a child interpreter and report its exit code."""
    process = await asyncio.create_subprocess_exec(
        sys.executable,
        str(script_path),
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        cwd=Path.cwd(),  # Run from the current working directory
    )

    print('\n--- Playwright Script Execution ---')
    # Drain both pipes concurrently so a full pipe cannot stall the child.
    await asyncio.gather(
        stream_output(process.stdout, 'stdout'),
        stream_output(process.stderr, 'stderr'),
    )
    returncode = await process.wait()
    print('-------------------------------------')

    if returncode == 0:
        print('\n✅ Playwright script executed successfully!')
    else:
        print(f'\n⚠️ Playwright script finished with exit code {returncode}.')


async def main():
    """Let the agent generate a Playwright script, then execute that script.

    The agent's browser is closed in a single ``finally`` block, so it is
    released on every exit path, including cancellation, which
    ``except Exception`` does not catch. The generated script manages its own
    browser instance.
    """
    llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)

    # headless=False keeps the browser window visible while the agent works.
    browser = Browser(config=BrowserConfig(headless=False))

    # save_playwright_script_path tells the agent where to write the script.
    agent = Agent(
        task=TASK_DESCRIPTION,
        llm=llm,
        browser=browser,
        save_playwright_script_path=str(SCRIPT_PATH),  # Pass the path as a string
    )

    try:
        print('Running the agent to generate the Playwright script...')
        try:
            history = await agent.run()
            print('Agent finished running.')
            _report_history(history)
        except Exception as e:
            print(f'An error occurred during the agent run: {e}')
            return  # Nothing to execute if the agent failed

        # --- Execute the Generated Playwright Script ---
        print(f'\nChecking if Playwright script was generated at: {SCRIPT_PATH}')
        if not SCRIPT_PATH.exists():
            print(f'\n❌ Playwright script not found at {SCRIPT_PATH}. Generation might have failed.')
            return

        print('Playwright script found. Attempting to execute...')
        try:
            await _run_generated_script(SCRIPT_PATH)
        except Exception as e:
            print(f'\n❌ An error occurred while executing the Playwright script: {e}')
    finally:
        await browser.close()
        print("Agent's browser closed.")


if __name__ == '__main__':
    # main() treats an existing file at SCRIPT_PATH as freshly generated, so
    # remove any script left over from a previous run first.
    if SCRIPT_PATH.exists():
        SCRIPT_PATH.unlink()
        print(f'Removed existing script: {SCRIPT_PATH}')

    asyncio.run(main())
|
||||
47
browser-use/examples/features/restrict_urls.py
Normal file
47
browser-use/examples/features/restrict_urls.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
task = "go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"

# Domains handed to the browser context as allowed.
# NOTE(review): the task asks the agent to follow a search result, presumably
# off google.com, which is what exercises this restriction - confirm.
allowed_domains = ['google.com']

browser = Browser(
    config=BrowserConfig(
        # macOS install location of Chrome; adjust on other platforms.
        browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
        new_context_config=BrowserContextConfig(allowed_domains=allowed_domains),
    ),
)

agent = Agent(task=task, llm=llm, browser=browser)
|
||||
|
||||
|
||||
async def main():
    """Run the agent for at most 25 steps, then close the browser.

    The browser is closed in ``finally`` so it is released even when the
    agent run raises.
    """
    try:
        await agent.run(max_steps=25)
        # Keep the window open until the user has looked at the result.
        input('Press Enter to close the browser...')
    finally:
        await browser.close()


# Guarded so importing this module does not launch a browser session.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
60
browser-use/examples/features/result_processing.py
Normal file
60
browser-use/examples/features/result_processing.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.agent.views import AgentHistoryList
|
||||
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o')

# Shared browser instance; main() opens a context on it and closes it afterwards.
browser = Browser(config=BrowserConfig(headless=False, disable_security=True))
|
||||
|
||||
|
||||
async def main():
    """Run a short agent task and pretty-print what the history recorded.

    Prints the final result, errors, model actions and model thoughts. The
    browser is closed in ``finally`` so it is released even if the run or the
    reporting raises.
    """
    context_config = BrowserContextConfig(
        trace_path='./tmp/result_processing',
        no_viewport=False,
        window_width=1280,
        window_height=1000,
    )
    try:
        async with await browser.new_context(config=context_config) as browser_context:
            agent = Agent(
                task="go to google.com and type 'OpenAI' click search and give me the first url",
                llm=llm,
                browser_context=browser_context,
            )
            history: AgentHistoryList = await agent.run(max_steps=3)

            print('Final Result:')
            pprint(history.final_result(), indent=4)

            print('\nErrors:')
            pprint(history.errors(), indent=4)

            # e.g. xPaths the model clicked on
            print('\nModel Outputs:')
            pprint(history.model_actions(), indent=4)

            print('\nThoughts:')
            pprint(history.model_thoughts(), indent=4)
    finally:
        # close browser
        await browser.close()


if __name__ == '__main__':
    asyncio.run(main())
|
||||
34
browser-use/examples/features/save_trace.py
Normal file
34
browser-use/examples/features/save_trace.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser.browser import Browser
|
||||
from browser_use.browser.context import BrowserContextConfig
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)


async def main():
    """Run the agent inside a browser context configured with a trace path.

    The context is created with ``trace_path='./tmp/traces/'``. The browser is
    closed in ``finally`` so it is released even when the agent run raises.
    """
    browser = Browser()
    try:
        async with await browser.new_context(config=BrowserContextConfig(trace_path='./tmp/traces/')) as context:
            agent = Agent(
                task='Go to hackernews, then go to apple.com and return all titles of open tabs',
                llm=llm,
                browser_context=context,
            )
            await agent.run()
    finally:
        await browser.close()


# Guarded so importing this module does not launch a browser session.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
32
browser-use/examples/features/sensitive_data.py
Normal file
32
browser-use/examples/features/sensitive_data.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)

# Maps the placeholder names used in the task to the real secrets.
# the model will see x_name and x_password, but never the actual values.
sensitive_data = {
    'x_name': 'my_x_name',
    'x_password': 'my_x_password',
}
task = 'go to x.com and login with x_name and x_password then find interesting posts and like them'

agent = Agent(
    task=task,
    llm=llm,
    sensitive_data=sensitive_data,
)


async def main():
    """Run the agent with the sensitive-data mapping attached."""
    await agent.run()


if __name__ == '__main__':
    asyncio.run(main())
|
||||
26
browser-use/examples/features/small_model_for_extraction.py
Normal file
26
browser-use/examples/features/small_model_for_extraction.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
# Main model for the agent, plus a smaller one passed as page_extraction_llm.
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)

task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
agent = Agent(
    task=task,
    llm=llm,
    page_extraction_llm=small_llm,
)


async def main():
    """Run the agent that uses the small model for page extraction."""
    await agent.run()


if __name__ == '__main__':
    asyncio.run(main())
|
||||
102
browser-use/examples/features/task_with_memory.py
Normal file
102
browser-use/examples/features/task_with_memory.py
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import anyio
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import Agent, Browser, BrowserConfig, Controller
|
||||
|
||||
# Pages the agent is asked to visit, written as paths under the docs site root.
_DOCS_ROOT = 'https://docs.mem0.ai'
links = [
    f'{_DOCS_ROOT}/{page}'
    for page in (
        'components/llms/models/litellm',
        'components/llms/models/mistral_AI',
        'components/llms/models/ollama',
        'components/llms/models/openai',
        'components/llms/models/together',
        'components/llms/models/xAI',
        'components/llms/overview',
        'components/vectordbs/config',
        'components/vectordbs/dbs/azure_ai_search',
        'components/vectordbs/dbs/chroma',
        'components/vectordbs/dbs/elasticsearch',
        'components/vectordbs/dbs/milvus',
        'components/vectordbs/dbs/opensearch',
        'components/vectordbs/dbs/pgvector',
        'components/vectordbs/dbs/pinecone',
        'components/vectordbs/dbs/qdrant',
        'components/vectordbs/dbs/redis',
        'components/vectordbs/dbs/supabase',
        'components/vectordbs/dbs/vertex_ai_vector_search',
        'components/vectordbs/dbs/weaviate',
        'components/vectordbs/overview',
        'contributing/development',
        'contributing/documentation',
        'core-concepts/memory-operations',
        'core-concepts/memory-types',
    )
]
|
||||
|
||||
|
||||
# One visited page as reported by the agent. Documented with comments rather
# than a class docstring so the pydantic model itself is left untouched.
class Link(BaseModel):
    url: str  # address of the page
    title: str  # title of the page
    summary: str  # summary of the page content
|
||||
|
||||
|
||||
# Top-level output model: a single `links` field holding every Link entry.
class Links(BaseModel):
    links: list[Link]
|
||||
|
||||
|
||||
# Passed to the Agent as `initial_actions`: open the docs landing page in a tab.
initial_actions = [{'open_tab': {'url': 'https://docs.mem0.ai/'}}]

# The controller is given Links as its output model.
controller = Controller(output_model=Links)

# Prompt for the agent; the link list and its length are interpolated in.
task_description = f"""
Visit all the links provided in {links} and summarize the content of the page with url and title. There are {len(links)} links to visit. Make sure to visit all the links. Return a json with the following format: [{{url: <url>, title: <title>, summary: <summary>}}].

Guidelines:
1. Strictly stay on the domain https://docs.mem0.ai
2. Do not visit any other websites.
3. Ignore the links that are hashed (#) or javascript (:), or mailto, or tel, or other protocols
4. Don't visit any other url other than the ones provided above.
5. Capture the unique urls which are not already visited.
6. If you visit any page that doesn't have host name docs.mem0.ai, then do not visit it and come back to the page with host name docs.mem0.ai.
"""
|
||||
|
||||
|
||||
async def main(max_steps=500):
    """Run the memory-enabled agent and write its parsed output to result.json.

    Args:
        max_steps: Upper bound on agent steps, forwarded to ``agent.run``.

    The final result, if any, is validated against ``Links`` and written to
    ``result.json`` as a list of ``{'title', 'url', 'summary'}`` dicts; an
    empty list is written when the agent returned nothing.
    """
    browser = Browser(config=BrowserConfig(headless=True))
    agent = Agent(
        task=task_description,
        llm=ChatOpenAI(model='gpt-4o-mini'),
        controller=controller,
        initial_actions=initial_actions,
        enable_memory=True,
        browser=browser,
    )

    try:
        history = await agent.run(max_steps=max_steps)
    finally:
        # Release the headless browser whether or not the run succeeded.
        await browser.close()

    result = history.final_result()
    if result:
        parsed: Links = Links.model_validate_json(result)
        print(f'Total parsed links: {len(parsed.links)}')
        parsed_result = [
            {'title': link.title, 'url': link.url, 'summary': link.summary}
            for link in parsed.links
        ]
    else:
        print('No result')
        parsed_result = []

    async with await anyio.open_file('result.json', 'w+') as f:
        await f.write(json.dumps(parsed_result, indent=4))


if __name__ == '__main__':
    asyncio.run(main())
|
||||
49
browser-use/examples/features/validate_output.py
Normal file
49
browser-use/examples/features/validate_output.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
"""
|
||||
Demonstrate output validator.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
|
||||
controller = Controller()


# Parameters of the custom done action registered below.
class DoneResult(BaseModel):
    title: str
    comments: str
    hours_since_start: int


# we overwrite done() in this example to demonstrate the validator
@controller.registry.action('Done with task', param_model=DoneResult)
async def done(params: DoneResult):
    # Build and print the result a correct implementation would return.
    payload = params.model_dump_json()
    result = ActionResult(is_done=True, extracted_content=payload)
    print(result)
    # NOTE: this is clearly wrong - to demonstrate the validator
    return 'blablabla'
|
||||
|
||||
|
||||
async def main():
    """Run the agent with ``validate_output=True`` for at most five steps."""
    agent = Agent(
        task='Go to hackernews hn and give me the top 1 post',
        llm=ChatOpenAI(model='gpt-4o'),
        controller=controller,
        validate_output=True,
    )
    # NOTE: this should fail to demonstrate the validator
    await agent.run(max_steps=5)


if __name__ == '__main__':
    asyncio.run(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue