[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
12
browser-use/examples/use-cases/README.md
Normal file
12
browser-use/examples/use-cases/README.md
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# Use Cases of Browser-Use
|
||||
|
||||
| File Name | Description |
|
||||
|-----------|------------|
|
||||
| `captcha.py` | Automates CAPTCHA solving on a demo website. |
|
||||
| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
|
||||
| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
|
||||
| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
|
||||
| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
|
||||
| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
|
||||
| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
|
||||
| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |
|
||||
40
browser-use/examples/use-cases/captcha.py
Normal file
40
browser-use/examples/use-cases/captcha.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
Goal: Automates CAPTCHA solving on a demo website.
|
||||
|
||||
|
||||
Simple try of the agent.
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
|
||||
for this example it helps to zoom in.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
|
||||
async def main():
    """Run one agent against the captcha demo page, then wait for the user."""
    captcha_task = 'go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha'
    agent = Agent(task=captcha_task, llm=ChatOpenAI(model='gpt-4o'))
    await agent.run()
    # Do not return until the user presses Enter.
    input('Press Enter to exit')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
52
browser-use/examples/use-cases/check_appointment.py
Normal file
52
browser-use/examples/use-cases/check_appointment.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
# Goal: Checks for available visa appointment slots on the Greece MFA website.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel, SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
class WebpageInfo(BaseModel):
    """Parameter model carrying the link used by the 'Go to the webpage' action."""

    # Pre-filled with the Greece MFA appointment page used by this example.
    link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'
|
||||
|
||||
|
||||
@controller.action('Go to the webpage', param_model=WebpageInfo)
def go_to_webpage(webpage_info: WebpageInfo):
    """Hand the link stored in ``webpage_info`` back to the caller."""
    target_link = webpage_info.link
    return target_link
|
||||
|
||||
|
||||
async def main():
    """Run the agent that checks the Greece MFA site for visa appointment slots.

    The prompt is built from adjacent string literals, which Python joins
    without any separator. Each sentence therefore ends with an explicit
    space so the model does not receive "...provided you.Check the visa...".
    """
    task = (
        'Go to the Greece MFA webpage via the link I provided you. '
        'Check the visa appointment dates. If there is no available date in this month, check the next month. '
        'If there is no available date in both months, tell me there is no available date.'
    )

    # The '' fallback only satisfies SecretStr's signature; the module already
    # refuses to start when OPENAI_API_KEY is missing.
    model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
    agent = Agent(task, model, controller=controller, use_vision=True)

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
160
browser-use/examples/use-cases/find_and_apply_to_jobs.py
Normal file
160
browser-use/examples/use-cases/find_and_apply_to_jobs.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
"""
|
||||
Goal: Searches for job listings, evaluates relevance based on a CV, and applies automatically.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
Also you have to install PyPDF2 to read pdf files: pip install PyPDF2
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import csv
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from pydantic import BaseModel, SecretStr
|
||||
from PyPDF2 import PdfReader
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
required_env_vars = ['AZURE_OPENAI_KEY', 'AZURE_OPENAI_ENDPOINT']
|
||||
for var in required_env_vars:
|
||||
if not os.getenv(var):
|
||||
raise ValueError(f'{var} is not set. Please add it to your environment variables.')
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
# full screen mode
|
||||
controller = Controller()
|
||||
|
||||
# NOTE: This is the path to your cv file
|
||||
CV = Path.cwd() / 'cv_04_24.pdf'
|
||||
|
||||
if not CV.exists():
|
||||
raise FileNotFoundError(f'You need to set the path to your cv file in the CV variable. CV file not found at {CV}')
|
||||
|
||||
|
||||
class Job(BaseModel):
    """Parameter model for a single job posting passed to ``save_jobs``."""

    # Required fields.
    title: str
    link: str
    company: str
    fit_score: float

    # Optional fields; default to None when not supplied.
    location: str | None = None
    salary: str | None = None
|
||||
|
||||
|
||||
@controller.action('Save jobs to file - with a score how well it fits to my profile', param_model=Job)
def save_jobs(job: Job):
    """Append one job posting, including its fit score, to ``jobs.csv``.

    The action description promises that the score is saved, so
    ``fit_score`` is written as well. It is appended as the last column so
    the previously written columns keep their order.

    Args:
        job: The job posting to persist.

    Returns:
        A short confirmation string.
    """
    row = [job.title, job.company, job.link, job.salary, job.location, job.fit_score]
    # newline='' lets the csv module control line endings itself.
    with open('jobs.csv', 'a', newline='') as f:
        csv.writer(f).writerow(row)

    return 'Saved job to file'
|
||||
|
||||
|
||||
@controller.action('Read jobs from file')
def read_jobs():
    """Return the raw text of ``jobs.csv``.

    Returns:
        The file contents, or an explanatory message when no job has been
        saved yet (the file is only created by the first ``save_jobs`` call).
    """
    try:
        with open('jobs.csv') as f:
            return f.read()
    except FileNotFoundError:
        # A missing file just means nothing was saved so far; report that
        # instead of failing the action.
        return 'No jobs have been saved yet (jobs.csv does not exist).'
|
||||
|
||||
|
||||
@controller.action('Read my cv for context to fill forms')
def read_cv():
    """Extract the text of every page of the CV PDF and return it as an ActionResult."""
    reader = PdfReader(CV)
    # A page whose extraction yields nothing contributes an empty string.
    text = ''.join(page.extract_text() or '' for page in reader.pages)
    logger.info(f'Read cv with {len(text)} characters')
    return ActionResult(extracted_content=text, include_in_memory=True)
|
||||
|
||||
|
||||
@controller.action(
    'Upload cv to element - call this function to upload if element is not found, try with different index of the same upload element',
)
async def upload_cv(index: int, browser: BrowserContext):
    """Upload the CV file through the file input associated with element ``index``.

    Every failure is reported as an ``ActionResult`` with ``error`` set
    rather than raised.
    """
    cv_path = str(CV.absolute())
    no_upload_msg = f'No file upload element found at index {index}'

    indexed_node = await browser.get_dom_element_by_index(index)
    if indexed_node is None:
        return ActionResult(error=f'No element found at index {index}')

    upload_node = indexed_node.get_file_upload_element()
    if upload_node is None:
        logger.info(no_upload_msg)
        return ActionResult(error=no_upload_msg)

    upload_handle = await browser.get_locate_element(upload_node)
    if upload_handle is None:
        logger.info(no_upload_msg)
        return ActionResult(error=no_upload_msg)

    try:
        await upload_handle.set_input_files(cv_path)
        msg = f'Successfully uploaded file "{cv_path}" to index {index}'
        logger.info(msg)
        return ActionResult(extracted_content=msg)
    except Exception as e:
        # The underlying error goes to the debug log only; the caller gets a generic message.
        logger.debug(f'Error in set_input_files: {str(e)}')
        return ActionResult(error=f'Failed to upload file to index {index}')
|
||||
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
disable_security=True,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def main():
    """Create one agent per target company and run all of them concurrently.

    ``ground_task`` is built from adjacent string literals, which Python
    joins without a separator. Each fragment therefore ends with a space so
    the prompt does not read "...read_cvfind ml internships...".
    """
    # ground_task = (
    # 	'You are a professional job finder. '
    # 	'1. Read my cv with read_cv'
    # 	'2. Read the saved jobs file '
    # 	'3. start applying to the first link of Amazon '
    # 	'You can navigate through pages e.g. by scrolling '
    # 	'Make sure to be on the english version of the page'
    # )
    ground_task = (
        'You are a professional job finder. '
        '1. Read my cv with read_cv '
        'find ml internships in and save them to a file '
        'search at company:'
    )
    tasks = [
        ground_task + '\n' + 'Google',
        # ground_task + '\n' + 'Amazon',
        # ground_task + '\n' + 'Apple',
        # ground_task + '\n' + 'Microsoft',
        # ground_task
        # + '\n'
        # + 'go to https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite/job/Taiwan%2C-Remote/Fulfillment-Analyst---New-College-Graduate-2025_JR1988949/apply/autofillWithResume?workerSubType=0c40f6bd1d8f10adf6dae42e46d44a17&workerSubType=ab40a98049581037a3ada55b087049b7 NVIDIA',
        # ground_task + '\n' + 'Meta',
    ]
    model = AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
        api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
    )

    # All agents share the same model, controller and browser instance.
    agents = [Agent(task=task, llm=model, controller=controller, browser=browser) for task in tasks]

    await asyncio.gather(*[agent.run() for agent in agents])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
90
browser-use/examples/use-cases/find_influencer_profiles.py
Normal file
90
browser-use/examples/use-cases/find_influencer_profiles.py
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
"""
|
||||
Show how to use custom outputs.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import httpx
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.agent.views import ActionResult
|
||||
|
||||
|
||||
class Profile(BaseModel):
    """One social media profile: the platform name and the profile link."""

    platform: str  # printed as "Platform: ..." by main()
    profile_url: str  # printed as "Profile URL: ..." by main()
|
||||
|
||||
|
||||
class Profiles(BaseModel):
    """Container model used as the controller's ``output_model``."""

    profiles: list[Profile]
|
||||
|
||||
|
||||
controller = Controller(exclude_actions=['search_google'], output_model=Profiles)
|
||||
BEARER_TOKEN = os.getenv('BEARER_TOKEN')
|
||||
|
||||
if not BEARER_TOKEN:
|
||||
# use the api key for ask tessa
|
||||
# you can also use other apis like exa, xAI, perplexity, etc.
|
||||
raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
|
||||
|
||||
|
||||
@controller.registry.action('Search the web for a specific query')
async def search_web(query: str):
    """Query the search API and return the relevant sources as JSON text.

    Args:
        query: Free-text search query.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is a JSON array of the
        sources with a score of at least 0.2, reduced to the fields in
        ``keys_to_use``.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    keys_to_use = ['url', 'title', 'content', 'author', 'score']
    headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
    async with httpx.AsyncClient() as client:
        response = await client.post(
            'https://asktessa.ai/api/search',
            headers=headers,
            json={'query': query},
        )

    # Fail with a clear HTTP error instead of a confusing KeyError further down.
    response.raise_for_status()

    # httpx's Response.json() is a regular (non-async) method. Awaiting its
    # result, as the previous code did, raises TypeError.
    sources = response.json().get('sources', [])
    final_results = [
        {key: source[key] for key in keys_to_use if key in source}
        for source in sources
        # Sources without a usable score are treated as irrelevant.
        if (source.get('score') or 0) >= 0.2
    ]
    result_text = json.dumps(final_results, indent=4)
    print(result_text)
    return ActionResult(extracted_content=result_text, include_in_memory=True)
|
||||
|
||||
|
||||
async def main():
    """Run the profile-finding agent and print each profile it returns."""
    task = (
        'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
        ' https://www.tiktokv.com/share/video/7470981717659110678/ '
    )
    agent = Agent(task=task, llm=ChatOpenAI(model='gpt-4o'), controller=controller)

    history = await agent.run()
    result = history.final_result()

    # Guard clause: nothing to parse when the agent produced no final result.
    if not result:
        print('No result')
        return

    parsed: Profiles = Profiles.model_validate_json(result)
    for entry in parsed.profiles:
        print('\n--------------------------------')
        print(f'Platform: {entry.platform}')
        print(f'Profile URL: {entry.profile_url}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
193
browser-use/examples/use-cases/google_sheets.py
Normal file
193
browser-use/examples/use-cases/google_sheets.py
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
import asyncio
|
||||
|
||||
import pyperclip
|
||||
from dotenv import load_dotenv
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
),
|
||||
)
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
def is_google_sheet(page) -> bool:
    """Return True when ``page.url`` starts with the Google Sheets document prefix."""
    sheets_prefix = 'https://docs.google.com/spreadsheets/'
    return page.url.startswith(sheets_prefix)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Open a specific Google Sheet')
async def open_google_sheet(browser: BrowserContext, google_sheet_url: str):
    """Navigate the current page to ``google_sheet_url`` unless it is already there.

    Returns an error result when the page does not end up on a Google Sheets URL.
    """
    page = await browser.get_current_page()
    already_open = page.url == google_sheet_url
    if not already_open:
        await page.goto(google_sheet_url)
        await page.wait_for_load_state()

    if is_google_sheet(page):
        return ActionResult(extracted_content=f'Opened Google Sheet {google_sheet_url}', include_in_memory=False)
    return ActionResult(error='Failed to open Google Sheet, are you sure you have permissions to access this sheet?')
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Get the contents of the entire sheet', page_filter=is_google_sheet)
async def get_sheet_contents(browser: BrowserContext):
    """Copy the whole sheet to the system clipboard and return it as text.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is whatever the
        clipboard holds after the copy (TSV, judging by the variable name
        used here — TODO confirm).
    """
    page = await browser.get_current_page()

    # select all cells
    await page.keyboard.press('Enter')
    await page.keyboard.press('Escape')
    await page.keyboard.press('ControlOrMeta+A')
    await page.keyboard.press('ControlOrMeta+C')
    # Give the copy a moment to reach the system clipboard before reading it.
    # This is the same pause get_range_contents uses, and avoids returning
    # stale clipboard data.
    await asyncio.sleep(0.1)

    extracted_tsv = pyperclip.paste()
    return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Select a specific cell or range of cells', page_filter=is_google_sheet)
async def select_cell_or_range(browser: BrowserContext, cell_or_range: str):
    """Select ``cell_or_range`` by driving the sheet's go-to-range popup with keystrokes."""
    page = await browser.get_current_page()
    keyboard = page.keyboard

    # Commit any in-progress edit first so the current cell's contents are not lost.
    await keyboard.press('Enter')
    # Drop the current focus; otherwise the range popup adds to the existing selection.
    await keyboard.press('Escape')
    await asyncio.sleep(0.1)

    # Start from the top left of the sheet.
    await keyboard.press('Home')
    await keyboard.press('ArrowUp')
    await asyncio.sleep(0.1)

    # Open the go-to-range popup and enter the target.
    await keyboard.press('Control+G')
    await asyncio.sleep(0.2)
    # NOTE(review): Playwright documents `delay` in milliseconds, so 0.05 is
    # effectively no delay — confirm whether seconds were intended.
    await keyboard.type(cell_or_range, delay=0.05)
    await asyncio.sleep(0.2)
    await keyboard.press('Enter')
    await asyncio.sleep(0.2)

    # Close the popup even when the jump did not succeed.
    await keyboard.press('Escape')
    return ActionResult(extracted_content=f'Selected cell {cell_or_range}', include_in_memory=False)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Get the contents of a specific cell or range of cells', page_filter=is_google_sheet)
async def get_range_contents(browser: BrowserContext, cell_or_range: str):
    """Select ``cell_or_range``, copy it, and return the clipboard text."""
    page = await browser.get_current_page()

    await select_cell_or_range(browser, cell_or_range)

    # Copy the selection, then pause briefly before reading the clipboard.
    await page.keyboard.press('ControlOrMeta+C')
    await asyncio.sleep(0.1)

    clipboard_text = pyperclip.paste()
    return ActionResult(extracted_content=clipboard_text, include_in_memory=True)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Clear the currently selected cells', page_filter=is_google_sheet)
async def clear_selected_range(browser: BrowserContext):
    """Press Backspace on the current page to clear whatever is selected."""
    current_page = await browser.get_current_page()
    await current_page.keyboard.press('Backspace')
    return ActionResult(extracted_content='Cleared selected range', include_in_memory=False)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Input text into the currently selected cell', page_filter=is_google_sheet)
async def input_selected_cell_text(browser: BrowserContext, text: str):
    """Type ``text`` into the selected cell and commit it."""
    page = await browser.get_current_page()
    keyboard = page.keyboard

    await keyboard.type(text, delay=0.1)
    # Commit the input so the next action cannot overwrite it ...
    await keyboard.press('Enter')
    # ... then press ArrowUp (presumably to return to the edited cell — TODO confirm).
    await keyboard.press('ArrowUp')
    return ActionResult(extracted_content=f'Inputted text {text}', include_in_memory=False)
|
||||
|
||||
|
||||
@controller.registry.action('Google Sheets: Batch update a range of cells', page_filter=is_google_sheet)
async def update_range_contents(browser: BrowserContext, range: str, new_contents_tsv: str):
    """Select ``range`` and paste ``new_contents_tsv`` into it via a synthetic paste event.

    Args:
        browser: Browser context whose current page shows the sheet.
        range: Cell or range to select before pasting. The name shadows the
            builtin but is kept because it is part of the action's interface.
        new_contents_tsv: Text placed on the synthetic clipboard as ``text/plain``.

    Returns:
        An ``ActionResult`` describing the update.
    """
    page = await browser.get_current_page()

    await select_cell_or_range(browser, range)

    # Simulate a paste event from the clipboard with the TSV content. The text
    # is passed as an argument instead of being interpolated into the script,
    # so backticks, "${...}" or backslashes in cell contents can neither break
    # the JavaScript nor inject code.
    await page.evaluate(
        """(tsv) => {
            const clipboardData = new DataTransfer();
            clipboardData.setData('text/plain', tsv);
            document.activeElement.dispatchEvent(new ClipboardEvent('paste', {clipboardData}));
        }""",
        new_contents_tsv,
    )

    return ActionResult(extracted_content=f'Updated cell {range} with {new_contents_tsv}', include_in_memory=False)
|
||||
|
||||
|
||||
# many more snippets for keyboard-shortcut based Google Sheets automation can be found here, see:
|
||||
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/sheet_actions.js
|
||||
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/commands.js
|
||||
# - https://support.google.com/docs/answer/181110?hl=en&co=GENIE.Platform%3DDesktop#zippy=%2Cmac-shortcuts
|
||||
|
||||
# Tip: LLM is bad at spatial reasoning, don't make it navigate with arrow keys relative to current cell
|
||||
# if given arrow keys, it will try to jump from G1 to A2 by pressing Down, without realizing needs to go Down+LeftLeftLeftLeft
|
||||
|
||||
|
||||
async def main():
    """Run four agents one after another in a single shared browser context.

    Order: clear the sheet, research and fill it, extend it, fact-check it.
    Each agent is created only after the previous one has finished.
    """
    eraser_task = """
            Clear all the existing values in columns A through F in this Google Sheet:
            https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
        """
    researcher_task = """
            Google to find the full name, nationality, and date of birth of the CEO of the top 10 Fortune 100 companies.
            For each company, append a row to this existing Google Sheet: https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
            Make sure column headers are present and all existing values in the sheet are formatted correctly.
            Columns:
                A: Company Name
                B: CEO Full Name
                C: CEO Country of Birth
                D: CEO Date of Birth (YYYY-MM-DD)
                E: Source URL where the information was found
        """
    improvised_continuer_task = """
            Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
            Add 3 more rows to the bottom continuing the existing pattern, make sure any data you add is sourced correctly.
        """
    final_fact_checker_task = """
            Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
            Fact-check every entry, add a new column F with your findings for each row.
            Make sure to check the source URL for each row, and make sure the information is correct.
        """
    pipeline = (eraser_task, researcher_task, improvised_continuer_task, final_fact_checker_task)

    async with await browser.new_context() as context:
        model = ChatOpenAI(model='gpt-4o')

        for stage_task in pipeline:
            stage_agent = Agent(
                task=stage_task,
                llm=model,
                browser_context=context,
                controller=controller,
            )
            await stage_agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
49
browser-use/examples/use-cases/online_coding_agent.py
Normal file
49
browser-use/examples/use-cases/online_coding_agent.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Browser
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
|
||||
async def main():
    """Run three agents in one shared browser context: open the editor, write code, execute it.

    The coder must run before the executor. The executor's task is to
    execute "the code written by the coder", so running it first would leave
    it with nothing to execute.
    """
    browser = Browser()
    async with await browser.new_context() as context:
        model = ChatOpenAI(model='gpt-4o')

        # Initialize browser agent
        agent1 = Agent(
            task='Open an online code editor programiz.',
            llm=model,
            browser_context=context,
        )
        executor = Agent(
            task='Executor. Execute the code written by the coder and suggest some updates if there are errors.',
            llm=model,
            browser_context=context,
        )

        coder = Agent(
            task='Coder. Your job is to write and complete code. You are an expert coder. Code a simple calculator. Write the code on the coding interface after agent1 has opened the link.',
            llm=model,
            browser_context=context,
        )

        # Sequential pipeline: open editor -> write code -> execute and review it.
        await agent1.run()
        await coder.run()
        await executor.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
127
browser-use/examples/use-cases/post-twitter.py
Normal file
127
browser-use/examples/use-cases/post-twitter.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
"""
|
||||
Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
|
||||
|
||||
X Posting Template using browser-use
|
||||
----------------------------------------
|
||||
|
||||
This template allows you to automate posting on X using browser-use.
|
||||
It supports:
|
||||
- Posting new tweets
|
||||
- Tagging users
|
||||
- Replying to tweets
|
||||
|
||||
Add your target user and message in the config section.
|
||||
|
||||
target_user="XXXXX"
|
||||
message="XXXXX"
|
||||
reply_url="XXXXX"
|
||||
|
||||
Any issues, contact me on X @defichemist95
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
|
||||
|
||||
|
||||
# ============ Configuration Section ============
|
||||
@dataclass
class TwitterConfig:
    """Settings bundle used to build the X (Twitter) posting agent."""

    # --- required values ---
    openai_api_key: str  # passed to the chat model as its API key
    chrome_path: str  # browser binary to launch
    target_user: str  # Twitter handle without @
    message: str  # text placed after the @mention
    reply_url: str  # URL the agent navigates to for the reply

    # --- optional values ---
    headless: bool = False
    model: str = 'gpt-4o-mini'
    base_url: str = 'https://x.com/home'
|
||||
|
||||
|
||||
# Customize these settings
|
||||
config = TwitterConfig(
|
||||
openai_api_key=os.getenv('OPENAI_API_KEY'),
|
||||
chrome_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', # This is for MacOS (Chrome)
|
||||
target_user='XXXXX',
|
||||
message='XXXXX',
|
||||
reply_url='XXXXX',
|
||||
headless=False,
|
||||
)
|
||||
|
||||
|
||||
def create_twitter_agent(config: TwitterConfig) -> Agent:
    """Build the agent that posts a tagged tweet and then replies to ``config.reply_url``."""
    # The tweet text is the @mention followed by the configured message.
    full_message = f'@{config.target_user} {config.message}'

    browser_config = BrowserConfig(
        headless=config.headless,
        browser_binary_path=config.chrome_path,
    )

    # Step-by-step instructions handed to the agent.
    instructions = f"""Navigate to Twitter and create a post and reply to a tweet.

        Here are the specific steps:

        1. Go to {config.base_url}. See the text input field at the top of the page that says "What's happening?"
        2. Look for the text input field at the top of the page that says "What's happening?"
        3. Click the input field and type exactly this message:
        "{full_message}"
        4. Find and click the "Post" button (look for attributes: 'button' and 'data-testid="tweetButton"')
        5. Do not click on the '+' button which will add another tweet.

        6. Navigate to {config.reply_url}
        7. Before replying, understand the context of the tweet by scrolling down and reading the comments.
        8. Reply to the tweet under 50 characters.

        Important:
        - Wait for each element to load before interacting
        - Make sure the message is typed exactly as shown
        - Verify the post button is clickable before clicking
        - Do not click on the '+' button which will add another tweet
        """

    return Agent(
        task=instructions,
        llm=ChatOpenAI(model=config.model, api_key=config.openai_api_key),
        controller=Controller(),
        browser=Browser(config=browser_config),
    )
|
||||
|
||||
|
||||
async def post_tweet(agent: Agent):
    """Run ``agent`` for at most 100 steps, create its history GIF, and print the outcome.

    Any exception is caught and printed instead of being propagated.
    """
    try:
        await agent.run(max_steps=100)
        agent.create_history_gif()
        print('Tweet posted successfully!')
    except Exception as err:
        print(f'Error posting tweet: {str(err)}')
|
||||
|
||||
|
||||
async def main():
    """Build the agent from the module-level ``config`` and run it."""
    twitter_agent = create_twitter_agent(config)
    await twitter_agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
43
browser-use/examples/use-cases/scrolling_page.py
Normal file
43
browser-use/examples/use-cases/scrolling_page.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
if not os.getenv('OPENAI_API_KEY'):
|
||||
raise ValueError('OPENAI_API_KEY is not set')
|
||||
|
||||
"""
|
||||
Example: Using the 'Scroll down' action.
|
||||
|
||||
This script demonstrates how the agent can navigate to a webpage and scroll down the content.
|
||||
If no amount is specified, the agent will scroll down by one page height.
|
||||
"""
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
|
||||
agent = Agent(
|
||||
# task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll down by one page - then scroll up by 100 pixels - then scroll down by 100 pixels - then scroll down by 10000 pixels.",
|
||||
task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll to the string 'The vast majority of computer'",
|
||||
llm=llm,
|
||||
browser=Browser(config=BrowserConfig(headless=False)),
|
||||
)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level scrolling ``agent`` to completion."""
    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
128
browser-use/examples/use-cases/shopping.py
Normal file
128
browser-use/examples/use-cases/shopping.py
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Browser
|
||||
|
||||
# Prompt text up to and including the checkout bullets. Account credentials are
# deliberately NOT stored in the source; build_task() appends them at runtime.
# NOTE(review): the credentials that were previously committed in this prompt
# should be considered leaked and rotated.
_TASK_BEFORE_CREDENTIALS = """
### Prompt for Shopping Agent – Migros Online Grocery Order

**Objective:**
Visit [Migros Online](https://www.migros.ch/en), search for the required grocery items, add them to the cart, select an appropriate delivery window, and complete the checkout process using TWINT.

**Important:**
- Make sure that you don't buy more than it's needed for each article.
- After your search, if you click the "+" button, it adds the item to the basket.
- if you open the basket sidewindow menu, you can close it by clicking the X button on the top right. This will help you navigate easier.
---

### Step 1: Navigate to the Website
- Open [Migros Online](https://www.migros.ch/en).
- You should be logged in as Nikolaos Kaliorakis

---

### Step 2: Add Items to the Basket

#### Shopping List:

**Meat & Dairy:**
- Beef Minced meat (1 kg)
- Gruyère cheese (grated preferably)
- 2 liters full-fat milk
- Butter (cheapest available)

**Vegetables:**
- Carrots (1kg pack)
- Celery
- Leeks (1 piece)
- 1 kg potatoes

At this stage, check the basket on the top right (indicates the price) and check if you bought the right items.

**Fruits:**
- 2 lemons
- Oranges (for snacking)

**Pantry Items:**
- Lasagna sheets
- Tahini
- Tomato paste (below CHF2)
- Black pepper refill (not with the mill)
- 2x 1L Oatly Barista(oat milk)
- 1 pack of eggs (10 egg package)

#### Ingredients I already have (DO NOT purchase):
- Olive oil, garlic, canned tomatoes, dried oregano, bay leaves, salt, chili flakes, flour, nutmeg, cumin.

---

### Step 3: Handling Unavailable Items
- If an item is **out of stock**, find the best alternative.
- Use the following recipe contexts to choose substitutions:
- **Pasta Bolognese & Lasagna:** Minced meat, tomato paste, lasagna sheets, milk (for béchamel), Gruyère cheese.
- **Hummus:** Tahini, chickpeas, lemon juice, olive oil.
- **Chickpea Curry Soup:** Chickpeas, leeks, curry, lemons.
- **Crispy Slow-Cooked Pork Belly with Vegetables:** Potatoes, butter.
- Example substitutions:
- If Gruyère cheese is unavailable, select another semi-hard cheese.
- If Tahini is unavailable, a sesame-based alternative may work.

---

### Step 4: Adjusting for Minimum Order Requirement
- If the total order **is below CHF 99**, add **a liquid soap refill** to reach the minimum. If it;s still you can buy some bread, dark chockolate.
- At this step, check if you have bought MORE items than needed. If the price is more then CHF200, you MUST remove items.
- If an item is not available, choose an alternative.
- if an age verification is needed, remove alcoholic products, we haven't verified yet.

---

### Step 5: Select Delivery Window
- Choose a **delivery window within the current week**. It's ok to pay up to CHF2 for the window selection.
- Preferably select a slot within the workweek.

---

### Step 6: Checkout
- Proceed to checkout.
- Select **TWINT** as the payment method.
- Check out.
"""

# Remainder of the prompt that follows the (optional) credential bullets.
_TASK_AFTER_CREDENTIALS = """---

### Step 7: Confirm Order & Output Summary
- Once the order is placed, output a summary including:
- **Final list of items purchased** (including any substitutions).
- **Total cost**.
- **Chosen delivery time**.

**Important:** Ensure efficiency and accuracy throughout the process."""


def build_task(username=None, password=None):
    """Return the shopping prompt, optionally including login credentials.

    Args:
        username: Account login to mention in the checkout step, or ``None``.
        password: Account password to mention in the checkout step, or ``None``.

    Returns:
        The full prompt string. The two credential bullets are inserted after
        the checkout bullets only when both ``username`` and ``password`` are
        non-empty; otherwise the prompt contains no credentials at all.
    """
    credentials = ''
    if username and password:
        credentials = (
            f"- if it's needed the username is: {username}\n"
            f'- and the password is : {password}\n'
        )
    return _TASK_BEFORE_CREDENTIALS + credentials + _TASK_AFTER_CREDENTIALS


# Credentials come from the environment (e.g. the .env file loaded above)
# instead of being committed with the example.
task = build_task(os.getenv('MIGROS_USERNAME'), os.getenv('MIGROS_PASSWORD'))
|
||||
|
||||
# Browser created with default settings; kept in a module-level name so that
# main() can close it once the run is over.
browser = Browser()

# The agent works through the prompt in `task` using GPT-4o.
agent = Agent(browser=browser, llm=ChatOpenAI(model='gpt-4o'), task=task)
|
||||
|
||||
|
||||
async def main():
    """Run the shopping agent, wait for Enter, then close the browser.

    The browser is closed in a ``finally`` clause so it is also released when
    the agent run raises instead of completing normally.
    """
    try:
        await agent.run()
        # Keep the window open so the result can be inspected before closing.
        input('Press Enter to close the browser...')
    finally:
        await browser.close()


if __name__ == '__main__':
    asyncio.run(main())
|
||||
48
browser-use/examples/use-cases/twitter_post_using_cookies.py
Normal file
48
browser-use/examples/use-cases/twitter_post_using_cookies.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Goal: Automates posting on X (Twitter) using stored authentication cookies.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext, BrowserContextConfig
|
||||
|
||||
# The Gemini client needs GOOGLE_API_KEY; stop immediately when it is missing.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError('GOOGLE_API_KEY is not set')

llm = ChatGoogleGenerativeAI(
    model='gemini-2.0-flash-exp',
    api_key=SecretStr(api_key),
)

browser = Browser(
    config=BrowserConfig(
        # Uncomment to point at a locally installed Chrome binary:
        # browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    ),
)

# The cookie file is expected next to this script.
file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
context = BrowserContext(
    config=BrowserContextConfig(cookies_file=file_path),
    browser=browser,
)
|
||||
|
||||
|
||||
async def main():
    """Post a fixed message on X using the cookie-backed context, then clean up.

    The agent is limited to 25 steps with at most 4 actions per step. After the
    run the script waits for Enter and then closes the browser; the close
    happens in a ``finally`` clause so it also runs when the agent raises.
    """
    agent = Agent(
        browser_context=context,
        task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
        llm=llm,
        max_actions_per_step=4,
    )
    try:
        await agent.run(max_steps=25)
        input('Press Enter to close the browser...')
    finally:
        # The prompt above promises to close the browser, so actually do it.
        await browser.close()


if __name__ == '__main__':
    asyncio.run(main())
|
||||
77
browser-use/examples/use-cases/web_voyager_agent.py
Normal file
77
browser-use/examples/use-cases/web_voyager_agent.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Adjust Python path
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import AzureChatOpenAI, ChatOpenAI
|
||||
from pydantic import SecretStr
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
|
||||
|
||||
# Choose the LLM backend from the environment: plain OpenAI takes precedence,
# Azure OpenAI is used only when both its key and endpoint are configured.
_azure_key = os.getenv('AZURE_OPENAI_KEY')
_azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')

if os.getenv('OPENAI_API_KEY'):
    llm = ChatOpenAI(model='gpt-4o')
elif _azure_key and _azure_endpoint:
    llm = AzureChatOpenAI(
        model='gpt-4o',
        api_version='2024-10-21',
        azure_endpoint=_azure_endpoint,
        api_key=SecretStr(_azure_key),
    )
else:
    raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')
|
||||
|
||||
|
||||
# Settings applied to every new browser context.
_context_config = BrowserContextConfig(
    disable_security=True,
    minimum_wait_page_load_time=1,  # 3 on prod
    maximum_wait_page_load_time=10,  # 20 on prod
    # no_viewport=False constrains the viewport to the dimensions below, which
    # is useful when a fixed viewport size is required.
    no_viewport=False,
    window_width=1280,
    window_height=1100,
    # trace_path='./tmp/web_voyager_agent',
)

browser = Browser(
    config=BrowserConfig(
        headless=False,  # This is True in production
        disable_security=True,
        new_context_config=_context_config,
    )
)

# Alternative tasks; swap one in by replacing the active TASK below.
# TASK = """
# Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2025, including the total travel time and number of stops. on https://www.google.com/travel/flights/
# """
# TASK = """
# Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree? on https://www.coursera.org/"""

# Active task handed to the agent in main().
TASK = """
Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2025. on https://www.booking.com/
"""
|
||||
|
||||
|
||||
async def main():
    """Run the agent on ``TASK`` for up to 50 steps and save its history."""
    voyager = Agent(
        browser=browser,
        llm=llm,
        task=TASK,
        enable_memory=False,
        validate_output=True,
    )
    run_history = await voyager.run(max_steps=50)
    # Persist the run history as JSON for later inspection.
    run_history.save_to_file('./tmp/history.json')


if __name__ == '__main__':
    asyncio.run(main())
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent
|
||||
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
|
||||
|
||||
# video https://preview.screen.studio/share/vuq91Ej8

# Temperature 0.0 is requested for the model.
llm = ChatOpenAI(temperature=0.0, model='gpt-4o')

task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'

# NOTE(review): viewport_expansion=-1 presumably lifts the viewport limit on
# which elements are considered - confirm against BrowserContextConfig.
_context_config = BrowserContextConfig(
    highlight_elements=False,
    viewport_expansion=-1,
)
browser = Browser(config=BrowserConfig(new_context_config=_context_config))

# Vision input is turned off for this agent.
agent = Agent(
    task=task,
    llm=llm,
    browser=browser,
    use_vision=False,
)
|
||||
|
||||
|
||||
async def main():
    """Run the module-level Wikipedia navigation ``agent``."""
    await agent.run()


# Script entry point: execute the coroutine on a new event loop.
if __name__ == '__main__':
    asyncio.run(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue