[Add] browser-use and main.py

This commit is contained in:
tv0924@icloud.com 2025-05-18 21:57:54 +09:00
commit 96914d44ac
221 changed files with 30952 additions and 1 deletions

View file

@ -0,0 +1,12 @@
# Use Cases of Browser-Use
| File Name | Description |
|-----------|------------|
| `captcha.py` | Automates CAPTCHA solving on a demo website. |
| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |

View file

@ -0,0 +1,40 @@
"""
Goal: Automates CAPTCHA solving on a demo website.
Simple try of the agent.
@dev You need to add OPENAI_API_KEY to your environment variables.
NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
for this example it helps to zoom in.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
async def main():
llm = ChatOpenAI(model='gpt-4o')
agent = Agent(
task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
llm=llm,
)
await agent.run()
input('Press Enter to exit')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,52 @@
# Goal: Checks for available visa appointment slots on the Greece MFA website.
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, SecretStr
from browser_use.agent.service import Agent
from browser_use.controller.service import Controller
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
controller = Controller()
class WebpageInfo(BaseModel):
"""Model for webpage link."""
link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'
@controller.action('Go to the webpage', param_model=WebpageInfo)
def go_to_webpage(webpage_info: WebpageInfo):
"""Returns the webpage link."""
return webpage_info.link
async def main():
"""Main function to execute the agent task."""
task = (
'Go to the Greece MFA webpage via the link I provided you.'
'Check the visa appointment dates. If there is no available date in this month, check the next month.'
'If there is no available date in both months, tell me there is no available date.'
)
model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
agent = Agent(task, model, controller=controller, use_vision=True)
await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,160 @@
"""
Goal: Searches for job listings, evaluates relevance based on a CV, and applies
@dev You need to add OPENAI_API_KEY to your environment variables.
Also you have to install PyPDF2 to read pdf files: pip install PyPDF2
"""
import asyncio
import csv
import logging
import os
import sys
from pathlib import Path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel, SecretStr
from PyPDF2 import PdfReader
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext
required_env_vars = ['AZURE_OPENAI_KEY', 'AZURE_OPENAI_ENDPOINT']
for var in required_env_vars:
if not os.getenv(var):
raise ValueError(f'{var} is not set. Please add it to your environment variables.')
logger = logging.getLogger(__name__)
# full screen mode
controller = Controller()
# NOTE: This is the path to your cv file
CV = Path.cwd() / 'cv_04_24.pdf'
if not CV.exists():
raise FileNotFoundError(f'You need to set the path to your cv file in the CV variable. CV file not found at {CV}')
class Job(BaseModel):
title: str
link: str
company: str
fit_score: float
location: str | None = None
salary: str | None = None
@controller.action('Save jobs to file - with a score how well it fits to my profile', param_model=Job)
def save_jobs(job: Job):
with open('jobs.csv', 'a', newline='') as f:
writer = csv.writer(f)
writer.writerow([job.title, job.company, job.link, job.salary, job.location])
return 'Saved job to file'
@controller.action('Read jobs from file')
def read_jobs():
with open('jobs.csv') as f:
return f.read()
@controller.action('Read my cv for context to fill forms')
def read_cv():
pdf = PdfReader(CV)
text = ''
for page in pdf.pages:
text += page.extract_text() or ''
logger.info(f'Read cv with {len(text)} characters')
return ActionResult(extracted_content=text, include_in_memory=True)
@controller.action(
'Upload cv to element - call this function to upload if element is not found, try with different index of the same upload element',
)
async def upload_cv(index: int, browser: BrowserContext):
path = str(CV.absolute())
dom_el = await browser.get_dom_element_by_index(index)
if dom_el is None:
return ActionResult(error=f'No element found at index {index}')
file_upload_dom_el = dom_el.get_file_upload_element()
if file_upload_dom_el is None:
logger.info(f'No file upload element found at index {index}')
return ActionResult(error=f'No file upload element found at index {index}')
file_upload_el = await browser.get_locate_element(file_upload_dom_el)
if file_upload_el is None:
logger.info(f'No file upload element found at index {index}')
return ActionResult(error=f'No file upload element found at index {index}')
try:
await file_upload_el.set_input_files(path)
msg = f'Successfully uploaded file "{path}" to index {index}'
logger.info(msg)
return ActionResult(extracted_content=msg)
except Exception as e:
logger.debug(f'Error in set_input_files: {str(e)}')
return ActionResult(error=f'Failed to upload file to index {index}')
browser = Browser(
config=BrowserConfig(
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
disable_security=True,
)
)
async def main():
# ground_task = (
# 'You are a professional job finder. '
# '1. Read my cv with read_cv'
# '2. Read the saved jobs file '
# '3. start applying to the first link of Amazon '
# 'You can navigate through pages e.g. by scrolling '
# 'Make sure to be on the english version of the page'
# )
ground_task = (
'You are a professional job finder. '
'1. Read my cv with read_cv'
'find ml internships in and save them to a file'
'search at company:'
)
tasks = [
ground_task + '\n' + 'Google',
# ground_task + '\n' + 'Amazon',
# ground_task + '\n' + 'Apple',
# ground_task + '\n' + 'Microsoft',
# ground_task
# + '\n'
# + 'go to https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite/job/Taiwan%2C-Remote/Fulfillment-Analyst---New-College-Graduate-2025_JR1988949/apply/autofillWithResume?workerSubType=0c40f6bd1d8f10adf6dae42e46d44a17&workerSubType=ab40a98049581037a3ada55b087049b7 NVIDIA',
# ground_task + '\n' + 'Meta',
]
model = AzureChatOpenAI(
model='gpt-4o',
api_version='2024-10-21',
azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
)
agents = []
for task in tasks:
agent = Agent(task=task, llm=model, controller=controller, browser=browser)
agents.append(agent)
await asyncio.gather(*[agent.run() for agent in agents])
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,90 @@
"""
Show how to use custom outputs.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import json
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
import httpx
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from browser_use import Agent, Controller
from browser_use.agent.views import ActionResult
class Profile(BaseModel):
platform: str
profile_url: str
class Profiles(BaseModel):
profiles: list[Profile]
controller = Controller(exclude_actions=['search_google'], output_model=Profiles)
BEARER_TOKEN = os.getenv('BEARER_TOKEN')
if not BEARER_TOKEN:
# use the api key for ask tessa
# you can also use other apis like exa, xAI, perplexity, etc.
raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
@controller.registry.action('Search the web for a specific query')
async def search_web(query: str):
keys_to_use = ['url', 'title', 'content', 'author', 'score']
headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
async with httpx.AsyncClient() as client:
response = await client.post(
'https://asktessa.ai/api/search',
headers=headers,
json={'query': query},
)
final_results = [
{key: source[key] for key in keys_to_use if key in source}
for source in await response.json()['sources']
if source['score'] >= 0.2
]
# print(json.dumps(final_results, indent=4))
result_text = json.dumps(final_results, indent=4)
print(result_text)
return ActionResult(extracted_content=result_text, include_in_memory=True)
async def main():
task = (
'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
' https://www.tiktokv.com/share/video/7470981717659110678/ '
)
model = ChatOpenAI(model='gpt-4o')
agent = Agent(task=task, llm=model, controller=controller)
history = await agent.run()
result = history.final_result()
if result:
parsed: Profiles = Profiles.model_validate_json(result)
for profile in parsed.profiles:
print('\n--------------------------------')
print(f'Platform: {profile.platform}')
print(f'Profile URL: {profile.profile_url}')
else:
print('No result')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,193 @@
import os
import sys
from browser_use.browser.context import BrowserContext
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import asyncio
import pyperclip
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from browser_use import ActionResult, Agent, Controller
from browser_use.browser.browser import Browser, BrowserConfig
browser = Browser(
config=BrowserConfig(
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
),
)
# Load environment variables
load_dotenv()
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
controller = Controller()
def is_google_sheet(page) -> bool:
return page.url.startswith('https://docs.google.com/spreadsheets/')
@controller.registry.action('Google Sheets: Open a specific Google Sheet')
async def open_google_sheet(browser: BrowserContext, google_sheet_url: str):
page = await browser.get_current_page()
if page.url != google_sheet_url:
await page.goto(google_sheet_url)
await page.wait_for_load_state()
if not is_google_sheet(page):
return ActionResult(error='Failed to open Google Sheet, are you sure you have permissions to access this sheet?')
return ActionResult(extracted_content=f'Opened Google Sheet {google_sheet_url}', include_in_memory=False)
@controller.registry.action('Google Sheets: Get the contents of the entire sheet', page_filter=is_google_sheet)
async def get_sheet_contents(browser: BrowserContext):
page = await browser.get_current_page()
# select all cells
await page.keyboard.press('Enter')
await page.keyboard.press('Escape')
await page.keyboard.press('ControlOrMeta+A')
await page.keyboard.press('ControlOrMeta+C')
extracted_tsv = pyperclip.paste()
return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
@controller.registry.action('Google Sheets: Select a specific cell or range of cells', page_filter=is_google_sheet)
async def select_cell_or_range(browser: BrowserContext, cell_or_range: str):
page = await browser.get_current_page()
await page.keyboard.press('Enter') # make sure we dont delete current cell contents if we were last editing
await page.keyboard.press('Escape') # to clear current focus (otherwise select range popup is additive)
await asyncio.sleep(0.1)
await page.keyboard.press('Home') # move cursor to the top left of the sheet first
await page.keyboard.press('ArrowUp')
await asyncio.sleep(0.1)
await page.keyboard.press('Control+G') # open the goto range popup
await asyncio.sleep(0.2)
await page.keyboard.type(cell_or_range, delay=0.05)
await asyncio.sleep(0.2)
await page.keyboard.press('Enter')
await asyncio.sleep(0.2)
await page.keyboard.press('Escape') # to make sure the popup still closes in the case where the jump failed
return ActionResult(extracted_content=f'Selected cell {cell_or_range}', include_in_memory=False)
@controller.registry.action('Google Sheets: Get the contents of a specific cell or range of cells', page_filter=is_google_sheet)
async def get_range_contents(browser: BrowserContext, cell_or_range: str):
page = await browser.get_current_page()
await select_cell_or_range(browser, cell_or_range)
await page.keyboard.press('ControlOrMeta+C')
await asyncio.sleep(0.1)
extracted_tsv = pyperclip.paste()
return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
@controller.registry.action('Google Sheets: Clear the currently selected cells', page_filter=is_google_sheet)
async def clear_selected_range(browser: BrowserContext):
page = await browser.get_current_page()
await page.keyboard.press('Backspace')
return ActionResult(extracted_content='Cleared selected range', include_in_memory=False)
@controller.registry.action('Google Sheets: Input text into the currently selected cell', page_filter=is_google_sheet)
async def input_selected_cell_text(browser: BrowserContext, text: str):
page = await browser.get_current_page()
await page.keyboard.type(text, delay=0.1)
await page.keyboard.press('Enter') # make sure to commit the input so it doesn't get overwritten by the next action
await page.keyboard.press('ArrowUp')
return ActionResult(extracted_content=f'Inputted text {text}', include_in_memory=False)
@controller.registry.action('Google Sheets: Batch update a range of cells', page_filter=is_google_sheet)
async def update_range_contents(browser: BrowserContext, range: str, new_contents_tsv: str):
page = await browser.get_current_page()
await select_cell_or_range(browser, range)
# simulate paste event from clipboard with TSV content
await page.evaluate(f"""
const clipboardData = new DataTransfer();
clipboardData.setData('text/plain', `{new_contents_tsv}`);
document.activeElement.dispatchEvent(new ClipboardEvent('paste', {{clipboardData}}));
""")
return ActionResult(extracted_content=f'Updated cell {range} with {new_contents_tsv}', include_in_memory=False)
# many more snippets for keyboard-shortcut based Google Sheets automation can be found here, see:
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/sheet_actions.js
# - https://github.com/philc/sheetkeys/blob/master/content_scripts/commands.js
# - https://support.google.com/docs/answer/181110?hl=en&co=GENIE.Platform%3DDesktop#zippy=%2Cmac-shortcuts
# Tip: LLM is bad at spatial reasoning, don't make it navigate with arrow keys relative to current cell
# if given arrow keys, it will try to jump from G1 to A2 by pressing Down, without realizing needs to go Down+LeftLeftLeftLeft
async def main():
async with await browser.new_context() as context:
model = ChatOpenAI(model='gpt-4o')
eraser = Agent(
task="""
Clear all the existing values in columns A through F in this Google Sheet:
https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
""",
llm=model,
browser_context=context,
controller=controller,
)
await eraser.run()
researcher = Agent(
task="""
Google to find the full name, nationality, and date of birth of the CEO of the top 10 Fortune 100 companies.
For each company, append a row to this existing Google Sheet: https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Make sure column headers are present and all existing values in the sheet are formatted correctly.
Columns:
A: Company Name
B: CEO Full Name
C: CEO Country of Birth
D: CEO Date of Birth (YYYY-MM-DD)
E: Source URL where the information was found
""",
llm=model,
browser_context=context,
controller=controller,
)
await researcher.run()
improvised_continuer = Agent(
task="""
Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Add 3 more rows to the bottom continuing the existing pattern, make sure any data you add is sourced correctly.
""",
llm=model,
browser_context=context,
controller=controller,
)
await improvised_continuer.run()
final_fact_checker = Agent(
task="""
Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
Fact-check every entry, add a new column F with your findings for each row.
Make sure to check the source URL for each row, and make sure the information is correct.
""",
llm=model,
browser_context=context,
controller=controller,
)
await final_fact_checker.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,49 @@
# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
async def main():
browser = Browser()
async with await browser.new_context() as context:
model = ChatOpenAI(model='gpt-4o')
# Initialize browser agent
agent1 = Agent(
task='Open an online code editor programiz.',
llm=model,
browser_context=context,
)
executor = Agent(
task='Executor. Execute the code written by the coder and suggest some updates if there are errors.',
llm=model,
browser_context=context,
)
coder = Agent(
task='Coder. Your job is to write and complete code. You are an expert coder. Code a simple calculator. Write the code on the coding interface after agent1 has opened the link.',
llm=model,
browser_context=context,
)
await agent1.run()
await executor.run()
await coder.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,127 @@
"""
Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
X Posting Template using browser-use
----------------------------------------
This template allows you to automate posting on X using browser-use.
It supports:
- Posting new tweets
- Tagging users
- Replying to tweets
Add your target user and message in the config section.
target_user="XXXXX"
message="XXXXX"
reply_url="XXXXX"
Any issues, contact me on X @defichemist95
"""
import asyncio
import os
import sys
from dataclasses import dataclass
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Controller
from browser_use.browser.browser import Browser, BrowserConfig
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
# ============ Configuration Section ============
@dataclass
class TwitterConfig:
"""Configuration for Twitter posting"""
openai_api_key: str
chrome_path: str
target_user: str # Twitter handle without @
message: str
reply_url: str
headless: bool = False
model: str = 'gpt-4o-mini'
base_url: str = 'https://x.com/home'
# Customize these settings
config = TwitterConfig(
openai_api_key=os.getenv('OPENAI_API_KEY'),
chrome_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', # This is for MacOS (Chrome)
target_user='XXXXX',
message='XXXXX',
reply_url='XXXXX',
headless=False,
)
def create_twitter_agent(config: TwitterConfig) -> Agent:
llm = ChatOpenAI(model=config.model, api_key=config.openai_api_key)
browser = Browser(
config=BrowserConfig(
headless=config.headless,
browser_binary_path=config.chrome_path,
)
)
controller = Controller()
# Construct the full message with tag
full_message = f'@{config.target_user} {config.message}'
# Create the agent with detailed instructions
return Agent(
task=f"""Navigate to Twitter and create a post and reply to a tweet.
Here are the specific steps:
1. Go to {config.base_url}. See the text input field at the top of the page that says "What's happening?"
2. Look for the text input field at the top of the page that says "What's happening?"
3. Click the input field and type exactly this message:
"{full_message}"
4. Find and click the "Post" button (look for attributes: 'button' and 'data-testid="tweetButton"')
5. Do not click on the '+' button which will add another tweet.
6. Navigate to {config.reply_url}
7. Before replying, understand the context of the tweet by scrolling down and reading the comments.
8. Reply to the tweet under 50 characters.
Important:
- Wait for each element to load before interacting
- Make sure the message is typed exactly as shown
- Verify the post button is clickable before clicking
- Do not click on the '+' button which will add another tweet
""",
llm=llm,
controller=controller,
browser=browser,
)
async def post_tweet(agent: Agent):
try:
await agent.run(max_steps=100)
agent.create_history_gif()
print('Tweet posted successfully!')
except Exception as e:
print(f'Error posting tweet: {str(e)}')
async def main():
agent = create_twitter_agent(config)
await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,43 @@
# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
if not os.getenv('OPENAI_API_KEY'):
raise ValueError('OPENAI_API_KEY is not set')
"""
Example: Using the 'Scroll down' action.
This script demonstrates how the agent can navigate to a webpage and scroll down the content.
If no amount is specified, the agent will scroll down by one page height.
"""
llm = ChatOpenAI(model='gpt-4o')
agent = Agent(
# task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll down by one page - then scroll up by 100 pixels - then scroll down by 100 pixels - then scroll down by 10000 pixels.",
task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll to the string 'The vast majority of computer'",
llm=llm,
browser=Browser(config=BrowserConfig(headless=False)),
)
async def main():
await agent.run()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,128 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent, Browser
task = """
### Prompt for Shopping Agent Migros Online Grocery Order
**Objective:**
Visit [Migros Online](https://www.migros.ch/en), search for the required grocery items, add them to the cart, select an appropriate delivery window, and complete the checkout process using TWINT.
**Important:**
- Make sure that you don't buy more than it's needed for each article.
- After your search, if you click the "+" button, it adds the item to the basket.
- if you open the basket sidewindow menu, you can close it by clicking the X button on the top right. This will help you navigate easier.
---
### Step 1: Navigate to the Website
- Open [Migros Online](https://www.migros.ch/en).
- You should be logged in as Nikolaos Kaliorakis
---
### Step 2: Add Items to the Basket
#### Shopping List:
**Meat & Dairy:**
- Beef Minced meat (1 kg)
- Gruyère cheese (grated preferably)
- 2 liters full-fat milk
- Butter (cheapest available)
**Vegetables:**
- Carrots (1kg pack)
- Celery
- Leeks (1 piece)
- 1 kg potatoes
At this stage, check the basket on the top right (indicates the price) and check if you bought the right items.
**Fruits:**
- 2 lemons
- Oranges (for snacking)
**Pantry Items:**
- Lasagna sheets
- Tahini
- Tomato paste (below CHF2)
- Black pepper refill (not with the mill)
- 2x 1L Oatly Barista(oat milk)
- 1 pack of eggs (10 egg package)
#### Ingredients I already have (DO NOT purchase):
- Olive oil, garlic, canned tomatoes, dried oregano, bay leaves, salt, chili flakes, flour, nutmeg, cumin.
---
### Step 3: Handling Unavailable Items
- If an item is **out of stock**, find the best alternative.
- Use the following recipe contexts to choose substitutions:
- **Pasta Bolognese & Lasagna:** Minced meat, tomato paste, lasagna sheets, milk (for béchamel), Gruyère cheese.
- **Hummus:** Tahini, chickpeas, lemon juice, olive oil.
- **Chickpea Curry Soup:** Chickpeas, leeks, curry, lemons.
- **Crispy Slow-Cooked Pork Belly with Vegetables:** Potatoes, butter.
- Example substitutions:
- If Gruyère cheese is unavailable, select another semi-hard cheese.
- If Tahini is unavailable, a sesame-based alternative may work.
---
### Step 4: Adjusting for Minimum Order Requirement
- If the total order **is below CHF 99**, add **a liquid soap refill** to reach the minimum. If it;s still you can buy some bread, dark chockolate.
- At this step, check if you have bought MORE items than needed. If the price is more then CHF200, you MUST remove items.
- If an item is not available, choose an alternative.
- if an age verification is needed, remove alcoholic products, we haven't verified yet.
---
### Step 5: Select Delivery Window
- Choose a **delivery window within the current week**. It's ok to pay up to CHF2 for the window selection.
- Preferably select a slot within the workweek.
---
### Step 6: Checkout
- Proceed to checkout.
- Select **TWINT** as the payment method.
- Check out.
-
- if it's needed the username is: nikoskalio.dev@gmail.com
- and the password is : TheCircuit.Migros.dev!
---
### Step 7: Confirm Order & Output Summary
- Once the order is placed, output a summary including:
- **Final list of items purchased** (including any substitutions).
- **Total cost**.
- **Chosen delivery time**.
**Important:** Ensure efficiency and accuracy throughout the process."""
browser = Browser()
agent = Agent(
task=task,
llm=ChatOpenAI(model='gpt-4o'),
browser=browser,
)
async def main():
await agent.run()
input('Press Enter to close the browser...')
await browser.close()
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,48 @@
# Goal: Automates posting on X (Twitter) using stored authentication cookies.
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.browser.context import BrowserContext, BrowserContextConfig
api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
raise ValueError('GOOGLE_API_KEY is not set')
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
browser = Browser(
config=BrowserConfig(
# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
)
)
file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
context = BrowserContext(browser=browser, config=BrowserContextConfig(cookies_file=file_path))
async def main():
agent = Agent(
browser_context=context,
task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
llm=llm,
max_actions_per_step=4,
)
await agent.run(max_steps=25)
input('Press Enter to close the browser...')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,77 @@
# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
import asyncio
import os
import sys
# Adjust Python path
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import AzureChatOpenAI, ChatOpenAI
from pydantic import SecretStr
from browser_use.agent.service import Agent
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
# Set LLM based on defined environment variables
if os.getenv('OPENAI_API_KEY'):
llm = ChatOpenAI(
model='gpt-4o',
)
elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'):
llm = AzureChatOpenAI(
model='gpt-4o',
api_version='2024-10-21',
azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
)
else:
raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')
browser = Browser(
config=BrowserConfig(
headless=False, # This is True in production
disable_security=True,
new_context_config=BrowserContextConfig(
disable_security=True,
minimum_wait_page_load_time=1, # 3 on prod
maximum_wait_page_load_time=10, # 20 on prod
# Set no_viewport=False to constrain the viewport to the specified dimensions
# This is useful for specific cases where you need a fixed viewport size
no_viewport=False,
window_width=1280,
window_height=1100,
# trace_path='./tmp/web_voyager_agent',
),
)
)
# TASK = """
# Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2025, including the total travel time and number of stops. on https://www.google.com/travel/flights/
# """
# TASK = """
# Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree? on https://www.coursera.org/"""
TASK = """
Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2025. on https://www.booking.com/
"""
async def main():
agent = Agent(
task=TASK,
llm=llm,
browser=browser,
validate_output=True,
enable_memory=False,
)
history = await agent.run(max_steps=50)
history.save_to_file('./tmp/history.json')
if __name__ == '__main__':
asyncio.run(main())

View file

@ -0,0 +1,39 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
# video https://preview.screen.studio/share/vuq91Ej8
llm = ChatOpenAI(
model='gpt-4o',
temperature=0.0,
)
task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'
browser = Browser(
config=BrowserConfig(
new_context_config=BrowserContextConfig(
viewport_expansion=-1,
highlight_elements=False,
),
),
)
agent = Agent(task=task, llm=llm, browser=browser, use_vision=False)
async def main():
await agent.run()
if __name__ == '__main__':
asyncio.run(main())