[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/examples/use-cases/README.md
+++ b/browser-use/examples/use-cases/README.md
@ -0,0 +1,12 @@
+# Use Cases of Browser-Use
+
+| File Name | Description |
+|-----------|------------|
+| `captcha.py` | Automates CAPTCHA solving on a demo website. |
+| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
+| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
+| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
+| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
+| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
+| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
+| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |
--- a/browser-use/examples/use-cases/captcha.py
+++ b/browser-use/examples/use-cases/captcha.py
@ -0,0 +1,40 @@
+"""
+Goal: Automates CAPTCHA solving on a demo website.
+
+
+Simple try of the agent.
+@dev You need to add OPENAI_API_KEY to your environment variables.
+NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
+for this example it helps to zoom in.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+async def main():
+	llm = ChatOpenAI(model='gpt-4o')
+	agent = Agent(
+		task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
+		llm=llm,
+	)
+	await agent.run()
+	input('Press Enter to exit')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/check_appointment.py
+++ b/browser-use/examples/use-cases/check_appointment.py
@ -0,0 +1,52 @@
+# Goal: Checks for available visa appointment slots on the Greece MFA website.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.controller.service import Controller
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+controller = Controller()
+
+
+class WebpageInfo(BaseModel):
+	"""Parameter model for the 'Go to the webpage' action; carries the page link."""
+
+	# Default target: the Greece MFA reservation URL used by this example.
+	link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'
+
+
+@controller.action('Go to the webpage', param_model=WebpageInfo)
+def go_to_webpage(webpage_info: WebpageInfo):
+	"""Returns the webpage link."""
+	return webpage_info.link
+
+
+async def main():
+	"""Main function to execute the agent task."""
+	task = (
+		'Go to the Greece MFA webpage via the link I provided you.'
+		'Check the visa appointment dates. If there is no available date in this month, check the next month.'
+		'If there is no available date in both months, tell me there is no available date.'
+	)
+
+	model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
+	agent = Agent(task, model, controller=controller, use_vision=True)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/find_and_apply_to_jobs.py
+++ b/browser-use/examples/use-cases/find_and_apply_to_jobs.py
@ -0,0 +1,160 @@
+"""
+Goal: Searches for job listings, evaluates relevance based on a CV, and applies
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+Also you have to install PyPDF2 to read pdf files: pip install PyPDF2
+"""
+
+import asyncio
+import csv
+import logging
+import os
+import sys
+from pathlib import Path
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import AzureChatOpenAI
+from pydantic import BaseModel, SecretStr
+from PyPDF2 import PdfReader
+
+from browser_use import ActionResult, Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+required_env_vars = ['AZURE_OPENAI_KEY', 'AZURE_OPENAI_ENDPOINT']
+for var in required_env_vars:
+	if not os.getenv(var):
+		raise ValueError(f'{var} is not set. Please add it to your environment variables.')
+
+logger = logging.getLogger(__name__)
+# full screen mode
+controller = Controller()
+
+# NOTE: This is the path to your cv file
+CV = Path.cwd() / 'cv_04_24.pdf'
+
+if not CV.exists():
+	raise FileNotFoundError(f'You need to set the path to your cv file in the CV variable. CV file not found at {CV}')
+
+
+class Job(BaseModel):
+	"""Parameter model describing one job listing handed to the ``save_jobs`` action."""
+
+	title: str
+	link: str
+	company: str
+	# Score for how well the job fits the profile (per the save action's description).
+	fit_score: float
+	# Optional details; default to None when not supplied.
+	location: str | None = None
+	salary: str | None = None
+
+
+@controller.action('Save jobs to file - with a score how well it fits to my profile', param_model=Job)
+def save_jobs(job: Job):
+	with open('jobs.csv', 'a', newline='') as f:
+		writer = csv.writer(f)
+		writer.writerow([job.title, job.company, job.link, job.salary, job.location])
+
+	return 'Saved job to file'
+
+
+@controller.action('Read jobs from file')
+def read_jobs():
+	with open('jobs.csv') as f:
+		return f.read()
+
+
+@controller.action('Read my cv for context to fill forms')
+def read_cv():
+	pdf = PdfReader(CV)
+	text = ''
+	for page in pdf.pages:
+		text += page.extract_text() or ''
+	logger.info(f'Read cv with {len(text)} characters')
+	return ActionResult(extracted_content=text, include_in_memory=True)
+
+
+@controller.action(
+	'Upload cv to element - call this function to upload if element is not found, try with different index of the same upload element',
+)
+async def upload_cv(index: int, browser: BrowserContext):
+	path = str(CV.absolute())
+	dom_el = await browser.get_dom_element_by_index(index)
+
+	if dom_el is None:
+		return ActionResult(error=f'No element found at index {index}')
+
+	file_upload_dom_el = dom_el.get_file_upload_element()
+
+	if file_upload_dom_el is None:
+		logger.info(f'No file upload element found at index {index}')
+		return ActionResult(error=f'No file upload element found at index {index}')
+
+	file_upload_el = await browser.get_locate_element(file_upload_dom_el)
+
+	if file_upload_el is None:
+		logger.info(f'No file upload element found at index {index}')
+		return ActionResult(error=f'No file upload element found at index {index}')
+
+	try:
+		await file_upload_el.set_input_files(path)
+		msg = f'Successfully uploaded file "{path}" to index {index}'
+		logger.info(msg)
+		return ActionResult(extracted_content=msg)
+	except Exception as e:
+		logger.debug(f'Error in set_input_files: {str(e)}')
+		return ActionResult(error=f'Failed to upload file to index {index}')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		disable_security=True,
+	)
+)
+
+
+async def main():
+	# ground_task = (
+	# 	'You are a professional job finder. '
+	# 	'1. Read my cv with read_cv'
+	# 	'2. Read the saved jobs file '
+	# 	'3. start applying to the first link of Amazon '
+	# 	'You can navigate through pages e.g. by scrolling '
+	# 	'Make sure to be on the english version of the page'
+	# )
+	ground_task = (
+		'You are a professional job finder. '
+		'1. Read my cv with read_cv'
+		'find ml internships in and save them to a file'
+		'search at company:'
+	)
+	tasks = [
+		ground_task + '\n' + 'Google',
+		# ground_task + '\n' + 'Amazon',
+		# ground_task + '\n' + 'Apple',
+		# ground_task + '\n' + 'Microsoft',
+		# ground_task
+		# + '\n'
+		# + 'go to https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite/job/Taiwan%2C-Remote/Fulfillment-Analyst---New-College-Graduate-2025_JR1988949/apply/autofillWithResume?workerSubType=0c40f6bd1d8f10adf6dae42e46d44a17&workerSubType=ab40a98049581037a3ada55b087049b7 NVIDIA',
+		# ground_task + '\n' + 'Meta',
+	]
+	model = AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+	agents = []
+	for task in tasks:
+		agent = Agent(task=task, llm=model, controller=controller, browser=browser)
+		agents.append(agent)
+
+	await asyncio.gather(*[agent.run() for agent in agents])
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/find_influencer_profiles.py
+++ b/browser-use/examples/use-cases/find_influencer_profiles.py
@ -0,0 +1,90 @@
+"""
+Show how to use custom outputs.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import httpx
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Controller
+from browser_use.agent.views import ActionResult
+
+
+class Profile(BaseModel):
+	"""One social media profile: the platform name and the profile's URL."""
+
+	platform: str
+	profile_url: str
+
+
+class Profiles(BaseModel):
+	"""Container for a list of Profile entries; passed to the Controller as its output model."""
+
+	profiles: list[Profile]
+
+
+controller = Controller(exclude_actions=['search_google'], output_model=Profiles)
+BEARER_TOKEN = os.getenv('BEARER_TOKEN')
+
+if not BEARER_TOKEN:
+	# use the api key for ask tessa
+	# you can also use other apis like exa, xAI, perplexity, etc.
+	raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
+
+
+@controller.registry.action('Search the web for a specific query')
+async def search_web(query: str):
+	keys_to_use = ['url', 'title', 'content', 'author', 'score']
+	headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
+	async with httpx.AsyncClient() as client:
+		response = await client.post(
+			'https://asktessa.ai/api/search',
+			headers=headers,
+			json={'query': query},
+		)
+
+	final_results = [
+		{key: source[key] for key in keys_to_use if key in source}
+		for source in await response.json()['sources']
+		if source['score'] >= 0.2
+	]
+	# print(json.dumps(final_results, indent=4))
+	result_text = json.dumps(final_results, indent=4)
+	print(result_text)
+	return ActionResult(extracted_content=result_text, include_in_memory=True)
+
+
+async def main():
+	task = (
+		'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
+		' https://www.tiktokv.com/share/video/7470981717659110678/  '
+	)
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller)
+
+	history = await agent.run()
+
+	result = history.final_result()
+	if result:
+		parsed: Profiles = Profiles.model_validate_json(result)
+
+		for profile in parsed.profiles:
+			print('\n--------------------------------')
+			print(f'Platform:         {profile.platform}')
+			print(f'Profile URL:      {profile.profile_url}')
+
+	else:
+		print('No result')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/google_sheets.py
+++ b/browser-use/examples/use-cases/google_sheets.py
@ -0,0 +1,193 @@
+import os
+import sys
+
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import asyncio
+
+import pyperclip
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+
+from browser_use import ActionResult, Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	),
+)
+
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+controller = Controller()
+
+
+def is_google_sheet(page) -> bool:
+	return page.url.startswith('https://docs.google.com/spreadsheets/')
+
+
+@controller.registry.action('Google Sheets: Open a specific Google Sheet')
+async def open_google_sheet(browser: BrowserContext, google_sheet_url: str):
+	page = await browser.get_current_page()
+	if page.url != google_sheet_url:
+		await page.goto(google_sheet_url)
+		await page.wait_for_load_state()
+	if not is_google_sheet(page):
+		return ActionResult(error='Failed to open Google Sheet, are you sure you have permissions to access this sheet?')
+	return ActionResult(extracted_content=f'Opened Google Sheet {google_sheet_url}', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Get the contents of the entire sheet', page_filter=is_google_sheet)
+async def get_sheet_contents(browser: BrowserContext):
+	page = await browser.get_current_page()
+
+	# select all cells
+	await page.keyboard.press('Enter')
+	await page.keyboard.press('Escape')
+	await page.keyboard.press('ControlOrMeta+A')
+	await page.keyboard.press('ControlOrMeta+C')
+
+	extracted_tsv = pyperclip.paste()
+	return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
+
+
+@controller.registry.action('Google Sheets: Select a specific cell or range of cells', page_filter=is_google_sheet)
+async def select_cell_or_range(browser: BrowserContext, cell_or_range: str):
+	"""Select ``cell_or_range`` by driving the sheet's go-to-range popup with keystrokes.
+
+	The keystroke order and the short sleeps between steps are deliberate; see
+	the per-line comments. Always returns an ActionResult reporting the
+	selection; the code does not check whether the jump actually succeeded.
+	"""
+	page = await browser.get_current_page()
+
+	await page.keyboard.press('Enter')  # make sure we dont delete current cell contents if we were last editing
+	await page.keyboard.press('Escape')  # to clear current focus (otherwise select range popup is additive)
+	await asyncio.sleep(0.1)
+	await page.keyboard.press('Home')  # move cursor to the top left of the sheet first
+	await page.keyboard.press('ArrowUp')
+	await asyncio.sleep(0.1)
+	await page.keyboard.press('Control+G')  # open the goto range popup
+	await asyncio.sleep(0.2)
+	# NOTE(review): Playwright documents `delay` in milliseconds — confirm 0.05 is the intended value.
+	await page.keyboard.type(cell_or_range, delay=0.05)
+	await asyncio.sleep(0.2)
+	await page.keyboard.press('Enter')
+	await asyncio.sleep(0.2)
+	await page.keyboard.press('Escape')  # to make sure the popup still closes in the case where the jump failed
+	return ActionResult(extracted_content=f'Selected cell {cell_or_range}', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Get the contents of a specific cell or range of cells', page_filter=is_google_sheet)
+async def get_range_contents(browser: BrowserContext, cell_or_range: str):
+	page = await browser.get_current_page()
+
+	await select_cell_or_range(browser, cell_or_range)
+
+	await page.keyboard.press('ControlOrMeta+C')
+	await asyncio.sleep(0.1)
+	extracted_tsv = pyperclip.paste()
+	return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
+
+
+@controller.registry.action('Google Sheets: Clear the currently selected cells', page_filter=is_google_sheet)
+async def clear_selected_range(browser: BrowserContext):
+	page = await browser.get_current_page()
+
+	await page.keyboard.press('Backspace')
+	return ActionResult(extracted_content='Cleared selected range', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Input text into the currently selected cell', page_filter=is_google_sheet)
+async def input_selected_cell_text(browser: BrowserContext, text: str):
+	page = await browser.get_current_page()
+
+	await page.keyboard.type(text, delay=0.1)
+	await page.keyboard.press('Enter')  # make sure to commit the input so it doesn't get overwritten by the next action
+	await page.keyboard.press('ArrowUp')
+	return ActionResult(extracted_content=f'Inputted text {text}', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Batch update a range of cells', page_filter=is_google_sheet)
+async def update_range_contents(browser: BrowserContext, range: str, new_contents_tsv: str):
+	page = await browser.get_current_page()
+
+	await select_cell_or_range(browser, range)
+
+	# simulate paste event from clipboard with TSV content
+	await page.evaluate(f"""
+		const clipboardData = new DataTransfer();
+		clipboardData.setData('text/plain', `{new_contents_tsv}`);
+		document.activeElement.dispatchEvent(new ClipboardEvent('paste', {{clipboardData}}));
+	""")
+
+	return ActionResult(extracted_content=f'Updated cell {range} with {new_contents_tsv}', include_in_memory=False)
+
+
+# many more snippets for keyboard-shortcut based Google Sheets automation can be found here, see:
+# - https://github.com/philc/sheetkeys/blob/master/content_scripts/sheet_actions.js
+# - https://github.com/philc/sheetkeys/blob/master/content_scripts/commands.js
+# - https://support.google.com/docs/answer/181110?hl=en&co=GENIE.Platform%3DDesktop#zippy=%2Cmac-shortcuts
+
+# Tip: LLM is bad at spatial reasoning, don't make it navigate with arrow keys relative to current cell
+# if given arrow keys, it will try to jump from G1 to A2 by pressing Down, without realizing needs to go Down+LeftLeftLeftLeft
+
+
+async def main():
+	"""Run four agents one after another against the same Google Sheet.
+
+	All agents share one browser context, one model and the module-level
+	controller. Each agent is awaited before the next one is created, so they
+	run strictly in sequence: eraser, researcher, improvised_continuer,
+	final_fact_checker.
+	"""
+	async with await browser.new_context() as context:
+		model = ChatOpenAI(model='gpt-4o')
+
+		# Step 1: clear columns A-F of the sheet.
+		eraser = Agent(
+			task="""
+				Clear all the existing values in columns A through F in this Google Sheet:
+				https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+			""",
+			llm=model,
+			browser_context=context,
+			controller=controller,
+		)
+		await eraser.run()
+
+		# Step 2: research the data and append one row per company.
+		researcher = Agent(
+			task="""
+				Google to find the full name, nationality, and date of birth of the CEO of the top 10 Fortune 100 companies.
+				For each company, append a row to this existing Google Sheet: https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Make sure column headers are present and all existing values in the sheet are formatted correctly.
+				Columns:
+					A: Company Name
+					B: CEO Full Name
+					C: CEO Country of Birth
+					D: CEO Date of Birth (YYYY-MM-DD)
+					E: Source URL where the information was found
+			""",
+			llm=model,
+			browser_context=context,
+			controller=controller,
+		)
+		await researcher.run()
+
+		# Step 3: extend the sheet with three more rows following the existing pattern.
+		improvised_continuer = Agent(
+			task="""
+				Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Add 3 more rows to the bottom continuing the existing pattern, make sure any data you add is sourced correctly.
+			""",
+			llm=model,
+			browser_context=context,
+			controller=controller,
+		)
+		await improvised_continuer.run()
+
+		# Step 4: fact-check every row and record the findings in column F.
+		final_fact_checker = Agent(
+			task="""
+				Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Fact-check every entry, add a new column F with your findings for each row.
+				Make sure to check the source URL for each row, and make sure the information is correct.
+			""",
+			llm=model,
+			browser_context=context,
+			controller=controller,
+		)
+		await final_fact_checker.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/online_coding_agent.py
+++ b/browser-use/examples/use-cases/online_coding_agent.py
@ -0,0 +1,49 @@
+# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+async def main():
+	browser = Browser()
+	async with await browser.new_context() as context:
+		model = ChatOpenAI(model='gpt-4o')
+
+		# Initialize browser agent
+		agent1 = Agent(
+			task='Open an online code editor programiz.',
+			llm=model,
+			browser_context=context,
+		)
+		executor = Agent(
+			task='Executor. Execute the code written by the coder and suggest some updates if there are errors.',
+			llm=model,
+			browser_context=context,
+		)
+
+		coder = Agent(
+			task='Coder. Your job is to write and complete code. You are an expert coder. Code a simple calculator. Write the code on the coding interface after agent1 has opened the link.',
+			llm=model,
+			browser_context=context,
+		)
+		await agent1.run()
+		await executor.run()
+		await coder.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/post-twitter.py
+++ b/browser-use/examples/use-cases/post-twitter.py
@ -0,0 +1,127 @@
+"""
+Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
+
+X Posting Template using browser-use
+----------------------------------------
+
+This template allows you to automate posting on X using browser-use.
+It supports:
+- Posting new tweets
+- Tagging users
+- Replying to tweets
+
+Add your target user and message in the config section.
+
+target_user="XXXXX"
+message="XXXXX"
+reply_url="XXXXX"
+
+Any issues, contact me on X @defichemist95
+"""
+
+import asyncio
+import os
+import sys
+from dataclasses import dataclass
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+# ============ Configuration Section ============
+@dataclass
+class TwitterConfig:
+	"""Configuration for Twitter posting"""
+
+	# API key passed to ChatOpenAI in create_twitter_agent.
+	openai_api_key: str
+	# Path to the browser binary, passed as BrowserConfig.browser_binary_path.
+	chrome_path: str
+	target_user: str  # Twitter handle without @
+	# Message body; create_twitter_agent prefixes it with "@<target_user> ".
+	message: str
+	# URL the agent navigates to for the reply step of the task.
+	reply_url: str
+	headless: bool = False
+	# Model name passed to ChatOpenAI.
+	model: str = 'gpt-4o-mini'
+	# Page the agent opens first to write the post.
+	base_url: str = 'https://x.com/home'
+
+
+# Customize these settings
+config = TwitterConfig(
+	openai_api_key=os.getenv('OPENAI_API_KEY'),
+	chrome_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',  # This is for MacOS (Chrome)
+	target_user='XXXXX',
+	message='XXXXX',
+	reply_url='XXXXX',
+	headless=False,
+)
+
+
+def create_twitter_agent(config: TwitterConfig) -> Agent:
+	"""Build the Agent that posts a tagged message and replies to a tweet.
+
+	Creates a ChatOpenAI model, a Browser and a fresh Controller from ``config``
+	and embeds the target user, message, base URL and reply URL into the task
+	prompt.
+
+	Args:
+		config: Posting configuration (credentials, browser path, message, URLs).
+
+	Returns:
+		The configured Agent; it is not run here.
+	"""
+	llm = ChatOpenAI(model=config.model, api_key=config.openai_api_key)
+
+	browser = Browser(
+		config=BrowserConfig(
+			headless=config.headless,
+			browser_binary_path=config.chrome_path,
+		)
+	)
+
+	controller = Controller()
+
+	# Construct the full message with tag
+	full_message = f'@{config.target_user} {config.message}'
+
+	# Create the agent with detailed instructions
+	return Agent(
+		task=f"""Navigate to Twitter and create a post and reply to a tweet.
+
+        Here are the specific steps:
+
+        1. Go to {config.base_url}. See the text input field at the top of the page that says "What's happening?"
+        2. Look for the text input field at the top of the page that says "What's happening?"
+        3. Click the input field and type exactly this message:
+        "{full_message}"
+        4. Find and click the "Post" button (look for attributes: 'button' and 'data-testid="tweetButton"')
+        5. Do not click on the '+' button which will add another tweet.
+
+        6. Navigate to {config.reply_url}
+        7. Before replying, understand the context of the tweet by scrolling down and reading the comments.
+        8. Reply to the tweet under 50 characters.
+
+        Important:
+        - Wait for each element to load before interacting
+        - Make sure the message is typed exactly as shown
+        - Verify the post button is clickable before clicking
+        - Do not click on the '+' button which will add another tweet
+        """,
+		llm=llm,
+		controller=controller,
+		browser=browser,
+	)
+
+
+async def post_tweet(agent: Agent):
+	try:
+		await agent.run(max_steps=100)
+		agent.create_history_gif()
+		print('Tweet posted successfully!')
+	except Exception as e:
+		print(f'Error posting tweet: {str(e)}')
+
+
+async def main():
+	agent = create_twitter_agent(config)
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/scrolling_page.py
+++ b/browser-use/examples/use-cases/scrolling_page.py
@ -0,0 +1,43 @@
+# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set')
+
+"""
+Example: Using the 'Scroll down' action.
+
+This script demonstrates how the agent can navigate to a webpage and scroll down the content.
+If no amount is specified, the agent will scroll down by one page height.
+"""
+
+llm = ChatOpenAI(model='gpt-4o')
+
+agent = Agent(
+	# task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll down by one page - then scroll up by 100 pixels - then scroll down by 100 pixels - then scroll down by 10000 pixels.",
+	task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll to the string 'The vast majority of computer'",
+	llm=llm,
+	browser=Browser(config=BrowserConfig(headless=False)),
+)
+
+
+async def main():
+	"""Run the module-level scrolling ``agent`` once."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/shopping.py
+++ b/browser-use/examples/use-cases/shopping.py
@ -0,0 +1,128 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+task = """
+   ### Prompt for Shopping Agent – Migros Online Grocery Order
+
+**Objective:**
+Visit [Migros Online](https://www.migros.ch/en), search for the required grocery items, add them to the cart, select an appropriate delivery window, and complete the checkout process using TWINT.
+
+**Important:**
+- Make sure that you don't buy more than it's needed for each article.
+- After your search, if you click  the "+" button, it adds the item to the basket.
+- if you open the basket sidewindow menu, you can close it by clicking the X button on the top right. This will help you navigate easier.
+---
+
+### Step 1: Navigate to the Website
+- Open [Migros Online](https://www.migros.ch/en).
+- You should be logged in as Nikolaos Kaliorakis
+
+---
+
+### Step 2: Add Items to the Basket
+
+#### Shopping List:
+
+**Meat & Dairy:**
+- Beef Minced meat (1 kg)
+- Gruyère cheese (grated preferably)
+- 2 liters full-fat milk
+- Butter (cheapest available)
+
+**Vegetables:**
+- Carrots (1kg pack)
+- Celery
+- Leeks (1 piece)
+- 1 kg potatoes
+
+At this stage, check the basket on the top right (indicates the price) and check if you bought the right items.
+
+**Fruits:**
+- 2 lemons
+- Oranges (for snacking)
+
+**Pantry Items:**
+- Lasagna sheets
+- Tahini
+- Tomato paste (below CHF2)
+- Black pepper refill (not with the mill)
+- 2x 1L Oatly Barista(oat milk)
+- 1 pack of eggs (10 egg package)
+
+#### Ingredients I already have (DO NOT purchase):
+- Olive oil, garlic, canned tomatoes, dried oregano, bay leaves, salt, chili flakes, flour, nutmeg, cumin.
+
+---
+
+### Step 3: Handling Unavailable Items
+- If an item is **out of stock**, find the best alternative.
+- Use the following recipe contexts to choose substitutions:
+  - **Pasta Bolognese & Lasagna:** Minced meat, tomato paste, lasagna sheets, milk (for béchamel), Gruyère cheese.
+  - **Hummus:** Tahini, chickpeas, lemon juice, olive oil.
+  - **Chickpea Curry Soup:** Chickpeas, leeks, curry, lemons.
+  - **Crispy Slow-Cooked Pork Belly with Vegetables:** Potatoes, butter.
+- Example substitutions:
+  - If Gruyère cheese is unavailable, select another semi-hard cheese.
+  - If Tahini is unavailable, a sesame-based alternative may work.
+
+---
+
+### Step 4: Adjusting for Minimum Order Requirement
+- If the total order **is below CHF 99**, add **a liquid soap refill** to reach the minimum. If it;s still you can buy some bread, dark chockolate.
+- At this step, check if you have bought MORE items than needed. If the price is more then CHF200, you MUST remove items.
+- If an item is not available, choose an alternative.
+- if an age verification is needed, remove alcoholic products, we haven't verified yet.
+
+---
+
+### Step 5: Select Delivery Window
+- Choose a **delivery window within the current week**. It's ok to pay up to CHF2 for the window selection.
+- Preferably select a slot within the workweek.
+
+---
+
+### Step 6: Checkout
+- Proceed to checkout.
+- Select **TWINT** as the payment method.
+- Check out.
+- 
+- if it's needed the username is: nikoskalio.dev@gmail.com 
+- and the password is : TheCircuit.Migros.dev!
+---
+
+### Step 7: Confirm Order & Output Summary
+- Once the order is placed, output a summary including:
+  - **Final list of items purchased** (including any substitutions).
+  - **Total cost**.
+  - **Chosen delivery time**.
+
+**Important:** Ensure efficiency and accuracy throughout the process."""
+
+browser = Browser()
+
+agent = Agent(
+	task=task,
+	llm=ChatOpenAI(model='gpt-4o'),
+	browser=browser,
+)
+
+
+async def main():
+	await agent.run()
+	input('Press Enter to close the browser...')
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/twitter_post_using_cookies.py
+++ b/browser-use/examples/use-cases/twitter_post_using_cookies.py
@ -0,0 +1,48 @@
+# Goal: Automates posting on X (Twitter) using stored authentication cookies.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+
+browser = Browser(
+	config=BrowserConfig(
+		# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
+context = BrowserContext(browser=browser, config=BrowserContextConfig(cookies_file=file_path))
+
+
+async def main():
+	agent = Agent(
+		browser_context=context,
+		task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
+		llm=llm,
+		max_actions_per_step=4,
+	)
+	await agent.run(max_steps=25)
+	input('Press Enter to close the browser...')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/web_voyager_agent.py
+++ b/browser-use/examples/use-cases/web_voyager_agent.py
@ -0,0 +1,77 @@
+# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
+
+import asyncio
+import os
+import sys
+
+# Adjust Python path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+# Set LLM based on defined environment variables
+if os.getenv('OPENAI_API_KEY'):
+	llm = ChatOpenAI(
+		model='gpt-4o',
+	)
+elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'):
+	llm = AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+else:
+	raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,  # This is True in production
+		disable_security=True,
+		new_context_config=BrowserContextConfig(
+			disable_security=True,
+			minimum_wait_page_load_time=1,  # 3 on prod
+			maximum_wait_page_load_time=10,  # 20 on prod
+			# Set no_viewport=False to constrain the viewport to the specified dimensions
+			# This is useful for specific cases where you need a fixed viewport size
+			no_viewport=False,
+			window_width=1280,
+			window_height=1100,
+			# trace_path='./tmp/web_voyager_agent',
+		),
+	)
+)
+
+# TASK = """
+# Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2025, including the total travel time and number of stops. on https://www.google.com/travel/flights/
+# """
+# TASK = """
+# Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree? on https://www.coursera.org/"""
+TASK = """
+Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2025. on https://www.booking.com/
+"""
+
+
+async def main():
+	agent = Agent(
+		task=TASK,
+		llm=llm,
+		browser=browser,
+		validate_output=True,
+		enable_memory=False,
+	)
+	history = await agent.run(max_steps=50)
+	history.save_to_file('./tmp/history.json')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/wikipedia_banana_to_quantum.py
+++ b/browser-use/examples/use-cases/wikipedia_banana_to_quantum.py
@ -0,0 +1,39 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+# video https://preview.screen.studio/share/vuq91Ej8
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'
+
+browser = Browser(
+	config=BrowserConfig(
+		new_context_config=BrowserContextConfig(
+			viewport_expansion=-1,
+			highlight_elements=False,
+		),
+	),
+)
+agent = Agent(task=task, llm=llm, browser=browser, use_vision=False)
+
+
+async def main():
+	"""Run the module-level Wikipedia navigation ``agent`` once."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())