[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/examples/browser/real_browser.py
+++ b/browser-use/examples/browser/real_browser.py
@ -0,0 +1,37 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig
+
+browser = Browser(
+	config=BrowserConfig(
+		# NOTE: you need to close your chrome browser - so that this can open your browser in debug mode
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+
+
+async def main():
+	"""Drive the user's own Chrome install through a short Google Docs task.
+
+	The module-level ``browser`` is always closed on the way out, even when
+	the agent run fails.
+	"""
+	agent = Agent(
+		task='In docs.google.com write my Papa a quick letter',
+		llm=ChatOpenAI(model='gpt-4o'),
+		browser=browser,
+	)
+
+	try:
+		await agent.run()
+		# Pause while the window is still open so the result can be inspected;
+		# the prompt promises that pressing Enter is what closes the browser.
+		input('Press Enter to close...')
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/browser/stealth.py
+++ b/browser-use/examples/browser/stealth.py
@ -0,0 +1,83 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig, BrowserContextConfig
+
+llm = ChatOpenAI(model='gpt-4o')
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+		disable_security=False,
+		keep_alive=True,
+		new_context_config=BrowserContextConfig(
+			keep_alive=True,
+			disable_security=False,
+		),
+	)
+)
+
+
+async def main():
+	"""Run each bot-detection check in turn against the shared browser.
+
+	Every entry pairs an agent task with the prompt shown once that task has
+	finished; ``None`` means the next task starts without waiting for Enter.
+	"""
+	checks = [
+		(
+			"""
+            Go to https://bot-detector.rebrowser.net/ and verify that all the bot checks are passed.
+        """,
+			'Press Enter to continue to the next test...',
+		),
+		(
+			"""
+            Go to https://www.webflow.com/ and verify that the page is not blocked by a bot check.
+        """,
+			'Press Enter to continue to the next test...',
+		),
+		(
+			"""
+            Go to https://www.okta.com/ and verify that the page is not blocked by a bot check.
+        """,
+			None,
+		),
+		(
+			"""
+            Go to https://abrahamjuliot.github.io/creepjs/ and verify that the detection score is >50%.
+        """,
+			'Press Enter to close the browser...',
+		),
+		(
+			"""
+            Go to https://nowsecure.nl/ check the "I'm not a robot" checkbox.
+        """,
+			'Press Enter to close the browser...',
+		),
+	]
+
+	for task, prompt in checks:
+		# a fresh Agent per task, all sharing the module-level llm and browser
+		agent = Agent(task=task, llm=llm, browser=browser)
+		await agent.run()
+		if prompt is not None:
+			input(prompt)
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/browser/using_cdp.py
+++ b/browser-use/examples/browser/using_cdp.py
@ -0,0 +1,61 @@
+"""
+Simple demonstration of the CDP feature.
+
+To test this locally, follow these steps:
+1. Create a shortcut for the executable Chrome file.
+2. Add the following argument to the shortcut:
+   - On Windows: `--remote-debugging-port=9222`
+3. Open a web browser and navigate to `http://localhost:9222/json/version` to verify that the Chrome DevTools Protocol (CDP) endpoint is running.
+4. Launch this example.
+
+@dev You need to set the `GOOGLE_API_KEY` environment variable before proceeding.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+		cdp_url='http://localhost:9222',
+	)
+)
+controller = Controller()
+
+
+async def main():
+	"""Run a Google Docs task in the Chrome instance reachable over CDP.
+
+	The module-level ``browser`` is always closed on the way out, even when
+	the agent run fails.
+	"""
+	task = 'In docs.google.com write my Papa a quick thank you for everything letter \n - Magnus'
+	task += ' and save the document as pdf'
+	model = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(str(api_key)))
+	agent = Agent(
+		task=task,
+		llm=model,
+		controller=controller,
+		browser=browser,
+	)
+
+	try:
+		await agent.run()
+		# Pause while the browser is still attached so the result can be
+		# inspected; the prompt promises that pressing Enter is what closes it.
+		input('Press Enter to close...')
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/action_filters.py
+++ b/browser-use/examples/custom-functions/action_filters.py
@ -0,0 +1,94 @@
+"""
+Action filters (domains and page_filter) let you limit actions available to the Agent on a step-by-step/page-by-page basis.
+
+@registry.action(..., domains=['*'], page_filter=lambda page: True)
+async def some_action(browser: BrowserContext):
+    ...
+
+This helps prevent the LLM from deciding to use an action that is not compatible with the current page.
+It helps limit decision fatigue by scoping actions only to pages where they make sense.
+It also helps prevent mis-triggering stateful actions or actions that could break other programs or leak secrets.
+
+For example:
+    - only run on certain domains @registry.action(..., domains=['example.com', '*.example.com', 'example.co.*']) (supports globs, but no regex)
+    - only fill in a password on a specific login page url
+    - only run if this action has not run before on this page (e.g. by looking up the url in a file on disk)
+
+During each step, the agent recalculates the actions available specifically for that page, and informs the LLM.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from playwright.async_api import Page
+
+from browser_use.agent.service import Agent, Browser, BrowserContext, Controller
+
+# Initialize controller and registry
+controller = Controller()
+registry = controller.registry
+
+
+# Action will only be available to Agent on Google domains because of the domain filter
+@registry.action(description='Trigger disco mode', domains=['google.com', '*.google.com'])
+async def disco_mode(browser: BrowserContext):
+	"""Make every element on the current page wiggle with a CSS animation."""
+	wiggle_script = """() => { 
+        // define the wiggle animation
+        document.styleSheets[0].insertRule('@keyframes wiggle { 0% { transform: rotate(0deg); } 50% { transform: rotate(10deg); } 100% { transform: rotate(0deg); } }');
+        
+        document.querySelectorAll("*").forEach(element => {
+            element.style.animation = "wiggle 0.5s infinite";
+        });
+    }"""
+	current_page = await browser.get_current_page()
+	await current_page.evaluate(wiggle_script)
+
+
+# you can create a custom page filter function that determines if the action should be available for a given page
+def is_login_page(page: Page) -> bool:
+	"""Return True when the page URL contains 'login' or 'signin' (case-insensitive)."""
+	lowered_url = page.url.lower()
+	return any(marker in lowered_url for marker in ('login', 'signin'))
+
+
+# then use it in the action decorator to limit the action to only be available on pages where the filter returns True
+@registry.action(description='Use the force, luke', page_filter=is_login_page)
+async def use_the_force(browser: BrowserContext):
+	"""Replace the body of the current login page with a fixed message."""
+	mind_trick = """() => { document.querySelector('body').innerHTML = 'These are not the droids you are looking for';}"""
+
+	login_page = await browser.get_current_page()
+	# the page filter should already guarantee this; double-check before touching the DOM
+	assert is_login_page(login_page)
+
+	await login_page.evaluate(mind_trick)
+
+
+async def main():
+	"""Run the action-filter demo and always close the browser afterwards."""
+	browser = Browser()
+	llm = ChatOpenAI(model_name='gpt-4o')
+
+	# Create the agent
+	agent = Agent(  # disco mode will not be triggered on apple.com because the LLM won't be able to see that action available, it should work on Google.com though.
+		task="""
+            Go to apple.com and trigger disco mode (if dont know how to do that, then just move on).
+            Then go to google.com and trigger disco mode.
+            After that, go to the Google login page and Use the force, luke.
+        """,
+		llm=llm,
+		browser=browser,
+		controller=controller,
+	)
+
+	try:
+		# Run the agent
+		await agent.run(max_steps=10)
+	finally:
+		# Cleanup must also happen when the run raises, otherwise the browser is left open
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/advanced_search.py
+++ b/browser-use/examples/custom-functions/advanced_search.py
@ -0,0 +1,98 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import httpx
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import ActionResult, Agent, Controller
+
+
+class Person(BaseModel):
+	# One person in the structured result: a required name and an optional email.
+	name: str
+	# Defaults to None when no address is supplied.
+	email: str | None = None
+
+
+class PersonList(BaseModel):
+	# Top-level result shape; passed to Controller(output_model=...) in this file.
+	people: list[Person]
+
+
+controller = Controller(exclude_actions=['search_google'], output_model=PersonList)
+BEARER_TOKEN = os.getenv('BEARER_TOKEN')
+
+if not BEARER_TOKEN:
+	# use the api key for ask tessa
+	# you can also use other apis like exa, xAI, perplexity, etc.
+	raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
+
+
+@controller.registry.action('Search the web for a specific query')
+async def search_web(query: str):
+	"""Query the search API and return its high-confidence hits as JSON text.
+
+	Only sources scoring at least 0.8 are kept, and each one is trimmed to the
+	fields listed in ``keys_to_use``.
+
+	Raises:
+		httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
+	"""
+	keys_to_use = ['url', 'title', 'content', 'author', 'score']
+	headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
+	async with httpx.AsyncClient() as client:
+		response = await client.post('https://asktessa.ai/api/search', headers=headers, json={'query': query})
+	# Surface auth/quota failures directly instead of failing later on an error payload.
+	response.raise_for_status()
+
+	final_results = [
+		{key: source[key] for key in keys_to_use if key in source}
+		for source in response.json().get('sources', [])
+		# A source without a score cannot clear the threshold; drop it rather than raise KeyError.
+		if source.get('score', 0) >= 0.8
+	]
+	result_text = json.dumps(final_results, indent=4)
+	print(result_text)
+	return ActionResult(extracted_content=result_text, include_in_memory=True)
+
+
+names = [
+	'Ruedi Aebersold',
+	'Bernd Bodenmiller',
+	'Eugene Demler',
+	'Erich Fischer',
+	'Pietro Gambardella',
+	'Matthias Huss',
+	'Reto Knutti',
+	'Maksym Kovalenko',
+	'Antonio Lanzavecchia',
+	'Maria Lukatskaya',
+	'Jochen Markard',
+	'Javier Pérez-Ramírez',
+	'Federica Sallusto',
+	'Gisbert Schneider',
+	'Sonia I. Seneviratne',
+	'Michael Siegrist',
+	'Johan Six',
+	'Tanja Stadler',
+	'Shinichi Sunagawa',
+	'Michael Bruce Zimmermann',
+]
+
+
+async def main():
+	"""Ask the agent to look up an email for every name and print the parsed result."""
+	instructions = 'use search_web with "find email address of the following ETH professor:" for each of the following persons in a list of actions. Finally return the list with name and email if provided'
+	task = f'{instructions}\n' + '\n'.join(names)
+	agent = Agent(
+		task=task,
+		llm=ChatOpenAI(model='gpt-4o'),
+		controller=controller,
+		max_actions_per_step=20,
+	)
+
+	history = await agent.run()
+	final_answer = history.final_result()
+
+	if not final_answer:
+		print('No result')
+		return
+
+	# the controller was configured with output_model=PersonList, so the answer is its JSON
+	parsed: PersonList = PersonList.model_validate_json(final_answer)
+	for entry in parsed.people:
+		print(f'{entry.name} - {entry.email}')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/clipboard.py
+++ b/browser-use/examples/custom-functions/clipboard.py
@ -0,0 +1,60 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import pyperclip
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.agent.views import ActionResult
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+	)
+)
+controller = Controller()
+
+
+@controller.registry.action('Copy text to clipboard')
+def copy_to_clipboard(text: str):
+	"""Put ``text`` on the system clipboard and echo it back as the action result."""
+	pyperclip.copy(text)
+	outcome = ActionResult(extracted_content=text)
+	return outcome
+
+
+@controller.registry.action('Paste text from clipboard')
+async def paste_from_clipboard(browser: BrowserContext):
+	"""Type the current clipboard text into the active page and report what was typed."""
+	clipboard_text = pyperclip.paste()
+
+	# replay the clipboard content as keystrokes on the current page
+	active_page = await browser.get_current_page()
+	await active_page.keyboard.type(clipboard_text)
+
+	return ActionResult(extracted_content=clipboard_text)
+
+
+async def main():
+	"""Run the clipboard demo task; the browser is always closed on the way out."""
+	task = 'Copy the text "Hello, world!" to the clipboard, then go to google.com and paste the text'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(
+		task=task,
+		llm=model,
+		controller=controller,
+		browser=browser,
+	)
+
+	try:
+		await agent.run()
+		# Pause while the window is still open so the result can be inspected;
+		# the prompt promises that pressing Enter is what closes the browser.
+		input('Press Enter to close...')
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/custom_hooks_before_after_step.py
+++ b/browser-use/examples/custom-functions/custom_hooks_before_after_step.py
@ -0,0 +1,236 @@
+"""
+Description: These Python modules are designed to capture detailed
+browser usage data for analysis, with both server and client
+components working together to record and store the information.
+
+Author: Carlos A. Planchón
+https://github.com/carlosplanchon/
+
+Adapt this code to your needs.
+
+Feedback is appreciated!
+"""
+
+#####################
+#                   #
+#   --- UTILS ---   #
+#                   #
+#####################
+
+import base64
+
+
+def b64_to_png(b64_string: str, output_file):
+	"""
+	Decode Base64 text and store the resulting bytes as a PNG file.
+
+	:param b64_string: Base64-encoded image data
+	:param output_file: Destination path; the file is opened for binary writing
+	"""
+	with open(output_file, 'wb') as png_file:
+		raw_bytes = base64.b64decode(b64_string)
+		png_file.write(raw_bytes)
+
+
+###################################################################
+#                                                                 #
+#   --- FASTAPI API TO RECORD AND SAVE Browser-Use ACTIVITY ---   #
+#                                                                 #
+###################################################################
+
+# Save to api.py and run with `python api.py`
+
+# ! pip install uvicorn
+# ! pip install fastapi
+# ! pip install prettyprinter
+
+import json
+from pathlib import Path
+
+import prettyprinter
+from fastapi import FastAPI, Request
+
+prettyprinter.install_extras()
+
+app = FastAPI()
+
+
+@app.post('/post_agent_history_step')
+async def post_agent_history_step(request: Request):
+	"""Pretty-print the posted JSON and save it as the next numbered file in ./recordings."""
+	payload = await request.json()
+	prettyprinter.cpprint(payload)
+
+	# Make sure the output folder is there before listing it
+	out_dir = Path('recordings')
+	out_dir.mkdir(exist_ok=True)
+
+	# Collect the numbers already used by files named <number>.json
+	used_numbers = []
+	for entry in out_dir.iterdir():
+		if not (entry.is_file() and entry.suffix == '.json'):
+			continue
+		try:
+			used_numbers.append(int(entry.stem))
+		except ValueError:
+			# file name is not a plain number - not one of ours
+			continue
+
+	# Numbering starts at 1 and continues after the highest number seen
+	next_number = max(used_numbers, default=0) + 1
+	file_path = out_dir / f'{next_number}.json'
+
+	with file_path.open('w') as out:
+		json.dump(payload, out, indent=2)
+
+	return {'status': 'ok', 'message': f'Saved to {file_path}'}
+
+
+if __name__ == '__main__':
+	import uvicorn
+
+	uvicorn.run(app, host='0.0.0.0', port=9000)
+
+
+##############################################################
+#                                                            #
+#   --- CLIENT TO RECORD AND SAVE Browser-Use ACTIVITY ---   #
+#                                                            #
+##############################################################
+
+"""
+pyobjtojson:
+
+A Python library to safely and recursively serialize any Python object
+(including Pydantic models and dataclasses) into JSON-ready structures,
+gracefully handling circular references.
+"""
+
+# ! pip install -U pyobjtojson
+# ! pip install -U prettyprinter
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import requests
+from langchain_openai import ChatOpenAI
+from pyobjtojson import obj_to_json
+
+from browser_use import Agent
+
+# import prettyprinter
+# prettyprinter.install_extras()
+
+
+def send_agent_history_step(data, timeout: float = 10.0):
+	"""
+	POST one step summary to the local recording API and return its JSON reply.
+
+	:param data: JSON-serializable payload describing the step
+	:param timeout: Seconds to wait for the server; without a timeout
+	    ``requests`` waits indefinitely, which would stall the agent hook
+	:raises requests.RequestException: On connection errors, timeouts or a
+	    4xx/5xx response
+	"""
+	url = 'http://127.0.0.1:9000/post_agent_history_step'
+	response = requests.post(url, json=data, timeout=timeout)
+	# Report server-side failures as such rather than as a JSON decoding error.
+	response.raise_for_status()
+	return response.json()
+
+
+async def record_activity(agent_obj):
+	"""
+	Capture the current page and the latest history entries, then post them
+	to the recording API.
+
+	Written to be passed as the ``on_step_start`` hook of ``agent.run``. The
+	summary holds the page HTML and screenshot plus the last element of each
+	history view (URL, model thoughts, outputs, actions, extracted content).
+	An empty view is reported as ``None``, and so is every view when the agent
+	exposes no history.
+
+	:param agent_obj: The running agent; must provide ``browser_context``
+	"""
+	print('--- ON_STEP_START HOOK ---')
+	website_html: str = await agent_obj.browser_context.get_page_html()
+	website_screenshot: str = await agent_obj.browser_context.take_screenshot()
+
+	print('--> History:')
+	# Resolve the history once and tolerate its absence, so every lookup below
+	# degrades to None instead of raising AttributeError on a missing state.
+	history = getattr(getattr(agent_obj, 'state', None), 'history', None)
+
+	def latest(view_name):
+		"""Return the JSON-ready last element of the named history view, or None."""
+		if history is None:
+			return None
+		entries = obj_to_json(obj=getattr(history, view_name)(), check_circular=False)
+		return entries[-1] if len(entries) > 0 else None
+
+	model_step_summary = {
+		'website_html': website_html,
+		'website_screenshot': website_screenshot,
+		'url': latest('urls'),
+		'model_thoughts': latest('model_thoughts'),
+		'model_outputs': latest('model_outputs'),
+		'model_actions': latest('model_actions'),
+		'extracted_content': latest('extracted_content'),
+	}
+
+	print('--- MODEL STEP SUMMARY ---')
+
+	send_agent_history_step(data=model_step_summary)
+
+
+agent = Agent(
+	task='Compare the price of gpt-4o and DeepSeek-V3',
+	llm=ChatOpenAI(model='gpt-4o'),
+)
+
+
+async def run_agent():
+	"""Run the module-level agent with the recording hook; failures are printed, not raised."""
+	try:
+		await agent.run(max_steps=30, on_step_start=record_activity)
+	except Exception as exc:
+		print(exc)
+
+
+asyncio.run(run_agent())
--- a/browser-use/examples/custom-functions/file_upload.py
+++ b/browser-use/examples/custom-functions/file_upload.py
@ -0,0 +1,112 @@
+import asyncio
+import logging
+import os
+import sys
+from pathlib import Path
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import anyio
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.agent.views import ActionResult
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+logger = logging.getLogger(__name__)
+
+# Initialize the browser and controller first
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+controller = Controller()
+
+
+@controller.action(
+	'Upload file to interactive element with file path ',
+)
+async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
+	"""Set ``path`` as the input file of the upload element found at ``index``.
+
+	Returns an ``ActionResult`` carrying an error when the path is not
+	whitelisted, does not exist, no upload element can be resolved, or the
+	upload itself fails; otherwise a success message kept in memory.
+	"""
+
+	def logged_error(message: str) -> ActionResult:
+		# element-related failures are logged as well as returned
+		logger.info(message)
+		return ActionResult(error=message)
+
+	# path checks come first and are returned without logging
+	if path not in available_file_paths:
+		return ActionResult(error=f'File path {path} is not available')
+	if not os.path.exists(path):
+		return ActionResult(error=f'File {path} does not exist')
+
+	node = await browser.get_dom_element_by_index(index)
+	upload_node = node.get_file_upload_element()
+	if upload_node is None:
+		return logged_error(f'No file upload element found at index {index}')
+
+	upload_handle = await browser.get_locate_element(upload_node)
+	if upload_handle is None:
+		return logged_error(f'No file upload element found at index {index}')
+
+	try:
+		await upload_handle.set_input_files(path)
+		success = f'Successfully uploaded file to index {index}'
+		logger.info(success)
+		return ActionResult(extracted_content=success, include_in_memory=True)
+	except Exception as exc:
+		return logged_error(f'Failed to upload file to index {index}: {exc!s}')
+
+
+@controller.action('Read the file content of a file given a path')
+async def read_file(path: str, available_file_paths: list[str]):
+	"""Return the text of ``path``, provided it is one of the whitelisted file paths."""
+	allowed = path in available_file_paths
+	if not allowed:
+		return ActionResult(error=f'File path {path} is not available')
+
+	async with await anyio.open_file(path, 'r') as handle:
+		text = await handle.read()
+
+	message = f'File content: {text}'
+	logger.info(message)
+	return ActionResult(extracted_content=message, include_in_memory=True)
+
+
+def create_file(file_type: str = 'txt'):
+	"""Write a small ``tmp.<file_type>`` file into the working directory and return its absolute path."""
+	target = Path.cwd() / f'tmp.{file_type}'
+	# the content is the literal text 'test' whatever the extension
+	target.write_text('test')
+	logger.info(f'Created file: {target}')
+	return str(target)
+
+
+async def main():
+	"""Create three sample files, let the agent read and upload them, then close the browser."""
+	task = 'Go to https://kzmpmkh2zfk1ojnpxfn1.lite.vusercontent.net/ and - read the file content and upload them to fields'
+
+	available_file_paths = [create_file('txt'), create_file('pdf'), create_file('csv')]
+
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(
+		task=task,
+		llm=model,
+		controller=controller,
+		browser=browser,
+		available_file_paths=available_file_paths,
+	)
+
+	try:
+		await agent.run()
+		# Pause while the window is still open so the uploads can be inspected;
+		# the prompt promises that pressing Enter is what closes the browser.
+		input('Press Enter to close...')
+	finally:
+		# also reached when the run raises, so the browser is never left open
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/hover_element.py
+++ b/browser-use/examples/custom-functions/hover_element.py
@ -0,0 +1,97 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Controller
+from browser_use.agent.views import ActionResult
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+
+class HoverAction(BaseModel):
+	# Ways to identify the element to hover; all optional.
+	# hover_element checks xpath first, then selector, then index.
+	index: int | None = None
+	xpath: str | None = None
+	selector: str | None = None
+
+
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+	)
+)
+controller = Controller()
+
+
+@controller.registry.action(
+	'Hover over an element',
+	param_model=HoverAction,  # accepts any one of: index, xpath, selector
+)
+async def hover_element(params: HoverAction, browser: BrowserContext):
+	"""
+	Hovers over the element specified by XPath, CSS selector, or its index in the cached selector map.
+
+	The locators are tried in that order and the first one provided wins.
+	The returned message names the locator that was actually used.
+
+	Raises:
+		Exception: If no locator is provided, the element cannot be found, or the hover itself fails.
+	"""
+	session = await browser.get_session()
+	state = session.cached_state
+
+	if params.xpath:
+		# Use XPath to locate the element
+		element_handle = await browser.get_locate_element_by_xpath(params.xpath)
+		if element_handle is None:
+			raise Exception(f'Failed to locate element with XPath {params.xpath}')
+		target = f'element with XPath {params.xpath}'
+	elif params.selector:
+		# Use CSS selector to locate the element
+		element_handle = await browser.get_locate_element_by_css_selector(params.selector)
+		if element_handle is None:
+			raise Exception(f'Failed to locate element with CSS Selector {params.selector}')
+		target = f'element with CSS Selector {params.selector}'
+	elif params.index is not None:
+		# Use index to locate the element
+		if state is None or params.index not in state.selector_map:
+			raise Exception(f'Element index {params.index} does not exist - retry or use alternative actions')
+		element_node = state.selector_map[params.index]
+		element_handle = await browser.get_locate_element(element_node)
+		if element_handle is None:
+			raise Exception(f'Failed to locate element with index {params.index}')
+		target = f'element at index {params.index}'
+	else:
+		raise Exception('One of index, xpath or selector must be provided')
+
+	try:
+		await element_handle.hover()
+	except Exception as e:
+		err_msg = f'❌ Failed to hover over element: {str(e)}'
+		# keep the original failure attached as the cause for debugging
+		raise Exception(err_msg) from e
+
+	# Report the locator that was really used, so a selector hover no longer reads "XPath None"
+	msg = f'🖱️ Hovered over {target}'
+	return ActionResult(extracted_content=msg, include_in_memory=True)
+
+
+async def main():
+	"""Run the hover demo task; the browser is always closed on the way out."""
+	task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the css selector #hoverdivpara, then click on "Can you click me?"'
+	# task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the xpath //*[@id="hoverdivpara"], then click on "Can you click me?"'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(
+		task=task,
+		llm=model,
+		controller=controller,
+		browser=browser,
+	)
+
+	try:
+		await agent.run()
+		# Pause while the window is still open so the result can be inspected;
+		# the prompt promises that pressing Enter is what closes the browser.
+		input('Press Enter to close...')
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/notification.py
+++ b/browser-use/examples/custom-functions/notification.py
@ -0,0 +1,45 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import ActionResult, Agent, Controller
+
+controller = Controller()
+
+
+@controller.registry.action('Done with task ')
+async def done(text: str):
+	"""Email ``text`` through Gmail and mark the task as finished."""
+	import yagmail
+
+	# Sending mail needs a Gmail app password, not the normal account password:
+	# 1. open https://support.google.com/accounts/answer/185833
+	# 2. create an app password
+	# 3. put it in place of 'your_app_password' below
+	mailer = yagmail.SMTP('your_email@gmail.com', 'your_app_password')
+	body = f'result\n: {text}'
+	mailer.send(to='recipient@example.com', subject='Test Email', contents=body)
+
+	return ActionResult(is_done=True, extracted_content='Email sent!')
+
+
+async def main():
+	"""Run a minimal task that ends by calling the custom ``done`` action."""
+	# The domain was misspelled as 'brower-use.com', which sent the agent to the wrong site.
+	task = 'go to browser-use.com and then done'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/onepassword_2fa.py
+++ b/browser-use/examples/custom-functions/onepassword_2fa.py
@ -0,0 +1,57 @@
+import asyncio
+import logging
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from onepassword.client import Client  # pip install onepassword-sdk
+
+from browser_use import ActionResult, Agent, Controller
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+OP_SERVICE_ACCOUNT_TOKEN = os.getenv('OP_SERVICE_ACCOUNT_TOKEN')
+OP_ITEM_ID = os.getenv('OP_ITEM_ID')  # Go to 1Password, right click on the item, click "Copy Secret Reference"
+
+
+controller = Controller()
+
+
+@controller.registry.action('Get 2FA code from 1Password for Google Account', domains=['*.google.com', 'google.com'])
+async def get_1password_2fa() -> ActionResult:
+	"""
+	Custom action to retrieve 2FA/MFA code from 1Password using onepassword.client SDK.
+
+	Raises:
+		ValueError: If OP_SERVICE_ACCOUNT_TOKEN or OP_ITEM_ID is not set.
+	"""
+	# Check the configuration up front: an unset item id would otherwise be
+	# interpolated as the literal text 'None' into the secret reference below.
+	if not OP_SERVICE_ACCOUNT_TOKEN:
+		raise ValueError('OP_SERVICE_ACCOUNT_TOKEN is not set')
+	if not OP_ITEM_ID:
+		raise ValueError('OP_ITEM_ID is not set')
+
+	client = await Client.authenticate(
+		# setup instructions: https://github.com/1Password/onepassword-sdk-python/#-get-started
+		auth=OP_SERVICE_ACCOUNT_TOKEN,
+		integration_name='Browser-Use',
+		integration_version='v1.0.0',
+	)
+
+	mfa_code = await client.secrets.resolve(f'op://Private/{OP_ITEM_ID}/One-time passcode')
+
+	return ActionResult(extracted_content=mfa_code)
+
+
+async def main():
+	"""Run a Google sign-in task that can call the 1Password 2FA action, then print the outcome."""
+	agent = Agent(
+		task='Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it',
+		llm=ChatOpenAI(model='gpt-4o'),
+		controller=controller,
+	)
+
+	outcome = await agent.run()
+	print(f'Task completed with result: {outcome}')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/custom-functions/save_to_file_hugging_face.py
+++ b/browser-use/examples/custom-functions/save_to_file_hugging_face.py
@ -0,0 +1,50 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use.agent.service import Agent
+from browser_use.controller.service import Controller
+
+# Initialize controller first
+controller = Controller()
+
+
+class Model(BaseModel):
+	# One model entry; save_models writes these four fields on a single line.
+	title: str
+	url: str
+	likes: int
+	license: str
+
+
+class Models(BaseModel):
+	# Parameter model of the 'Save models' action: the batch of entries to write.
+	models: list[Model]
+
+
+@controller.action('Save models', param_model=Models)
+def save_models(params: Models):
+	"""Append one summary line per model to models.txt."""
+	with open('models.txt', 'a') as out:
+		out.writelines(
+			f'{entry.title} ({entry.url}): {entry.likes} likes, {entry.license}\n' for entry in params.models
+		)
+
+
+# video: https://preview.screen.studio/share/EtOhIk0P
+async def main():
+	"""Have the agent find the most-liked cc-by-sa-4.0 models and save the top five."""
+	agent = Agent(
+		task='Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.',
+		llm=ChatOpenAI(model='gpt-4o'),
+		controller=controller,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/click_fallback_options.py
+++ b/browser-use/examples/features/click_fallback_options.py
@ -0,0 +1,210 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from aiohttp import web  # make sure to install aiohttp: pip install aiohttp
+from langchain_openai import ChatOpenAI
+
+# from langchain_google_genai import ChatGoogleGenerativeAI
+from browser_use import Agent, Controller
+
+# Define a simple HTML page
+HTML_CONTENT = """
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Custom Select Div</title>
+  <style>
+    .custom-select {
+      position: relative;
+      width: 200px;
+      font-family: Arial, sans-serif;
+      margin-bottom: 20px;
+    }
+
+    .select-display {
+      padding: 10px;
+      border: 1px solid #ccc;
+      background-color: #fff;
+      cursor: pointer;
+    }
+
+    .select-options {
+      position: absolute;
+      top: 100%;
+      left: 0;
+      right: 0;
+      border: 1px solid #ccc;
+      border-top: none;
+      background-color: #fff;
+      display: none;
+      max-height: 150px;
+      overflow-y: auto;
+      z-index: 100;
+    }
+
+    .select-option {
+      padding: 10px;
+      cursor: pointer;
+    }
+
+    .select-option:hover {
+      background-color: #f0f0f0;
+    }
+  </style>
+</head>
+<body>
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+  
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+  
+  <div class="custom-select">
+    <div class="select-display">Select a fruit</div>
+    <div class="select-options">
+      <div class="select-option" data-value="option1">Apples</div>
+      <div class="select-option" data-value="option2">Oranges</div>
+      <div class="select-option" data-value="option3">Pineapples</div>
+    </div>
+  </div>
+
+  <label for="cars">Choose a car:</label>
+  <select name="cars" id="cars">
+    <option value="volvo">Volvo</option>
+    <option value="bmw">BMW</option>
+    <option value="mercedes">Mercedes</option>
+    <option value="audi">Audi</option>
+  </select>
+
+  <button onclick="alert('I told you!')">Don't click me</button>
+
+  <script>
+    document.querySelectorAll('.custom-select').forEach(customSelect => {
+      const selectDisplay = customSelect.querySelector('.select-display');
+      const selectOptions = customSelect.querySelector('.select-options');
+      const options = customSelect.querySelectorAll('.select-option');
+
+      selectDisplay.addEventListener('click', (e) => {
+        // Close all other dropdowns
+        document.querySelectorAll('.select-options').forEach(opt => {
+          if (opt !== selectOptions) opt.style.display = 'none';
+        });
+
+        // Toggle current dropdown
+        const isVisible = selectOptions.style.display === 'block';
+        selectOptions.style.display = isVisible ? 'none' : 'block';
+
+        e.stopPropagation();
+      });
+
+      options.forEach(option => {
+        option.addEventListener('click', () => {
+          selectDisplay.textContent = option.textContent;
+          selectDisplay.dataset.value = option.getAttribute('data-value');
+          selectOptions.style.display = 'none';
+        });
+      });
+    });
+
+    // Close all dropdowns if clicking outside
+    document.addEventListener('click', () => {
+      document.querySelectorAll('.select-options').forEach(opt => {
+        opt.style.display = 'none';
+      });
+    });
+  </script>
+</body>
+</html>
+
+"""
+
+
+# aiohttp request handler to serve the HTML content
+async def handle_root(request):
+	"""Answer the request with HTML_CONTENT as a text/html response; the request itself is not inspected."""
+	page = web.Response(text=HTML_CONTENT, content_type='text/html')
+	return page
+
+
+# Function to run the HTTP server
+async def run_http_server():
+	"""Serve the demo page on http://localhost:8000 until this coroutine is cancelled.
+
+	The runner is cleaned up on the way out, so cancelling the task releases
+	the listening socket instead of leaving it to interpreter shutdown.
+	"""
+	app = web.Application()
+	app.router.add_get('/', handle_root)
+	runner = web.AppRunner(app)
+	await runner.setup()
+	try:
+		site = web.TCPSite(runner, 'localhost', 8000)
+		await site.start()
+		print('HTTP server running on http://localhost:8000')
+		# Keep the server running indefinitely (this Event is never set).
+		await asyncio.Event().wait()
+	finally:
+		await runner.cleanup()
+
+
+# Your agent tasks and other logic
+controller = Controller()
+
+
+async def main():
+	"""Serve the demo page in the background, run each example task against it, then shut the server down.
+
+	The server task is cancelled even when an agent run raises.
+	"""
+	# Start the HTTP server in the background.
+	server_task = asyncio.create_task(run_http_server())
+
+	# Example tasks for the agent.
+	xpath_task = 'Open http://localhost:8000/, click element with the xpath "/html/body/div/div[1]" and then click on Oranges'
+	css_selector_task = 'Open http://localhost:8000/, click element with the selector div.select-display and then click on apples'
+	text_task = 'Open http://localhost:8000/, click the third element with the text "Select a fruit" and then click on Apples, then click the second element with the text "Select a fruit" and then click on Oranges'
+	select_task = 'Open http://localhost:8000/, choose the car BMW'
+	button_task = 'Open http://localhost:8000/, click on the button'
+
+	llm = ChatOpenAI(model='gpt-4o')
+	# llm = ChatGoogleGenerativeAI(
+	#     model="gemini-2.0-flash-lite",
+	# )
+
+	try:
+		# Run different agent tasks.
+		for task in [xpath_task, css_selector_task, text_task, select_task, button_task]:
+			agent = Agent(
+				task=task,
+				llm=llm,
+				controller=controller,
+			)
+			await agent.run()
+
+		# Wait for user input before shutting down. input() blocks, so run it in a
+		# worker thread; calling it directly would freeze the event loop and the
+		# HTTP server with it.
+		await asyncio.get_running_loop().run_in_executor(None, input, 'Press Enter to close...')
+	finally:
+		# Cancel the server task once finished.
+		server_task.cancel()
+		try:
+			await server_task
+		except asyncio.CancelledError:
+			print('HTTP server stopped.')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/cross_origin_iframes.py
+++ b/browser-use/examples/features/cross_origin_iframes.py
@ -0,0 +1,52 @@
+"""
+Example of how it supports cross-origin iframes.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+controller = Controller()
+
+
+async def main():
+	"""Run the cross-origin iframe task, then close the browser and wait for Enter.
+
+	The browser is closed even when the agent run raises.
+	"""
+	agent = Agent(
+		task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
+		llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
+		controller=controller,
+		browser=browser,
+	)
+
+	try:
+		await agent.run()
+	finally:
+		await browser.close()
+
+	input('Press Enter to close...')
+
+
+if __name__ == '__main__':
+	try:
+		asyncio.run(main())
+	except Exception as e:
+		print(e)
--- a/browser-use/examples/features/custom_output.py
+++ b/browser-use/examples/features/custom_output.py
@ -0,0 +1,59 @@
+"""
+Show how to use custom outputs.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Controller
+
+
+# One post in the structured output: title, URL, comment count and age in hours.
+# Documented with comments rather than a docstring so the pydantic class itself is unchanged.
+class Post(BaseModel):
+	post_title: str
+	post_url: str
+	num_comments: int
+	hours_since_post: int
+
+
+# Output model handed to the Controller below: a list of Post entries.
+class Posts(BaseModel):
+	posts: list[Post]
+
+
+controller = Controller(output_model=Posts)
+
+
+async def main():
+	"""Run the Hacker News task and print every post parsed from the agent's final result."""
+	task = 'Go to hackernews show hn and give me the first  5 posts'
+	agent = Agent(task=task, llm=ChatOpenAI(model='gpt-4o'), controller=controller)
+
+	history = await agent.run()
+
+	final = history.final_result()
+	if not final:
+		print('No result')
+		return
+
+	# The final result is JSON text; validate it against the output model.
+	parsed: Posts = Posts.model_validate_json(final)
+	for entry in parsed.posts:
+		print('\n--------------------------------')
+		print(f'Title:            {entry.post_title}')
+		print(f'URL:              {entry.post_url}')
+		print(f'Comments:         {entry.num_comments}')
+		print(f'Hours since post: {entry.hours_since_post}')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/custom_system_prompt.py
+++ b/browser-use/examples/features/custom_system_prompt.py
@ -0,0 +1,39 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+extend_system_message = (
+	'REMEMBER the most important RULE: ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!'
+)
+
+# or use override_system_message to completely override the system prompt
+
+
+async def main():
+	"""Build an agent with the extended system message, print its system prompt as JSON, then run it."""
+	agent = Agent(
+		task="do google search to find images of Elon Musk's wife",
+		llm=ChatOpenAI(model='gpt-4o'),
+		extend_system_message=extend_system_message,
+	)
+
+	# Only fields that were explicitly set are dumped.
+	prompt_fields = agent.message_manager.system_prompt.model_dump(exclude_unset=True)
+	print(json.dumps(prompt_fields, indent=4))
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/custom_user_agent.py
+++ b/browser-use/examples/features/custom_user_agent.py
@ -0,0 +1,79 @@
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_anthropic import ChatAnthropic
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+from browser_use.controller.service import Controller
+
+
+def get_llm(provider: str):
+	"""Return the chat model for ``provider`` ('anthropic' or 'openai').
+
+	Raises:
+		ValueError: if ``provider`` is neither of the two supported names.
+	"""
+	if provider not in ('anthropic', 'openai'):
+		raise ValueError(f'Unsupported provider: {provider}')
+
+	if provider == 'openai':
+		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+
+	return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
+
+
+# NOTE: This example is to find your current user agent string to use it in the browser_context
+task = 'go to https://whatismyuseragent.com and find the current user agent string '
+
+
+controller = Controller()
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--query', type=str, help='The query to process', default=task)
+parser.add_argument(
+	'--provider',
+	type=str,
+	choices=['openai', 'anthropic'],
+	default='openai',
+	help='The model provider to use (default: openai)',
+)
+
+args = parser.parse_args()
+
+llm = get_llm(args.provider)
+
+
+browser = Browser(
+	config=BrowserConfig(
+		# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+
+browser_context = BrowserContext(config=BrowserContextConfig(user_agent='foobarfoo'), browser=browser)
+
+agent = Agent(
+	task=args.query,
+	llm=llm,
+	controller=controller,
+	# browser=browser,
+	browser_context=browser_context,
+	use_vision=True,
+	max_actions_per_step=1,
+)
+
+
+async def main():
+	"""Run the module-level agent for at most 25 steps, wait for Enter, then release the browser.
+
+	Both the context and the browser that backs it are closed, also when the
+	run raises; the original closed only the context.
+	"""
+	try:
+		await agent.run(max_steps=25)
+
+		input('Press Enter to close the browser...')
+	finally:
+		await browser_context.close()
+		await browser.close()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/download_file.py
+++ b/browser-use/examples/features/download_file.py
@ -0,0 +1,42 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+browser = Browser(
+	config=BrowserConfig(
+		new_context_config=BrowserContextConfig(save_downloads_path=os.path.join(os.path.expanduser('~'), 'downloads'))
+	)
+)
+
+
+async def run_download():
+	"""Run the file-download task for at most 25 steps and close the browser afterwards, even on error."""
+	agent = Agent(
+		task=('Go to "https://file-examples.com/" and download the smallest doc file.'),
+		llm=llm,
+		max_actions_per_step=8,
+		use_vision=True,
+		browser=browser,
+	)
+	try:
+		await agent.run(max_steps=25)
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_download())
--- a/browser-use/examples/features/drag_drop.py
+++ b/browser-use/examples/features/drag_drop.py
@ -0,0 +1,51 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+
+task_1 = """
+Navigate to: https://sortablejs.github.io/Sortable/. 
+Then scroll down to the first examplw with title "Simple list example". 
+Drag the element with name "item 1" to below the element with name "item 3".
+"""
+
+
+task_2 = """
+Navigate to: https://excalidraw.com/.
+Click on the pencil icon (with index 40).
+Then draw a triangle in the canvas.
+Draw the triangle starting from coordinate (400,400).
+You can use the drag and drop action to draw the triangle.
+"""
+
+
+async def run_search():
+	"""Run ``task_1`` for at most 25 steps, one action per step, with ``use_vision`` switched on."""
+	settings = dict(task=task_1, llm=llm, max_actions_per_step=1, use_vision=True)
+	agent = Agent(**settings)
+	await agent.run(max_steps=25)
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/features/follow_up_tasks.py
+++ b/browser-use/examples/features/follow_up_tasks.py
@ -0,0 +1,50 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig, BrowserContextConfig, Controller
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+# Get your chrome path
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		new_context_config=BrowserContextConfig(
+			keep_alive=True,
+		),
+	),
+)
+
+controller = Controller()
+
+
+task = 'Find the founders of browser-use and draft them a short personalized message'
+
+agent = Agent(task=task, llm=llm, controller=controller, browser=browser)
+
+
+async def main():
+	"""Run the module-level agent on its first task, queue a follow-up task, and run it again."""
+	await agent.run()
+
+	# new_task = input('Type in a new task: ')
+	follow_up = 'Find an image of the founders'
+	agent.add_new_task(follow_up)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/initial_actions.py
+++ b/browser-use/examples/features/initial_actions.py
@ -0,0 +1,34 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+llm = ChatOpenAI(model='gpt-4o')
+
+initial_actions = [
+	{'open_tab': {'url': 'https://www.google.com'}},
+	{'open_tab': {'url': 'https://en.wikipedia.org/wiki/Randomness'}},
+	{'scroll_down': {'amount': 1000}},
+]
+agent = Agent(
+	task='What theories are displayed on the page?',
+	initial_actions=initial_actions,
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the module-level agent (built with ``initial_actions``) for at most 10 steps."""
+	await agent.run(max_steps=10)
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/multi-tab_handling.py
+++ b/browser-use/examples/features/multi-tab_handling.py
@ -0,0 +1,33 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# video: https://preview.screen.studio/share/clenCmS6
+llm = ChatOpenAI(model='gpt-4o')
+agent = Agent(
+	task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the module-level multi-tab agent with its default run settings."""
+	await agent.run()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/multiple_agents_same_browser.py
+++ b/browser-use/examples/features/multiple_agents_same_browser.py
@ -0,0 +1,67 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+
+# Video: https://preview.screen.studio/share/8Elaq9sm
+async def main():
+	"""Read tasks from stdin forever and run each one as a new agent in one shared browser context.
+
+	Single-letter inputs control the current agent instead of starting a new
+	one: ``p`` pauses it, ``r`` resumes it, ``b`` stops it. Any other non-blank
+	line pauses the current agent (if any) and starts a new agent in the background.
+	"""
+	# Persist the browser state across agents
+
+	browser = Browser()
+	async with await browser.new_context() as context:
+		model = ChatOpenAI(model='gpt-4o')
+		current_agent = None
+		# The event loop keeps only weak references to tasks, so hold strong ones
+		# here; otherwise a background run may be garbage-collected mid-execution.
+		background_runs = set()
+
+		async def get_input():
+			# input() blocks, so read it in a worker thread to keep the loop (and running agents) alive
+			return await asyncio.get_running_loop().run_in_executor(
+				None, lambda: input('Enter task (p: pause current agent, r: resume, b: break): ')
+			)
+
+		while True:
+			task = await get_input()
+			command = task.lower()
+
+			if command == 'p':
+				# Pause the current agent if one exists
+				if current_agent:
+					current_agent.pause()
+				continue
+			if command == 'r':
+				# Resume the current agent if one exists
+				if current_agent:
+					current_agent.resume()
+				continue
+			if command == 'b':
+				# Break the current agent's execution if one exists
+				if current_agent:
+					current_agent.stop()
+					current_agent = None
+				continue
+			if not task.strip():
+				# A blank line is not a task; do not start an agent for it
+				continue
+
+			# If there's a current agent running, pause it before starting new one
+			if current_agent:
+				current_agent.pause()
+
+			# Create and run new agent with the task
+			current_agent = Agent(
+				task=task,
+				llm=model,
+				browser_context=context,
+			)
+
+			# Run the agent asynchronously without blocking
+			run_task = asyncio.create_task(current_agent.run())
+			background_runs.add(run_task)
+			run_task.add_done_callback(background_runs.discard)
+
+
+asyncio.run(main())
+
+# Now add the cheapest to the cart
--- a/browser-use/examples/features/outsource_state.py
+++ b/browser-use/examples/features/outsource_state.py
@ -0,0 +1,70 @@
+"""
+Show how to keep the agent state outside the agent: save it to a JSON file and load it back.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import anyio
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.agent.views import AgentState
+from browser_use.browser.browser import Browser, BrowserConfig
+
+
+async def main():
+	"""Take an agent step with an externally held ``AgentState`` and round-trip that state through a JSON file.
+
+	A fresh ``Agent`` is built around the injected state, one step is taken,
+	and, unless the step reports done and valid, the state (without its
+	history) is written to ``agent_state.json`` and read back. The browser
+	context and the browser are released on the way out, also on error.
+	"""
+	task = 'Go to hackernews show hn and give me the first  5 posts'
+
+	browser = Browser(
+		config=BrowserConfig(
+			headless=True,
+		)
+	)
+
+	try:
+		async with await browser.new_context() as browser_context:
+			agent_state = AgentState()
+
+			for i in range(10):
+				agent = Agent(
+					task=task,
+					llm=ChatOpenAI(model='gpt-4o'),
+					browser=browser,
+					browser_context=browser_context,
+					injected_agent_state=agent_state,
+					page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
+				)
+
+				done, valid = await agent.take_step()
+				print(f'Step {i}: Done: {done}, Valid: {valid}')
+
+				if done and valid:
+					break
+
+				agent_state.history.history = []
+
+				# Save state to file
+				async with await anyio.open_file('agent_state.json', 'w') as f:
+					serialized = agent_state.model_dump_json(exclude={'history'})
+					await f.write(serialized)
+
+				# Load state back from file
+				async with await anyio.open_file('agent_state.json', 'r') as f:
+					loaded_json = await f.read()
+					agent_state = AgentState.model_validate_json(loaded_json)
+
+				# NOTE(review): this unconditional break ends the loop after the first
+				# pass, so only one step is ever taken. Kept as in the original;
+				# confirm whether it is a leftover before removing it.
+				break
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/parallel_agents.py
+++ b/browser-use/examples/features/parallel_agents.py
@ -0,0 +1,59 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+browser = Browser(
+	config=BrowserConfig(
+		disable_security=True,
+		headless=False,
+		new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
+	)
+)
+llm = ChatOpenAI(model='gpt-4o')
+
+
+async def main():
+	"""Run several agents concurrently on the shared browser, then one more agent, then close the browser.
+
+	The browser is closed even when one of the runs raises.
+	"""
+	tasks = [
+		'Search Google for weather in Tokyo',
+		'Check Reddit front page title',
+		'Look up Bitcoin price on Coinbase',
+		'Find NASA image of the day',
+		# 'Check top story on CNN',
+		# 'Search latest SpaceX launch date',
+		# 'Look up population of Paris',
+		# 'Find current time in Sydney',
+		# 'Check who won last Super Bowl',
+		# 'Search trending topics on Twitter',
+	]
+
+	try:
+		agents = [Agent(task=task, llm=llm, browser=browser) for task in tasks]
+		await asyncio.gather(*(agent.run() for agent in agents))
+
+		# async with await browser.new_context() as context:
+		follow_up_agent = Agent(
+			task='Go to apple.com and return the title of the page',
+			llm=llm,
+			browser=browser,
+			# browser_context=context,
+		)
+		await follow_up_agent.run()
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/pause_agent.py
+++ b/browser-use/examples/features/pause_agent.py
@ -0,0 +1,103 @@
+import asyncio
+import os
+import sys
+import threading
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+
+class AgentController:
+	"""Wrap one Agent so it can be run on a worker thread and paused, resumed or stopped from another.
+
+	``running`` is True only while ``run_agent`` is executing.
+	"""
+
+	def __init__(self):
+		llm = ChatOpenAI(model='gpt-4o')
+		self.agent = Agent(
+			task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
+			llm=llm,
+		)
+		self.running = False
+
+	async def run_agent(self):
+		"""Run the agent to completion, keeping ``running`` in sync with it."""
+		self.running = True
+		try:
+			await self.agent.run()
+		finally:
+			# Clear the flag when the run ends on its own or fails, not only via stop()
+			self.running = False
+
+	def start(self):
+		"""Run the agent on a new event loop; blocks, so it is meant to be the target of a worker thread."""
+		loop = asyncio.new_event_loop()
+		asyncio.set_event_loop(loop)
+		loop.run_until_complete(self.run_agent())
+
+	def pause(self):
+		"""Pause the agent"""
+		self.agent.pause()
+
+	def resume(self):
+		"""Resume the agent"""
+		self.agent.resume()
+
+	def stop(self):
+		"""Stop the agent and mark the controller as not running"""
+		self.agent.stop()
+		self.running = False
+
+
+def print_menu():
+	"""Print the agent control menu: a heading after a blank line, then the five numbered choices."""
+	menu_lines = (
+		'\nAgent Control Menu:',
+		'1. Start',
+		'2. Pause',
+		'3. Resume',
+		'4. Stop',
+		'5. Exit',
+	)
+	for line in menu_lines:
+		print(line)
+
+
+async def main():
+	"""Drive an AgentController from a text menu until the user chooses Exit.
+
+	The agent runs on its own thread (choice 1); choices 2-4 pause, resume and
+	stop it; choice 5 stops it if needed and leaves the loop.
+	"""
+	controller = AgentController()
+	agent_thread = None
+
+	while True:
+		print_menu()
+		try:
+			choice = input('Enter your choice (1-5): ')
+		except (KeyboardInterrupt, EOFError):
+			# Ctrl+C or a closed stdin: take the Exit path so the worker thread is
+			# stopped and joined instead of being left running.
+			choice = '5'
+
+		if choice == '1' and not agent_thread:
+			print('Starting agent...')
+			agent_thread = threading.Thread(target=controller.start)
+			agent_thread.start()
+
+		elif choice == '2':
+			print('Pausing agent...')
+			controller.pause()
+
+		elif choice == '3':
+			print('Resuming agent...')
+			controller.resume()
+
+		elif choice == '4':
+			print('Stopping agent...')
+			controller.stop()
+			if agent_thread:
+				agent_thread.join()
+				agent_thread = None
+
+		elif choice == '5':
+			print('Exiting...')
+			if controller.running:
+				controller.stop()
+				if agent_thread:
+					agent_thread.join()
+			break
+
+		await asyncio.sleep(0.1)  # Small delay to prevent CPU spinning
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/planner.py
+++ b/browser-use/examples/features/planner.py
@ -0,0 +1,30 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+planner_llm = ChatOpenAI(
+	model='o3-mini',
+)
+task = 'your task'
+
+
+agent = Agent(task=task, llm=llm, planner_llm=planner_llm, use_vision_for_planner=False, planner_interval=1)
+
+
+async def main():
+	"""Run the module-level agent, which was built with a separate planner LLM."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/playwright_script_generation.py
+++ b/browser-use/examples/features/playwright_script_generation.py
@ -0,0 +1,136 @@
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Ensure the project root is in the Python path if running directly
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser, BrowserConfig
+
+# Define the task for the agent
+TASK_DESCRIPTION = """
+1. Go to amazon.com
+2. Search for 'i7 14700k'
+4. If there is an 'Add to Cart' button, open the product page and then click add to cart.
+5. the open the shopping cart page /cart button/ go to cart button.
+6. Scroll down to the bottom of the cart page.
+7. Scroll up to the top of the cart page.
+8. Finish the task.
+"""
+
+# Define the path where the Playwright script will be saved
+SCRIPT_DIR = Path('./playwright_scripts')
+SCRIPT_PATH = SCRIPT_DIR / 'playwright_amazon_cart_script.py'
+
+
+# Helper function to stream output from the subprocess
+async def stream_output(stream, prefix):
+	"""Print every line read from ``stream`` as ``<prefix>: <line>`` until end of stream.
+
+	Args:
+		stream: object with an awaitable ``readline()`` returning bytes (empty
+			bytes at end of stream), or None when no stream is available.
+		prefix: label printed in front of each line.
+	"""
+	if stream is None:
+		print(f'{prefix}: (No stream available)')
+		return
+	while True:
+		line = await stream.readline()
+		if not line:
+			break
+		# The child's output is not guaranteed to be valid UTF-8; substitute bad
+		# bytes instead of letting UnicodeDecodeError abort the streaming.
+		print(f'{prefix}: {line.decode(errors="replace").rstrip()}', flush=True)
+
+
+async def main():
+	"""Run the agent so it writes a Playwright script to SCRIPT_PATH, then execute that script.
+
+	Flow: build the LLM, browser and agent; run the agent and report the
+	outcome; if the run raised, close the browser and return. Otherwise, when
+	the script file exists, run it with the current interpreter as a
+	subprocess, echo its stdout/stderr line by line and report the exit code.
+	Finally close the agent's browser.
+	"""
+	# Initialize the language model
+	llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
+
+	# Configure the browser
+	# Use headless=False if you want to watch the agent visually
+	browser_config = BrowserConfig(headless=False)
+	browser = Browser(config=browser_config)
+
+	# Configure the agent
+	# The 'save_playwright_script_path' argument tells the agent where to save the script
+	agent = Agent(
+		task=TASK_DESCRIPTION,
+		llm=llm,
+		browser=browser,
+		save_playwright_script_path=str(SCRIPT_PATH),  # Pass the path as a string
+	)
+
+	print('Running the agent to generate the Playwright script...')
+	history = None  # Initialize history to None
+	try:
+		history = await agent.run()
+		print('Agent finished running.')
+
+		if history and history.is_successful():
+			print(f'Agent completed the task successfully. Final result: {history.final_result()}')
+		elif history:
+			print('Agent finished, but the task might not be fully successful.')
+			if history.has_errors():
+				print(f'Errors encountered: {history.errors()}')
+		else:
+			print('Agent run did not return a history object.')
+
+	except Exception as e:
+		print(f'An error occurred during the agent run: {e}')
+		# Ensure browser is closed even if agent run fails
+		if browser:
+			await browser.close()
+		return  # Exit if agent failed
+
+	# --- Execute the Generated Playwright Script ---
+	print(f'\nChecking if Playwright script was generated at: {SCRIPT_PATH}')
+	if SCRIPT_PATH.exists():
+		print('Playwright script found. Attempting to execute...')
+		try:
+			# Ensure the script directory exists before running
+			# NOTE(review): SCRIPT_PATH was just found inside SCRIPT_DIR, so this looks like a no-op - confirm
+			SCRIPT_DIR.mkdir(parents=True, exist_ok=True)
+
+			# Execute the generated script using asyncio.create_subprocess_exec
+			process = await asyncio.create_subprocess_exec(
+				sys.executable,
+				str(SCRIPT_PATH),
+				stdout=asyncio.subprocess.PIPE,
+				stderr=asyncio.subprocess.PIPE,
+				cwd=Path.cwd(),  # Run from the current working directory
+			)
+
+			print('\n--- Playwright Script Execution ---')
+			# Create tasks to stream stdout and stderr concurrently
+			stdout_task = asyncio.create_task(stream_output(process.stdout, 'stdout'))
+			stderr_task = asyncio.create_task(stream_output(process.stderr, 'stderr'))
+
+			# Wait for both stream tasks and the process to finish
+			await asyncio.gather(stdout_task, stderr_task)
+			returncode = await process.wait()
+			print('-------------------------------------')
+
+			if returncode == 0:
+				print('\n✅ Playwright script executed successfully!')
+			else:
+				print(f'\n⚠️ Playwright script finished with exit code {returncode}.')
+
+		except Exception as e:
+			# Best effort: a failure while launching or streaming the script is reported, not re-raised
+			print(f'\n❌ An error occurred while executing the Playwright script: {e}')
+	else:
+		print(f'\n❌ Playwright script not found at {SCRIPT_PATH}. Generation might have failed.')
+
+	# Close the browser used by the agent (if not already closed by agent.run error handling)
+	# Note: The generated script manages its own browser instance.
+	if browser:
+		await browser.close()
+		print("Agent's browser closed.")
+
+
+if __name__ == '__main__':
+	# Ensure the script directory is clean before running (optional)
+	if SCRIPT_PATH.exists():
+		SCRIPT_PATH.unlink()
+		print(f'Removed existing script: {SCRIPT_PATH}')
+
+	# Run the main async function
+	asyncio.run(main())
--- a/browser-use/examples/features/restrict_urls.py
+++ b/browser-use/examples/features/restrict_urls.py
@ -0,0 +1,47 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig
+
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+task = (
+	"go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"
+)
+
+allowed_domains = ['google.com']
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		new_context_config=BrowserContextConfig(
+			allowed_domains=allowed_domains,
+		),
+	),
+)
+
+agent = Agent(
+	task=task,
+	llm=llm,
+	browser=browser,
+)
+
+
+async def main():
+	"""Run the domain-restricted agent for at most 25 steps, wait for Enter, then close the browser.
+
+	The browser is closed even when the run raises.
+	"""
+	try:
+		await agent.run(max_steps=25)
+
+		input('Press Enter to close the browser...')
+	finally:
+		await browser.close()
+
+
+asyncio.run(main())
--- a/browser-use/examples/features/result_processing.py
+++ b/browser-use/examples/features/result_processing.py
@ -0,0 +1,60 @@
+import asyncio
+import os
+import sys
+from pprint import pprint
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.agent.views import AgentHistoryList
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+llm = ChatOpenAI(model='gpt-4o')
+# Visible (non-headless) browser shared by the whole example; main() closes it.
+# NOTE(review): what disable_security=True relaxes is defined by BrowserConfig - confirm before
+# copying this setting outside a demo.
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,
+		disable_security=True,
+	)
+)
+
+
+async def main() -> None:
+	"""Run a short agent task in a traced browser context and print what the run history recorded.
+
+	Prints the final result, the errors, the model actions and the model thoughts
+	taken from the AgentHistoryList returned by ``agent.run()``.
+	"""
+	try:
+		async with await browser.new_context(
+			config=BrowserContextConfig(
+				trace_path='./tmp/result_processing',
+				no_viewport=False,
+				window_width=1280,
+				window_height=1000,
+			)
+		) as browser_context:
+			agent = Agent(
+				task="go to google.com and type 'OpenAI' click search and give me the first url",
+				llm=llm,
+				browser_context=browser_context,
+			)
+			history: AgentHistoryList = await agent.run(max_steps=3)
+
+			print('Final Result:')
+			pprint(history.final_result(), indent=4)
+
+			print('\nErrors:')
+			pprint(history.errors(), indent=4)
+
+			# e.g. xPaths the model clicked on
+			print('\nModel Outputs:')
+			pprint(history.model_actions(), indent=4)
+
+			print('\nThoughts:')
+			pprint(history.model_thoughts(), indent=4)
+	finally:
+		# Close the browser even when the run or the reporting above raises.
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/save_trace.py
+++ b/browser-use/examples/features/save_trace.py
@ -0,0 +1,34 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser
+from browser_use.browser.context import BrowserContextConfig
+
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+
+
+async def main() -> None:
+	"""Run one agent task inside a browser context configured with ``trace_path='./tmp/traces/'``."""
+	browser = Browser()
+
+	try:
+		async with await browser.new_context(config=BrowserContextConfig(trace_path='./tmp/traces/')) as context:
+			agent = Agent(
+				task='Go to hackernews, then go to apple.com and return all titles of open tabs',
+				llm=llm,
+				browser_context=context,
+			)
+			await agent.run()
+	finally:
+		# The browser is created in this function, so close it here even if the run raises.
+		await browser.close()
+
+
+# Start the run only when executed as a script, not when the module is imported.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/sensitive_data.py
+++ b/browser-use/examples/features/sensitive_data.py
@ -0,0 +1,32 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+# the model will see x_name and x_password, but never the actual values.
+# Keys are the placeholder names used in the task text; the values here are dummies
+# to be replaced with real credentials before running.
+sensitive_data = {'x_name': 'my_x_name', 'x_password': 'my_x_password'}
+# The task refers to the credentials only by their placeholder names.
+task = 'go to x.com and login with x_name and x_password then find interesting posts and like them'
+
+# Built at import time; main() only runs it.
+agent = Agent(task=task, llm=llm, sensitive_data=sensitive_data)
+
+
+# Entry coroutine: runs the module-level agent with run()'s default arguments.
+async def main():
+	await agent.run()
+
+
+# Start the run only when executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/small_model_for_extraction.py
+++ b/browser-use/examples/features/small_model_for_extraction.py
@ -0,0 +1,26 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Two models: `llm` is the agent's main model, `small_llm` (gpt-4o-mini) is passed
+# separately as page_extraction_llm.
+llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
+small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
+task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
+agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm)
+
+
+# Entry coroutine: runs the module-level agent with run()'s default arguments.
+async def main():
+	await agent.run()
+
+
+# Start the run only when executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/task_with_memory.py
+++ b/browser-use/examples/features/task_with_memory.py
@ -0,0 +1,102 @@
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import anyio
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Browser, BrowserConfig, Controller
+
+# Pages the agent is asked to visit; the list and its length are interpolated into task_description below.
+links = [
+	'https://docs.mem0.ai/components/llms/models/litellm',
+	'https://docs.mem0.ai/components/llms/models/mistral_AI',
+	'https://docs.mem0.ai/components/llms/models/ollama',
+	'https://docs.mem0.ai/components/llms/models/openai',
+	'https://docs.mem0.ai/components/llms/models/together',
+	'https://docs.mem0.ai/components/llms/models/xAI',
+	'https://docs.mem0.ai/components/llms/overview',
+	'https://docs.mem0.ai/components/vectordbs/config',
+	'https://docs.mem0.ai/components/vectordbs/dbs/azure_ai_search',
+	'https://docs.mem0.ai/components/vectordbs/dbs/chroma',
+	'https://docs.mem0.ai/components/vectordbs/dbs/elasticsearch',
+	'https://docs.mem0.ai/components/vectordbs/dbs/milvus',
+	'https://docs.mem0.ai/components/vectordbs/dbs/opensearch',
+	'https://docs.mem0.ai/components/vectordbs/dbs/pgvector',
+	'https://docs.mem0.ai/components/vectordbs/dbs/pinecone',
+	'https://docs.mem0.ai/components/vectordbs/dbs/qdrant',
+	'https://docs.mem0.ai/components/vectordbs/dbs/redis',
+	'https://docs.mem0.ai/components/vectordbs/dbs/supabase',
+	'https://docs.mem0.ai/components/vectordbs/dbs/vertex_ai_vector_search',
+	'https://docs.mem0.ai/components/vectordbs/dbs/weaviate',
+	'https://docs.mem0.ai/components/vectordbs/overview',
+	'https://docs.mem0.ai/contributing/development',
+	'https://docs.mem0.ai/contributing/documentation',
+	'https://docs.mem0.ai/core-concepts/memory-operations',
+	'https://docs.mem0.ai/core-concepts/memory-types',
+]
+
+
+# One summarised page: its URL, its title and a summary of its content.
+class Link(BaseModel):
+	url: str
+	title: str
+	summary: str
+
+
+# Top-level output model given to the Controller; main() validates the agent's final result against it.
+class Links(BaseModel):
+	links: list[Link]
+
+
+# Passed to Agent(initial_actions=...): opens the docs landing page in a tab.
+initial_actions = [
+	{'open_tab': {'url': 'https://docs.mem0.ai/'}},
+]
+# The controller is told to produce output matching the Links model.
+controller = Controller(output_model=Links)
+# NOTE(review): the prompt below asks for a bare JSON list, while the output model (Links) wraps the
+# list in a {"links": [...]} object and main() parses the result as Links - confirm the wording is intended.
+task_description = f"""
+Visit all the links provided in {links} and summarize the content of the page with url and title. There are {len(links)} links to visit. Make sure to visit all the links. Return a json with the following format: [{{url: <url>, title: <title>, summary: <summary>}}].
+
+Guidelines:
+1. Strictly stay on the domain https://docs.mem0.ai
+2. Do not visit any other websites.
+3. Ignore the links that are hashed (#) or javascript (:), or mailto, or tel, or other protocols
+4. Don't visit any other url other than the ones provided above.
+5. Capture the unique urls which are not already visited.
+6. If you visit any page that doesn't have host name docs.mem0.ai, then do not visit it and come back to the page with host name docs.mem0.ai.
+"""
+
+
+async def main(max_steps=500):
+	"""Run the link-summarising agent and write its parsed output to ``result.json``.
+
+	Args:
+	    max_steps: Upper bound on agent steps, forwarded to ``Agent.run()``.
+	"""
+	config = BrowserConfig(headless=True)
+	browser = Browser(config=config)
+
+	try:
+		agent = Agent(
+			task=task_description,
+			llm=ChatOpenAI(model='gpt-4o-mini'),
+			controller=controller,
+			initial_actions=initial_actions,
+			enable_memory=True,
+			browser=browser,
+		)
+		history = await agent.run(max_steps=max_steps)
+	finally:
+		# The browser is created in this function, so it is closed here - also when the run raises.
+		await browser.close()
+
+	result = history.final_result()
+	parsed_result = []
+	if result:
+		parsed: Links = Links.model_validate_json(result)
+		print(f'Total parsed links: {len(parsed.links)}')
+		parsed_result = [{'title': link.title, 'url': link.url, 'summary': link.summary} for link in parsed.links]
+	else:
+		print('No result')
+
+	# An empty list is written when the agent produced no final result.
+	async with await anyio.open_file('result.json', 'w+') as f:
+		await f.write(json.dumps(parsed_result, indent=4))
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/features/validate_output.py
+++ b/browser-use/examples/features/validate_output.py
@ -0,0 +1,49 @@
+"""
+Demonstrate output validator.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import ActionResult, Agent, Controller
+
+# Controller on which the custom done() action below is registered.
+controller = Controller()
+
+
+# Parameter model of the custom done() action (passed as param_model to the registry).
+class DoneResult(BaseModel):
+	title: str
+	comments: str
+	hours_since_start: int
+
+
+# we overwrite done() in this example to demonstrate the validator
+@controller.registry.action('Done with task', param_model=DoneResult)
+async def done(params: DoneResult):
+	# The ActionResult is built and printed but intentionally NOT returned.
+	result = ActionResult(is_done=True, extracted_content=params.model_dump_json())
+	print(result)
+	# NOTE: this is clearly wrong - to demonstrate the validator
+	return 'blablabla'
+
+
+# Runs the agent with validate_output=True against the controller whose done() action is deliberately broken.
+async def main():
+	task = 'Go to hackernews hn and give me the top 1 post'
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller, validate_output=True)
+	# NOTE: this should fail to demonstrate the validator
+	await agent.run(max_steps=5)
+
+
+# Start the run only when executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/integrations/discord/discord_api.py
+++ b/browser-use/examples/integrations/discord/discord_api.py
@ -0,0 +1,123 @@
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import discord
+from discord.ext import commands
+from langchain_core.language_models.chat_models import BaseChatModel
+
+from browser_use import BrowserConfig
+from browser_use.agent.service import Agent, Browser
+
+
+class DiscordBot(commands.Bot):
+	"""Discord bot implementation for Browser-Use tasks.
+
+	This bot allows users to run browser automation tasks through Discord messages.
+	Processes tasks asynchronously and sends the result back to the user in response to the message.
+	Messages must start with the configured prefix (default: "$bu") followed by the task description.
+
+	Args:
+	    llm (BaseChatModel): Language model instance to use for task processing
+	    prefix (str, optional): Command prefix for triggering browser tasks. Defaults to "$bu"
+	    ack (bool, optional): Whether to acknowledge task receipt with a message. Defaults to False
+	    browser_config (BrowserConfig, optional): Browser configuration settings.
+	        Defaults to None, in which case a headless configuration is created for this bot
+
+	Usage:
+	    ```python
+	    from langchain_openai import ChatOpenAI
+
+	    llm = ChatOpenAI()
+	    bot = DiscordBot(llm=llm, prefix='$bu', ack=True)
+	    bot.run('YOUR_DISCORD_TOKEN')
+	    ```
+
+	Discord Usage:
+	    Send messages starting with the prefix:
+	    "$bu search for python tutorials"
+	"""
+
+	def __init__(
+		self,
+		llm: BaseChatModel,
+		prefix: str = '$bu',
+		ack: bool = False,
+		browser_config: 'BrowserConfig | None' = None,
+	):
+		self.llm = llm
+		self.prefix = prefix.strip()
+		self.ack = ack
+		# Build the default per instance instead of sharing one BrowserConfig object
+		# (created once at class-definition time) between all bots.
+		self.browser_config = browser_config if browser_config is not None else BrowserConfig(headless=True)
+
+		# Define intents.
+		intents = discord.Intents.default()
+		intents.message_content = True  # Enable message content intent
+		intents.members = True  # Enable members intent for user info
+
+		# Initialize the bot with a command prefix and intents.
+		super().__init__(command_prefix='!', intents=intents)  # You may not need prefix, just here for flexibility
+
+		# self.tree = app_commands.CommandTree(self) # Initialize command tree for slash commands.
+
+	async def on_ready(self):
+		"""Called when the bot is ready."""
+		try:
+			print(f'We have logged in as {self.user}')
+			await self.tree.sync()  # Sync the command tree with discord
+
+		except Exception as e:
+			print(f'Error during bot startup: {e}')
+
+	async def on_message(self, message):
+		"""Called when a message is received.
+
+		Messages that start with ``"<prefix> "`` are treated as browser tasks: the text after the
+		prefix is handed to :meth:`run_agent` and the outcome (or the error) is sent to the channel
+		as a reply to the triggering message. All other messages are ignored.
+		"""
+		try:
+			if message.author == self.user:  # Ignore the bot's messages
+				return
+
+			content = message.content.strip()
+			if not content.startswith(f'{self.prefix} '):
+				return
+
+			# Strip only the leading prefix; the task text itself may legitimately contain it.
+			task = content[len(self.prefix) :].strip()
+
+			if self.ack:
+				try:
+					await message.reply(
+						'Starting browser use task...',
+						mention_author=True,  # Ping the author so they notice the task has started
+					)
+				except Exception as e:
+					print(f'Error sending start message: {e}')
+
+			try:
+				agent_message = await self.run_agent(task)
+				await message.channel.send(content=f'{agent_message}', reference=message, mention_author=True)
+			except Exception as e:
+				await message.channel.send(
+					content=f'Error during task execution: {str(e)}',
+					reference=message,
+					mention_author=True,
+				)
+
+		except Exception as e:
+			print(f'Error in message handling: {e}')
+
+	#    await self.process_commands(message)  # Needed to process bot commands
+
+	async def run_agent(self, task: str) -> str:
+		"""Run one Browser-Use task in a fresh browser and return the text to send back.
+
+		Args:
+		    task: Task description taken from the Discord message (prefix already removed).
+
+		Returns:
+		    The extracted content of the first result of the last history entry when the agent
+		    reports it is done, otherwise a generic failure message.
+
+		Raises:
+		    Exception: Wraps any error raised while creating the browser or running the agent.
+		"""
+		try:
+			browser = Browser(config=self.browser_config)
+			try:
+				agent = Agent(task=(task), llm=self.llm, browser=browser)
+				result = await agent.run()
+			finally:
+				# A new browser is created for every task, so close it here; otherwise each
+				# message would leave a browser behind.
+				await browser.close()
+
+			agent_message = None
+			if result.is_done():
+				agent_message = result.history[-1].result[0].extracted_content
+
+			if agent_message is None:
+				agent_message = 'Oops! Something went wrong while running Browser-Use.'
+
+			return agent_message
+
+		except Exception as e:
+			# Chain the cause so the original traceback is preserved.
+			raise Exception(f'Browser-use task failed: {str(e)}') from e
--- a/browser-use/examples/integrations/discord/discord_example.py
+++ b/browser-use/examples/integrations/discord/discord_example.py
@ -0,0 +1,72 @@
+"""
+This example requires you to have a Discord bot token and the bot already added to a server.
+
+Five Steps to create and invite a Discord bot:
+
+1. Create a Discord Application:
+    *   Go to the Discord Developer Portal: https://discord.com/developers/applications
+    *   Log in to the Discord website.
+    *   Click on "New Application".
+    *   Give the application a name and click "Create".
+2. Configure the Bot:
+    *   Navigate to the "Bot" tab on the left side of the screen.
+    *   Make sure "Public Bot" is ticked if you want others to invite your bot.
+	*	Generate your bot token by clicking on "Reset Token", then copy the token and save it securely.
+        *   Do not share the bot token. Treat it like a password. If the token is leaked, regenerate it.
+3. Enable Privileged Intents:
+    *   Scroll down to the "Privileged Gateway Intents" section.
+    *   Enable the necessary intents (e.g., "Server Members Intent" and "Message Content Intent").
+   -->  Note: Enabling privileged intents for bots in over 100 guilds requires bot verification. You may need to contact Discord support to enable privileged intents for verified bots.
+4. Generate Invite URL:
+    *   Go to "OAuth2" tab and "OAuth2 URL Generator" section.
+    *   Under "scopes", tick the "bot" checkbox.
+    *   Tick the permissions required for your bot to function under “Bot Permissions”.
+		*	e.g. "Send Messages", "Send Messages in Threads", "Read Message History",  "Mention Everyone".
+    *   Copy the generated URL under the "GENERATED URL" section at the bottom.
+5. Invite the Bot:
+    *   Paste the URL into your browser.
+    *   Choose a server to invite the bot to.
+    *   Click “Authorize”.
+   -->  Note: The person adding the bot needs "Manage Server" permissions.
+6. Run the code below to start the bot with your bot token.
+7. Write e.g. "$bu what's the weather in Tokyo?" to start a browser-use task and get a response inside the Discord channel.
+"""
+
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import BrowserConfig
+from examples.integrations.discord.discord_api import DiscordBot
+
+# load credentials from environment variables
+# (load_dotenv() above has already been called); fail fast at import time when one is missing.
+bot_token = os.getenv('DISCORD_BOT_TOKEN')
+if not bot_token:
+	raise ValueError('Discord bot token not found in .env file.')
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+# The key is wrapped in SecretStr before being handed to the chat model.
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+bot = DiscordBot(
+	llm=llm,  # required; instance of BaseChatModel
+	prefix='$bu',  # optional; prefix of messages to trigger browser-use, defaults to "$bu"
+	ack=True,  # optional; whether to acknowledge task receipt with a message, defaults to False
+	browser_config=BrowserConfig(
+		headless=False
+	),  # optional; useful for changing headless mode or other browser configs, defaults to headless mode
+)
+
+# Start the bot; this runs at import time because the script has no __main__ guard.
+bot.run(
+	token=bot_token,  # required; Discord bot token
+)
--- a/browser-use/examples/integrations/slack/README.md
+++ b/browser-use/examples/integrations/slack/README.md
@ -0,0 +1,76 @@
+# Slack Integration
+
+Steps to create and configure a Slack bot:
+
+1. Create a Slack App:
+    *   Go to the Slack API: https://api.slack.com/apps
+    *   Click on "Create New App".
+    *   Choose "From scratch" and give your app a name and select the workspace.
+    *   Provide a name and description for your bot (these are required fields).
+2. Configure the Bot:
+    *   Navigate to the "OAuth & Permissions" tab on the left side of the screen.
+    *   Under "Scopes", add the necessary bot token scopes (add these "chat:write", "channels:history", "im:history").
+3. Enable Event Subscriptions:
+    *   Navigate to the "Event Subscriptions" tab.
+    *   Enable events and add the necessary bot events (add these "message.channels", "message.im").
+    *   Add your request URL (you can use ngrok to expose your local server if needed). [See how to set up ngrok](#installing-and-starting-ngrok).
+    *   **Note:** The URL provided by ngrok is ephemeral and will change each time ngrok is started. You will need to update the request URL in the bot's settings each time you restart ngrok. [See how to update the request URL](#updating-the-request-url-in-bots-settings).
+4. Add the bot to your Slack workspace:
+    *   Navigate to the "OAuth & Permissions" tab.
+    *   Under "OAuth Tokens for Your Workspace", click on "Install App to Workspace".
+    *   Follow the prompts to authorize the app and add it to your workspace.
+5. Set up environment variables:
+    *   Obtain the `SLACK_SIGNING_SECRET`:
+        *   Go to the Slack API: https://api.slack.com/apps
+        *   Select your app.
+        *   Navigate to the "Basic Information" tab.
+        *   Copy the "Signing Secret".
+    *   Obtain the `SLACK_BOT_TOKEN`:
+        *   Go to the Slack API: https://api.slack.com/apps
+        *   Select your app.
+        *   Navigate to the "OAuth & Permissions" tab.
+        *   Copy the "Bot User OAuth Token".
+    *   Create a `.env` file in the root directory of your project and add the following lines:
+        ```env
+        SLACK_SIGNING_SECRET=your-signing-secret
+        SLACK_BOT_TOKEN=your-bot-token
+        ```
+6. Invite the bot to a channel:
+    *   Use the `/invite @your-bot-name` command in the Slack channel where you want the bot to be active.
+7. Run the code in `examples/integrations/slack/slack_example.py` to start the bot with your bot token and signing secret.
+8. Write e.g. "$bu what's the weather in Tokyo?" to start a browser-use task and get a response inside the Slack channel.
+
+## Installing and Starting ngrok
+
+To expose your local server to the internet, you can use ngrok. Follow these steps to install and start ngrok:
+
+1. Download ngrok from the official website: https://ngrok.com/download
+2. Create a free account and follow the official steps to install ngrok.
+3. Start ngrok by running the following command in your terminal:
+    ```sh
+    ngrok http 3000
+    ```
+    Replace `3000` with the port number your local server is running on.
+
+## Updating the Request URL in Bot's Settings
+
+If you need to update the request URL (e.g., when the ngrok URL changes), follow these steps:
+
+1. Go to the Slack API: https://api.slack.com/apps
+2. Select your app.
+3. Navigate to the "Event Subscriptions" tab.
+4. Update the "Request URL" field with the new ngrok URL. The URL should be something like: `https://<ngrok-id>.ngrok-free.app/slack/events`
+5. Save the changes.
+
+## Installing Required Packages
+
+To run this example, you need to install the following packages:
+
+- `fastapi`
+- `uvicorn`
+- `slack_sdk`
+
+You can install these packages using pip:
+
+```sh
+pip install fastapi uvicorn slack_sdk
+```
--- a/browser-use/examples/integrations/slack/slack_api.py
+++ b/browser-use/examples/integrations/slack/slack_api.py
@ -0,0 +1,130 @@
+import logging
+import os
+import sys
+from typing import Annotated
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from fastapi import Depends, FastAPI, HTTPException, Request
+from langchain_core.language_models.chat_models import BaseChatModel
+from slack_sdk.errors import SlackApiError
+from slack_sdk.signature import SignatureVerifier
+from slack_sdk.web.async_client import AsyncWebClient
+
+from browser_use import BrowserConfig
+from browser_use.agent.service import Agent, Browser
+from browser_use.logging_config import setup_logging
+
+# Configure logging through browser_use's helper, then create this module's logger.
+setup_logging()
+logger = logging.getLogger('slack')
+
+# FastAPI application on which the /slack/events route below is registered.
+app = FastAPI()
+
+
+class SlackBot:
+	"""Slack bot that runs Browser-Use tasks for messages starting with ``$bu ``.
+
+	Args:
+	    llm (BaseChatModel): Language model instance used by the agent
+	    bot_token (str): Slack bot token used to post messages
+	    signing_secret (str): Slack signing secret used to verify incoming requests
+	    ack (bool, optional): Whether to acknowledge task receipt with a message. Defaults to False
+	    browser_config (BrowserConfig, optional): Browser configuration settings.
+	        Defaults to headless mode
+
+	Raises:
+	    ValueError: If ``bot_token`` or ``signing_secret`` is empty.
+	"""
+
+	# NOTE: ``slack_events`` declares ``Annotated[SlackBot, Depends()]``, so FastAPI introspects this
+	# constructor's signature. Parameter names, annotations and defaults are therefore left unchanged.
+	def __init__(
+		self,
+		llm: BaseChatModel,
+		bot_token: str,
+		signing_secret: str,
+		ack: bool = False,
+		browser_config: BrowserConfig = BrowserConfig(headless=True),
+	):
+		if not bot_token or not signing_secret:
+			raise ValueError('Bot token and signing secret must be provided')
+
+		self.llm = llm
+		self.ack = ack
+		self.browser_config = browser_config
+		self.client = AsyncWebClient(token=bot_token)
+		self.signature_verifier = SignatureVerifier(signing_secret)
+		# Event ids already handled, used to ignore repeated deliveries of the same event.
+		# NOTE: entries are only ever added, so the set grows for the lifetime of the process.
+		self.processed_events = set()
+		logger.info('SlackBot initialized')
+
+	async def handle_event(self, event, event_id):
+		"""Handle one Slack event: run a browser task for ``$bu <task>`` messages and post the outcome.
+
+		Events without an id, already-processed events and ``bot_message`` events are ignored.
+		Errors are logged instead of being propagated.
+		"""
+		try:
+			logger.info(f'Received event id: {event_id}')
+			if not event_id:
+				logger.warning('Event ID missing in event data')
+				return
+
+			if event_id in self.processed_events:
+				logger.info(f'Event {event_id} already processed')
+				return
+			self.processed_events.add(event_id)
+
+			if event.get('subtype') == 'bot_message':
+				return
+
+			text = event.get('text')
+			user_id = event.get('user')
+			if not text or not text.startswith('$bu '):
+				return
+
+			task = text[len('$bu ') :].strip()
+			channel = event['channel']
+			thread_ts = event.get('ts')
+
+			if self.ack:
+				try:
+					await self.send_message(channel, f'<@{user_id}> Starting browser use task...', thread_ts=thread_ts)
+				except Exception as e:
+					logger.error(f'Error sending start message: {e}')
+
+			try:
+				agent_message = await self.run_agent(task)
+				await self.send_message(channel, f'<@{user_id}> {agent_message}', thread_ts=thread_ts)
+			except Exception as e:
+				await self.send_message(channel, f'Error during task execution: {str(e)}', thread_ts=thread_ts)
+		except Exception as e:
+			logger.error(f'Error in handle_event: {str(e)}')
+
+	async def run_agent(self, task: str) -> str:
+		"""Run one Browser-Use task in a fresh browser and return the text to post.
+
+		Returns the extracted content of the first result of the last history entry when the agent
+		reports it is done, a generic failure message otherwise, or an error description if the
+		run raised (errors are logged, not propagated).
+		"""
+		try:
+			browser = Browser(config=self.browser_config)
+			try:
+				agent = Agent(task=task, llm=self.llm, browser=browser)
+				result = await agent.run()
+			finally:
+				# A new browser is created for every task, so close it here; otherwise each
+				# event would leave a browser behind.
+				await browser.close()
+
+			agent_message = None
+			if result.is_done():
+				agent_message = result.history[-1].result[0].extracted_content
+
+			if agent_message is None:
+				agent_message = 'Oops! Something went wrong while running Browser-Use.'
+
+			return agent_message
+
+		except Exception as e:
+			logger.error(f'Error during task execution: {str(e)}')
+			return f'Error during task execution: {str(e)}'
+
+	async def send_message(self, channel, text, thread_ts=None):
+		"""Post ``text`` to ``channel`` (optionally in a thread); Slack API errors are logged, not raised."""
+		try:
+			await self.client.chat_postMessage(channel=channel, text=text, thread_ts=thread_ts)
+		except SlackApiError as e:
+			logger.error(f'Error sending message: {e.response["error"]}')
+
+
+@app.post('/slack/events')
+async def slack_events(request: Request, slack_bot: Annotated[SlackBot, Depends()]):
+	"""Receive Slack event callbacks.
+
+	Verifies the request signature, answers the URL-verification challenge and hands
+	``event`` payloads to ``SlackBot.handle_event``.
+
+	Raises:
+	    HTTPException: 400 when signature verification fails, 500 on any unexpected error.
+	"""
+	try:
+		if not slack_bot.signature_verifier.is_valid_request(await request.body(), dict(request.headers)):
+			logger.warning('Request verification failed')
+			raise HTTPException(status_code=400, detail='Request verification failed')
+
+		event_data = await request.json()
+		logger.info(f'Received event data: {event_data}')
+		if 'challenge' in event_data:
+			return {'challenge': event_data['challenge']}
+
+		if 'event' in event_data:
+			try:
+				await slack_bot.handle_event(event_data.get('event'), event_data.get('event_id'))
+			except Exception as e:
+				logger.error(f'Error handling event: {str(e)}')
+
+		return {}
+	except HTTPException:
+		# Deliberate HTTP errors (the 400 above) must reach the client unchanged; without this
+		# clause the generic handler below would turn them into a 500.
+		raise
+	except Exception as e:
+		logger.error(f'Error in slack_events: {str(e)}')
+		raise HTTPException(status_code=500, detail='Internal Server Error') from e
--- a/browser-use/examples/integrations/slack/slack_example.py
+++ b/browser-use/examples/integrations/slack/slack_example.py
@ -0,0 +1,46 @@
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import BrowserConfig
+from examples.integrations.slack.slack_api import SlackBot, app
+
+# load credentials from environment variables
+# (load_dotenv() above has already been called); fail fast at import time when one is missing.
+bot_token = os.getenv('SLACK_BOT_TOKEN')
+if not bot_token:
+	raise ValueError('Slack bot token not found in .env file.')
+
+signing_secret = os.getenv('SLACK_SIGNING_SECRET')
+if not signing_secret:
+	raise ValueError('Slack signing secret not found in .env file.')
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+# The key is wrapped in SecretStr before being handed to the chat model.
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+slack_bot = SlackBot(
+	llm=llm,  # required; instance of BaseChatModel
+	bot_token=bot_token,  # required; Slack bot token
+	signing_secret=signing_secret,  # required; Slack signing secret
+	ack=True,  # optional; whether to acknowledge task receipt with a message, defaults to False
+	browser_config=BrowserConfig(
+		headless=True
+	),  # optional; useful for changing headless mode or other browser configs, defaults to headless mode
+)
+
+# Register the configured bot as the provider for the SlackBot dependency on the imported `app`,
+# so the dependency resolves to this instance.
+app.dependency_overrides[SlackBot] = lambda: slack_bot
+
+if __name__ == '__main__':
+	import uvicorn
+
+	# Serve the `app` object imported at the top of this file - the one that carries the dependency
+	# override. An import string such as 'integrations.slack.slack_api:app' names the module
+	# differently from that import, so uvicorn would load a second copy of the module whose `app`
+	# has no override registered.
+	uvicorn.run(app, host='0.0.0.0', port=3000)
--- a/browser-use/examples/models/README.md
+++ b/browser-use/examples/models/README.md
@ -0,0 +1,2 @@
+# Gemini
+Detailed video on how to integrate browser-use with Gemini: https://www.youtube.com/watch?v=JluZiWBV_Tc
--- a/browser-use/examples/models/_ollama.py
+++ b/browser-use/examples/models/_ollama.py
@ -0,0 +1,42 @@
+# Optional: Disable telemetry
+# os.environ["ANONYMIZED_TELEMETRY"] = "false"
+
+# Optional: Set the OLLAMA host to a remote server
+# os.environ["OLLAMA_HOST"] = "http://x.x.x.x:11434"
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_ollama import ChatOllama
+
+from browser_use import Agent
+from browser_use.agent.views import AgentHistoryList
+
+
+async def run_search() -> AgentHistoryList:
+	"""Run the r/LocalLLaMA search task on a local Ollama model and return the agent's history."""
+	local_llm = ChatOllama(
+		model='qwen2.5:32b-instruct-q4_K_M',
+		num_ctx=32000,
+	)
+	search_agent = Agent(
+		task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.",
+		llm=local_llm,
+	)
+	return await search_agent.run()
+
+
+# Entry coroutine: runs the search and prints the returned history after two blank lines.
+async def main():
+	result = await run_search()
+	print('\n\n', result)
+
+
+# Start the run only when executed as a script.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/models/azure_openai.py
+++ b/browser-use/examples/models/azure_openai.py
@ -0,0 +1,49 @@
+"""
+Simple try of the agent.
+
+@dev You need to add AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import AzureChatOpenAI
+
+from browser_use import Agent
+
+# Retrieve Azure-specific environment variables
+azure_openai_api_key = os.getenv('AZURE_OPENAI_KEY')
+azure_openai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
+
+# Fail fast at import time when either variable is missing or empty.
+if not azure_openai_api_key or not azure_openai_endpoint:
+	raise ValueError('AZURE_OPENAI_KEY or AZURE_OPENAI_ENDPOINT is not set')
+
+# Initialize the Azure OpenAI client
+llm = AzureChatOpenAI(
+	model_name='gpt-4o',
+	openai_api_key=azure_openai_api_key,
+	azure_endpoint=azure_openai_endpoint,  # Corrected to use azure_endpoint instead of openai_api_base
+	deployment_name='gpt-4o',  # Use deployment_name for Azure models
+	api_version='2024-08-01-preview',  # Explicitly set the API version here
+)
+
+# Built at import time; main() only runs it.
+agent = Agent(
+	task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
+	llm=llm,
+	enable_memory=True,
+)
+
+
+async def main() -> None:
+	"""Run the agent for at most 10 steps, then wait for the user to press Enter."""
+	await agent.run(max_steps=10)
+	input('Press Enter to continue...')
+
+
+# Start the run only when executed as a script, not when the module is imported.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/models/bedrock_claude.py
+++ b/browser-use/examples/models/bedrock_claude.py
@ -0,0 +1,75 @@
+"""
+Automated news analysis and sentiment scoring using Bedrock.
+
+Ensure you have browser-use installed with `examples` extra, i.e. `uv pip install 'browser-use[examples]'`
+
+@dev Ensure AWS environment variables are set correctly for Bedrock access.
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import boto3
+from botocore.config import Config
+from langchain_aws import ChatBedrockConverse
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+
+def get_llm():
+	"""Build the Bedrock-backed chat model used by this example.
+
+	Creates a ``bedrock-runtime`` client in ``us-east-1`` with adaptive retries (up to 10 attempts)
+	and wraps it in ``ChatBedrockConverse``.
+	"""
+	retry_settings = Config(retries={'max_attempts': 10, 'mode': 'adaptive'})
+	runtime_client = boto3.client('bedrock-runtime', region_name='us-east-1', config=retry_settings)
+
+	model_options = {
+		'model_id': 'us.anthropic.claude-3-5-sonnet-20241022-v2:0',
+		'temperature': 0.0,
+		'max_tokens': None,
+		'client': runtime_client,
+	}
+	return ChatBedrockConverse(**model_options)
+
+
+# Define the task for the agent
+task = (
+	"Visit cnn.com, navigate to the 'World News' section, and identify the latest headline. "
+	'Open the first article and summarize its content in 3-4 sentences. '
+	'Additionally, analyze the sentiment of the article (positive, neutral, or negative) '
+	'and provide a confidence score for the sentiment. Present the result in a tabular format.'
+)
+
+# --query overrides the default task above. Arguments are parsed at import time.
+parser = argparse.ArgumentParser()
+parser.add_argument('--query', type=str, help='The query for the agent to execute', default=task)
+args = parser.parse_args()
+
+llm = get_llm()
+
+# Default browser configuration; uncomment the line below to use a local Chrome binary instead.
+browser = Browser(
+	config=BrowserConfig(
+		# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+
+# Built at import time; main() only runs it.
+agent = Agent(
+	task=args.query,
+	llm=llm,
+	controller=Controller(),
+	browser=browser,
+	validate_output=True,
+)
+
+
+async def main() -> None:
+	"""Run the agent for at most 30 steps and close the browser afterwards."""
+	try:
+		await agent.run(max_steps=30)
+	finally:
+		# Close the browser even if the run raises.
+		await browser.close()
+
+
+# Start the run only when executed as a script, not when the module is imported.
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/models/claude-3.7-sonnet.py
+++ b/browser-use/examples/models/claude-3.7-sonnet.py
@ -0,0 +1,32 @@
+"""
+Simple script that runs the task of opening amazon and searching.
+@dev Ensure we have a `ANTHROPIC_API_KEY` variable in our `.env` file.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_anthropic import ChatAnthropic
+
+from browser_use import Agent
+
+# Anthropic chat model configured with temperature=0.0 and timeout=30.
+llm = ChatAnthropic(model_name='claude-3-7-sonnet-20250219', temperature=0.0, timeout=30, stop=None)
+
+# No browser is passed to the agent here; only the task and the model are supplied.
+agent = Agent(
+	task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the module-level agent, limited to 10 steps."""
+	await agent.run(max_steps=10)
+
+
+# NOTE: runs unconditionally, including when this module is imported.
+asyncio.run(main())
--- a/browser-use/examples/models/deepseek-r1.py
+++ b/browser-use/examples/models/deepseek-r1.py
@ -0,0 +1,38 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_deepseek import ChatDeepSeek
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('DEEPSEEK_API_KEY', '')
+if not api_key:
+	raise ValueError('DEEPSEEK_API_KEY is not set')
+
+
+async def run_search():
+	agent = Agent(
+		task=('go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result'),
+		llm=ChatDeepSeek(
+			base_url='https://api.deepseek.com/v1',
+			model='deepseek-reasoner',
+			api_key=SecretStr(api_key),
+		),
+		use_vision=False,
+		max_failures=2,
+		max_actions_per_step=1,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/models/deepseek.py
+++ b/browser-use/examples/models/deepseek.py
@ -0,0 +1,41 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_deepseek import ChatDeepSeek
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('DEEPSEEK_API_KEY', '')
+if not api_key:
+	raise ValueError('DEEPSEEK_API_KEY is not set')
+
+
+async def run_search():
+	agent = Agent(
+		task=(
+			'1. Go to https://www.reddit.com/r/LocalLLaMA '
+			"2. Search for 'browser use' in the search bar"
+			'3. Click on first result'
+			'4. Return the first comment'
+		),
+		llm=ChatDeepSeek(
+			base_url='https://api.deepseek.com/v1',
+			model='deepseek-chat',
+			api_key=SecretStr(api_key),
+		),
+		use_vision=False,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/models/gemini.py
+++ b/browser-use/examples/models/gemini.py
@ -0,0 +1,45 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent, BrowserConfig
+from browser_use.browser.browser import Browser
+from browser_use.browser.context import BrowserContextConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+browser = Browser(
+	config=BrowserConfig(
+		new_context_config=BrowserContextConfig(
+			viewport_expansion=0,
+		)
+	)
+)
+
+
+async def run_search():
+	"""Run the Amazon laptop-price task with the module-level Gemini model and browser.
+
+	The agent is limited to 4 actions per step and 25 steps in total.
+	NOTE(review): the module-level `browser` is not closed after the run.
+	"""
+	agent = Agent(
+		task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
+		llm=llm,
+		max_actions_per_step=4,
+		browser=browser,
+	)
+
+	await agent.run(max_steps=25)
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/models/gpt-4o.py
+++ b/browser-use/examples/models/gpt-4o.py
@ -0,0 +1,33 @@
+"""
+Simple try of the agent.
+
+@dev You need to add OPENAI_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Model and agent are created at import time; only the run itself happens inside main().
+llm = ChatOpenAI(model='gpt-4o')
+agent = Agent(
+	task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
+	llm=llm,
+)
+
+
+async def main():
+	"""Run the agent for at most 10 steps, then wait for Enter before returning."""
+	await agent.run(max_steps=10)
+	input('Press Enter to continue...')
+
+
+# NOTE: runs unconditionally, including when this module is imported.
+asyncio.run(main())
--- a/browser-use/examples/models/grok.py
+++ b/browser-use/examples/models/grok.py
@ -0,0 +1,41 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('GROK_API_KEY', '')
+if not api_key:
+	raise ValueError('GROK_API_KEY is not set')
+
+
+async def run_search():
+	agent = Agent(
+		task=(
+			'1. Go to https://www.amazon.com'
+			'2. Search for "wireless headphones"'
+			'3. Filter by "Highest customer rating"'
+			'4. Return the title and price of the first product'
+		),
+		llm=ChatOpenAI(
+			base_url='https://api.x.ai/v1',
+			model='grok-3-beta',
+			api_key=SecretStr(api_key),
+		),
+		use_vision=False,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/models/novita.py
+++ b/browser-use/examples/models/novita.py
@ -0,0 +1,47 @@
+"""
+Simple try of the agent.
+
+@dev You need to add NOVITA_API_KEY to your environment variables.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+
+api_key = os.getenv('NOVITA_API_KEY', '')
+if not api_key:
+	raise ValueError('NOVITA_API_KEY is not set')
+
+
+async def run_search():
+	agent = Agent(
+		task=(
+			'1. Go to https://www.reddit.com/r/LocalLLaMA '
+			"2. Search for 'browser use' in the search bar"
+			'3. Click on first result'
+			'4. Return the first comment'
+		),
+		llm=ChatOpenAI(
+			base_url='https://api.novita.ai/v3/openai',
+			model='deepseek/deepseek-v3-0324',
+			api_key=SecretStr(api_key),
+		),
+		use_vision=False,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/models/qwen.py
+++ b/browser-use/examples/models/qwen.py
@ -0,0 +1,34 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_ollama import ChatOllama
+
+from browser_use import Agent
+
+
+async def run_search():
+	agent = Agent(
+		task=(
+			"1. Go to https://www.reddit.com/r/LocalLLaMA2. Search for 'browser use' in the search bar3. Click search4. Call done"
+		),
+		llm=ChatOllama(
+			# model='qwen2.5:32b-instruct-q4_K_M',
+			# model='qwen2.5:14b',
+			model='qwen2.5:latest',
+			num_ctx=128000,
+		),
+		max_actions_per_step=1,
+	)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(run_search())
--- a/browser-use/examples/notebook/agent_browsing.ipynb
+++ b/browser-use/examples/notebook/agent_browsing.ipynb
--- a/browser-use/examples/simple.py
+++ b/browser-use/examples/simple.py
@ -0,0 +1,30 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+# Initialize the model
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+task = 'Go to kayak.com and find the cheapest flight from Zurich to San Francisco on 2025-05-01'
+
+agent = Agent(task=task, llm=llm)
+
+
+async def main():
+	"""Run the module-level agent on the flight-search task defined above."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/ui/README.md
+++ b/browser-use/examples/ui/README.md
@ -0,0 +1,7 @@
+# **User Interfaces of Browser-Use**
+
+| **File Name**          | **User Interface** | **Description**                           | **Example Usage**                         |
+|------------------------|-------------------|-------------------------------------------|-------------------------------------------|
+| `command_line.py`      | **Terminal**      | Parses arguments for command-line execution. | `python command_line.py`                  |
+| `gradio_demo.py`       | **Gradio**        | Provides a Gradio-based interactive UI.  | `python gradio_demo.py`                   |
+| `streamlit_demo.py`    | **Streamlit**     | Runs a Streamlit-based web interface.    | `python -m streamlit run streamlit_demo.py` |
--- a/browser-use/examples/ui/command_line.py
+++ b/browser-use/examples/ui/command_line.py
@ -0,0 +1,98 @@
+"""
+To Use It:
+
+Example 1: Using OpenAI (default), with default task: 'go to reddit and search for posts about browser-use'
+python command_line.py
+
+Example 2: Using OpenAI with a Custom Query
+python command_line.py --query "go to google and search for browser-use"
+
+Example 3: Using Anthropic's Claude Model with a Custom Query
+python command_line.py --query "find latest Python tutorials on Medium" --provider anthropic
+
+"""
+
+import argparse
+import asyncio
+import os
+import sys
+
+# Ensure local repository (browser_use) is accessible
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+
+def get_llm(provider: str):
+	if provider == 'anthropic':
+		from langchain_anthropic import ChatAnthropic
+
+		api_key = os.getenv('ANTHROPIC_API_KEY')
+		if not api_key:
+			raise ValueError('Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.')
+
+		return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
+	elif provider == 'openai':
+		from langchain_openai import ChatOpenAI
+
+		api_key = os.getenv('OPENAI_API_KEY')
+		if not api_key:
+			raise ValueError('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
+
+		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+
+	else:
+		raise ValueError(f'Unsupported provider: {provider}')
+
+
+def parse_arguments():
+	"""Parse command-line arguments."""
+	parser = argparse.ArgumentParser(description='Automate browser tasks using an LLM agent.')
+	parser.add_argument(
+		'--query', type=str, help='The query to process', default='go to reddit and search for posts about browser-use'
+	)
+	parser.add_argument(
+		'--provider',
+		type=str,
+		choices=['openai', 'anthropic'],
+		default='openai',
+		help='The model provider to use (default: openai)',
+	)
+	return parser.parse_args()
+
+
+def initialize_agent(query: str, provider: str):
+	"""Initialize the browser agent with the given query and provider."""
+	llm = get_llm(provider)
+	controller = Controller()
+	browser = Browser(config=BrowserConfig())
+
+	return Agent(
+		task=query,
+		llm=llm,
+		controller=controller,
+		browser=browser,
+		use_vision=True,
+		max_actions_per_step=1,
+	), browser
+
+
+async def main():
+	"""Main async function to run the agent."""
+	args = parse_arguments()
+	agent, browser = initialize_agent(args.query, args.provider)
+
+	await agent.run(max_steps=25)
+
+	input('Press Enter to close the browser...')
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/ui/gradio_demo.py
+++ b/browser-use/examples/ui/gradio_demo.py
@ -0,0 +1,109 @@
+import asyncio
+import os
+import sys
+from dataclasses import dataclass
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Third-party imports
+import gradio as gr
+from langchain_openai import ChatOpenAI
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
+
+# Local module imports
+from browser_use import Agent
+
+
+@dataclass
+class ActionResult:
+	"""Local record describing a single action result.
+
+	NOTE(review): not referenced anywhere else in this file.
+	"""
+
+	is_done: bool
+	extracted_content: str | None
+	error: str | None
+	include_in_memory: bool
+
+
+@dataclass
+class AgentHistoryList:
+	"""Local container pairing action results with model outputs.
+
+	NOTE(review): not referenced anywhere else in this file.
+	"""
+
+	all_results: list[ActionResult]
+	all_model_outputs: list[dict]
+
+
+def parse_agent_history(history_str: str) -> None:
+	"""Pretty-print the `extracted_content` of each `ActionResult(` entry found in `history_str`.
+
+	The input is treated as plain text and split on the literal
+	'ActionResult('; one rich panel is printed per entry that has
+	non-empty content. Nothing is returned.
+	"""
+	console = Console()
+
+	# Split the content into sections based on ActionResult entries
+	sections = history_str.split('ActionResult(')
+
+	for i, section in enumerate(sections[1:], 1):  # Skip first empty section
+		# Extract relevant information
+		content = ''
+		if 'extracted_content=' in section:
+			# Takes the text up to the first comma, so content that itself contains a comma is truncated.
+			content = section.split('extracted_content=')[1].split(',')[0].strip("'")
+
+		if content:
+			header = Text(f'Step {i}', style='bold blue')
+			panel = Panel(content, title=header, border_style='blue')
+			console.print(panel)
+			console.print()
+
+
+async def run_browser_task(
+	task: str,
+	api_key: str,
+	model: str = 'gpt-4o',
+	headless: bool = True,
+) -> str:
+	if not api_key.strip():
+		return 'Please provide an API key'
+
+	os.environ['OPENAI_API_KEY'] = api_key
+
+	try:
+		agent = Agent(
+			task=task,
+			llm=ChatOpenAI(model='gpt-4o'),
+		)
+		result = await agent.run()
+		#  TODO: The result cloud be parsed better
+		return result
+	except Exception as e:
+		return f'Error: {str(e)}'
+
+
+def create_ui():
+	with gr.Blocks(title='Browser Use GUI') as interface:
+		gr.Markdown('# Browser Use Task Automation')
+
+		with gr.Row():
+			with gr.Column():
+				api_key = gr.Textbox(label='OpenAI API Key', placeholder='sk-...', type='password')
+				task = gr.Textbox(
+					label='Task Description',
+					placeholder='E.g., Find flights from New York to London for next week',
+					lines=3,
+				)
+				model = gr.Dropdown(choices=['gpt-4', 'gpt-3.5-turbo'], label='Model', value='gpt-4')
+				headless = gr.Checkbox(label='Run Headless', value=True)
+				submit_btn = gr.Button('Run Task')
+
+			with gr.Column():
+				output = gr.Textbox(label='Output', lines=10, interactive=False)
+
+		submit_btn.click(
+			fn=lambda *args: asyncio.run(run_browser_task(*args)),
+			inputs=[task, api_key, model, headless],
+			outputs=output,
+		)
+
+	return interface
+
+
+if __name__ == '__main__':
+	demo = create_ui()
+	demo.launch()
--- a/browser-use/examples/ui/streamlit_demo.py
+++ b/browser-use/examples/ui/streamlit_demo.py
@ -0,0 +1,86 @@
+"""
+To use it, you'll need to install streamlit, and run with:
+
+python -m streamlit run streamlit_demo.py
+
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import streamlit as st
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.controller.service import Controller
+
+if os.name == 'nt':
+	asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+
+# Function to get the LLM based on provider
+def get_llm(provider: str):
+	if provider == 'anthropic':
+		from langchain_anthropic import ChatAnthropic
+
+		api_key = os.getenv('ANTHROPIC_API_KEY')
+		if not api_key:
+			st.error('Error: ANTHROPIC_API_KEY is not set. Please provide a valid API key.')
+			st.stop()
+
+		return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None, temperature=0.0)
+	elif provider == 'openai':
+		from langchain_openai import ChatOpenAI
+
+		api_key = os.getenv('OPENAI_API_KEY')
+		if not api_key:
+			st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
+			st.stop()
+
+		return ChatOpenAI(model='gpt-4o', temperature=0.0)
+	else:
+		st.error(f'Unsupported provider: {provider}')
+		st.stop()
+
+
+# Function to initialize the agent
+def initialize_agent(query: str, provider: str):
+	llm = get_llm(provider)
+	controller = Controller()
+	browser = Browser(config=BrowserConfig())
+
+	return Agent(
+		task=query,
+		llm=llm,
+		controller=controller,
+		browser=browser,
+		use_vision=True,
+		max_actions_per_step=1,
+	), browser
+
+
+# Streamlit UI
+st.title('Automated Browser Agent with LLMs 🤖')
+
+query = st.text_input('Enter your query:', 'go to reddit and search for posts about browser-use')
+provider = st.radio('Select LLM Provider:', ['openai', 'anthropic'], index=0)
+
+if st.button('Run Agent'):
+	st.write('Initializing agent...')
+	agent, browser = initialize_agent(query, provider)
+
+	async def run_agent():
+		with st.spinner('Running automation...'):
+			await agent.run(max_steps=25)
+		st.success('Task completed! 🎉')
+
+	asyncio.run(run_agent())
+
+	st.button('Close Browser', on_click=lambda: asyncio.run(browser.close()))
--- a/browser-use/examples/use-cases/README.md
+++ b/browser-use/examples/use-cases/README.md
@ -0,0 +1,12 @@
+# Use Cases of Browser-Use
+
+| File Name | Description |
+|-----------|------------|
+| `captcha.py` | Automates CAPTCHA solving on a demo website. |
+| `check_appointment.py` | Checks for available visa appointment slots on the Greece MFA website. |
+| `find_and_apply_to_jobs.py` | Searches for job listings, evaluates relevance based on a CV, and applies automatically. |
+| `online_coding_agent.py` | Implements a multi-agent system for online code editors, with separate agents for coding and execution. |
+| `post-twitter.py` | Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies. |
+| `scrolling_page.py` | Automates webpage scrolling with various scrolling actions and text search functionality. |
+| `twitter_post_using_cookies.py` | Automates posting on X (Twitter) using stored authentication cookies. |
+| `web_voyager_agent.py` | A general-purpose web navigation agent for tasks like flight booking and course searching. |
--- a/browser-use/examples/use-cases/captcha.py
+++ b/browser-use/examples/use-cases/captcha.py
@ -0,0 +1,40 @@
+"""
+Goal: Automates CAPTCHA solving on a demo website.
+
+
+Simple try of the agent.
+@dev You need to add OPENAI_API_KEY to your environment variables.
+NOTE: captchas are hard. For this example it works. But e.g. for iframes it does not.
+for this example it helps to zoom in.
+"""
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+async def main():
+	llm = ChatOpenAI(model='gpt-4o')
+	agent = Agent(
+		task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
+		llm=llm,
+	)
+	await agent.run()
+	input('Press Enter to exit')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/check_appointment.py
+++ b/browser-use/examples/use-cases/check_appointment.py
@ -0,0 +1,52 @@
+# Goal: Checks for available visa appointment slots on the Greece MFA website.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.controller.service import Controller
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+controller = Controller()
+
+
+class WebpageInfo(BaseModel):
+	"""Parameter model of the 'Go to the webpage' action; holds the appointment page link."""
+
+	# Defaults to the appointment page this example checks.
+	link: str = 'https://appointment.mfa.gr/en/reservations/aero/ireland-grcon-dub/'
+
+
+@controller.action('Go to the webpage', param_model=WebpageInfo)
+def go_to_webpage(webpage_info: WebpageInfo):
+	"""Return the link stored in `webpage_info`; the function itself performs no navigation."""
+	return webpage_info.link
+
+
+async def main():
+	"""Main function to execute the agent task."""
+	task = (
+		'Go to the Greece MFA webpage via the link I provided you.'
+		'Check the visa appointment dates. If there is no available date in this month, check the next month.'
+		'If there is no available date in both months, tell me there is no available date.'
+	)
+
+	model = ChatOpenAI(model='gpt-4o-mini', api_key=SecretStr(os.getenv('OPENAI_API_KEY', '')))
+	agent = Agent(task, model, controller=controller, use_vision=True)
+
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/find_and_apply_to_jobs.py
+++ b/browser-use/examples/use-cases/find_and_apply_to_jobs.py
@ -0,0 +1,160 @@
+"""
+Goal: Searches for job listings, evaluates relevance based on a CV, and applies
+
+@dev You need to add AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT to your environment variables.
+Also you have to install PyPDF2 to read pdf files: pip install PyPDF2
+"""
+
+import asyncio
+import csv
+import logging
+import os
+import sys
+from pathlib import Path
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import AzureChatOpenAI
+from pydantic import BaseModel, SecretStr
+from PyPDF2 import PdfReader
+
+from browser_use import ActionResult, Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext
+
+required_env_vars = ['AZURE_OPENAI_KEY', 'AZURE_OPENAI_ENDPOINT']
+for var in required_env_vars:
+	if not os.getenv(var):
+		raise ValueError(f'{var} is not set. Please add it to your environment variables.')
+
+logger = logging.getLogger(__name__)
+# full screen mode
+controller = Controller()
+
+# NOTE: This is the path to your cv file
+CV = Path.cwd() / 'cv_04_24.pdf'
+
+if not CV.exists():
+	raise FileNotFoundError(f'You need to set the path to your cv file in the CV variable. CV file not found at {CV}')
+
+
+class Job(BaseModel):
+	"""A job listing collected by the agent; used as the parameter model of `save_jobs`."""
+
+	title: str
+	link: str
+	company: str
+	# Score for how well the job fits the profile (see the `save_jobs` action description).
+	fit_score: float
+	# Optional details; None when not supplied.
+	location: str | None = None
+	salary: str | None = None
+
+
+@controller.action('Save jobs to file - with a score how well it fits to my profile', param_model=Job)
+def save_jobs(job: Job):
+	with open('jobs.csv', 'a', newline='') as f:
+		writer = csv.writer(f)
+		writer.writerow([job.title, job.company, job.link, job.salary, job.location])
+
+	return 'Saved job to file'
+
+
+@controller.action('Read jobs from file')
+def read_jobs():
+	with open('jobs.csv') as f:
+		return f.read()
+
+
+@controller.action('Read my cv for context to fill forms')
+def read_cv():
+	pdf = PdfReader(CV)
+	text = ''
+	for page in pdf.pages:
+		text += page.extract_text() or ''
+	logger.info(f'Read cv with {len(text)} characters')
+	return ActionResult(extracted_content=text, include_in_memory=True)
+
+
+@controller.action(
+	'Upload cv to element - call this function to upload if element is not found, try with different index of the same upload element',
+)
+async def upload_cv(index: int, browser: BrowserContext):
+	"""Upload the CV file to the file-upload element associated with DOM index `index`.
+
+	Returns an `ActionResult` with a success message in `extracted_content`,
+	or with `error` set when the element cannot be found or the upload fails.
+	"""
+	path = str(CV.absolute())
+	dom_el = await browser.get_dom_element_by_index(index)
+
+	if dom_el is None:
+		return ActionResult(error=f'No element found at index {index}')
+
+	# Resolve the file-upload element belonging to the indexed DOM element.
+	file_upload_dom_el = dom_el.get_file_upload_element()
+
+	if file_upload_dom_el is None:
+		logger.info(f'No file upload element found at index {index}')
+		return ActionResult(error=f'No file upload element found at index {index}')
+
+	# Obtain the handle on which set_input_files is called below.
+	file_upload_el = await browser.get_locate_element(file_upload_dom_el)
+
+	if file_upload_el is None:
+		logger.info(f'No file upload element found at index {index}')
+		return ActionResult(error=f'No file upload element found at index {index}')
+
+	try:
+		await file_upload_el.set_input_files(path)
+		msg = f'Successfully uploaded file "{path}" to index {index}'
+		logger.info(msg)
+		return ActionResult(extracted_content=msg)
+	except Exception as e:
+		# The exception text is only logged at debug level; the returned error does not include it.
+		logger.debug(f'Error in set_input_files: {str(e)}')
+		return ActionResult(error=f'Failed to upload file to index {index}')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+		disable_security=True,
+	)
+)
+
+
+async def main():
+	# ground_task = (
+	# 	'You are a professional job finder. '
+	# 	'1. Read my cv with read_cv'
+	# 	'2. Read the saved jobs file '
+	# 	'3. start applying to the first link of Amazon '
+	# 	'You can navigate through pages e.g. by scrolling '
+	# 	'Make sure to be on the english version of the page'
+	# )
+	ground_task = (
+		'You are a professional job finder. '
+		'1. Read my cv with read_cv'
+		'find ml internships in and save them to a file'
+		'search at company:'
+	)
+	tasks = [
+		ground_task + '\n' + 'Google',
+		# ground_task + '\n' + 'Amazon',
+		# ground_task + '\n' + 'Apple',
+		# ground_task + '\n' + 'Microsoft',
+		# ground_task
+		# + '\n'
+		# + 'go to https://nvidia.wd5.myworkdayjobs.com/en-US/NVIDIAExternalCareerSite/job/Taiwan%2C-Remote/Fulfillment-Analyst---New-College-Graduate-2025_JR1988949/apply/autofillWithResume?workerSubType=0c40f6bd1d8f10adf6dae42e46d44a17&workerSubType=ab40a98049581037a3ada55b087049b7 NVIDIA',
+		# ground_task + '\n' + 'Meta',
+	]
+	model = AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+
+	agents = []
+	for task in tasks:
+		agent = Agent(task=task, llm=model, controller=controller, browser=browser)
+		agents.append(agent)
+
+	await asyncio.gather(*[agent.run() for agent in agents])
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/find_influencer_profiles.py
+++ b/browser-use/examples/use-cases/find_influencer_profiles.py
@ -0,0 +1,90 @@
+"""
+Show how to use custom outputs.
+
+@dev You need to add OPENAI_API_KEY and BEARER_TOKEN (API key for the web search) to your environment variables.
+"""
+
+import asyncio
+import json
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+import httpx
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel
+
+from browser_use import Agent, Controller
+from browser_use.agent.views import ActionResult
+
+
+class Profile(BaseModel):
+	"""One social media profile: the platform name and the profile's URL."""
+
+	platform: str
+	profile_url: str
+
+
+class Profiles(BaseModel):
+	"""List of profiles; passed to the controller as its `output_model`."""
+
+	profiles: list[Profile]
+
+
+controller = Controller(exclude_actions=['search_google'], output_model=Profiles)
+BEARER_TOKEN = os.getenv('BEARER_TOKEN')
+
+if not BEARER_TOKEN:
+	# use the api key for ask tessa
+	# you can also use other apis like exa, xAI, perplexity, etc.
+	raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
+
+
+@controller.registry.action('Search the web for a specific query')
+async def search_web(query: str):
+	keys_to_use = ['url', 'title', 'content', 'author', 'score']
+	headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
+	async with httpx.AsyncClient() as client:
+		response = await client.post(
+			'https://asktessa.ai/api/search',
+			headers=headers,
+			json={'query': query},
+		)
+
+	final_results = [
+		{key: source[key] for key in keys_to_use if key in source}
+		for source in await response.json()['sources']
+		if source['score'] >= 0.2
+	]
+	# print(json.dumps(final_results, indent=4))
+	result_text = json.dumps(final_results, indent=4)
+	print(result_text)
+	return ActionResult(extracted_content=result_text, include_in_memory=True)
+
+
+async def main():
+	task = (
+		'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
+		' https://www.tiktokv.com/share/video/7470981717659110678/  '
+	)
+	model = ChatOpenAI(model='gpt-4o')
+	agent = Agent(task=task, llm=model, controller=controller)
+
+	history = await agent.run()
+
+	result = history.final_result()
+	if result:
+		parsed: Profiles = Profiles.model_validate_json(result)
+
+		for profile in parsed.profiles:
+			print('\n--------------------------------')
+			print(f'Platform:         {profile.platform}')
+			print(f'Profile URL:      {profile.profile_url}')
+
+	else:
+		print('No result')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/google_sheets.py
+++ b/browser-use/examples/use-cases/google_sheets.py
@ -0,0 +1,193 @@
+import os
+import sys
+
+from browser_use.browser.context import BrowserContext
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+import asyncio
+
+import pyperclip
+from dotenv import load_dotenv
+from langchain_openai import ChatOpenAI
+
+from browser_use import ActionResult, Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+browser = Browser(
+	config=BrowserConfig(
+		browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	),
+)
+
+# Load environment variables
+load_dotenv()
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+controller = Controller()
+
+
+def is_google_sheet(page) -> bool:
+	"""Return True when the page's URL points at a Google Sheets document."""
+	sheets_prefix = 'https://docs.google.com/spreadsheets/'
+	current_url = page.url
+	return current_url.startswith(sheets_prefix)
+
+
+@controller.registry.action('Google Sheets: Open a specific Google Sheet')
+async def open_google_sheet(browser: BrowserContext, google_sheet_url: str):
+	"""Navigate the current page to the given sheet unless it is already showing it.
+
+	Returns an error ActionResult when the page that ends up loaded is not a
+	Google Sheets URL, otherwise a confirmation that is not kept in memory.
+	"""
+	page = await browser.get_current_page()
+	already_open = page.url == google_sheet_url
+	if not already_open:
+		await page.goto(google_sheet_url)
+		await page.wait_for_load_state()
+
+	if is_google_sheet(page):
+		return ActionResult(extracted_content=f'Opened Google Sheet {google_sheet_url}', include_in_memory=False)
+	return ActionResult(error='Failed to open Google Sheet, are you sure you have permissions to access this sheet?')
+
+
+@controller.registry.action('Google Sheets: Get the contents of the entire sheet', page_filter=is_google_sheet)
+async def get_sheet_contents(browser: BrowserContext):
+	"""Copy the whole sheet through the clipboard and return it to the agent.
+
+	The clipboard is read with pyperclip in the Python process, so this only
+	sees what the browser copied when both share the same system clipboard.
+
+	Returns:
+		ActionResult with the clipboard text as extracted_content, kept in memory.
+	"""
+	page = await browser.get_current_page()
+
+	# select all cells
+	await page.keyboard.press('Enter')
+	await page.keyboard.press('Escape')
+	await page.keyboard.press('ControlOrMeta+A')
+	await page.keyboard.press('ControlOrMeta+C')
+	# give the copy a moment to reach the clipboard before reading it,
+	# the same pause get_range_contents uses; without it stale content can be returned
+	await asyncio.sleep(0.1)
+
+	extracted_tsv = pyperclip.paste()
+	return ActionResult(extracted_content=extracted_tsv, include_in_memory=True)
+
+
+@controller.registry.action('Google Sheets: Select a specific cell or range of cells', page_filter=is_google_sheet)
+async def select_cell_or_range(browser: BrowserContext, cell_or_range: str):
+	"""Select a cell or range by typing it into the sheet's go-to-range popup.
+
+	Args:
+		browser: Browser context whose current page is the sheet.
+		cell_or_range: Text typed into the popup (presumably A1 notation such as 'B2' or 'A1:F10' - confirm).
+
+	Returns:
+		ActionResult confirming the selection; it is returned even if the jump
+		failed, since the outcome of the popup is never checked.
+	"""
+	page = await browser.get_current_page()
+
+	await page.keyboard.press('Enter')  # make sure we dont delete current cell contents if we were last editing
+	await page.keyboard.press('Escape')  # to clear current focus (otherwise select range popup is additive)
+	await asyncio.sleep(0.1)
+	await page.keyboard.press('Home')  # move cursor to the top left of the sheet first
+	await page.keyboard.press('ArrowUp')
+	await asyncio.sleep(0.1)
+	# NOTE(review): other actions in this file use 'ControlOrMeta+...'; confirm 'Control+G' is intended on macOS
+	await page.keyboard.press('Control+G')  # open the goto range popup
+	await asyncio.sleep(0.2)
+	# NOTE(review): Playwright documents `delay` in milliseconds, so 0.05 is effectively no delay - confirm intent
+	await page.keyboard.type(cell_or_range, delay=0.05)
+	await asyncio.sleep(0.2)
+	await page.keyboard.press('Enter')
+	await asyncio.sleep(0.2)
+	await page.keyboard.press('Escape')  # to make sure the popup still closes in the case where the jump failed
+	return ActionResult(extracted_content=f'Selected cell {cell_or_range}', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Get the contents of a specific cell or range of cells', page_filter=is_google_sheet)
+async def get_range_contents(browser: BrowserContext, cell_or_range: str):
+	"""Select the requested cell or range, copy it, and return the clipboard text."""
+	page = await browser.get_current_page()
+	await select_cell_or_range(browser, cell_or_range)
+
+	# copy the selection, then pause briefly before reading the clipboard
+	await page.keyboard.press('ControlOrMeta+C')
+	await asyncio.sleep(0.1)
+
+	return ActionResult(extracted_content=pyperclip.paste(), include_in_memory=True)
+
+
+@controller.registry.action('Google Sheets: Clear the currently selected cells', page_filter=is_google_sheet)
+async def clear_selected_range(browser: BrowserContext):
+	"""Press Backspace on the current page to clear whatever is selected."""
+	sheet_page = await browser.get_current_page()
+	keyboard = sheet_page.keyboard
+	await keyboard.press('Backspace')
+
+	outcome = ActionResult(extracted_content='Cleared selected range', include_in_memory=False)
+	return outcome
+
+
+@controller.registry.action('Google Sheets: Input text into the currently selected cell', page_filter=is_google_sheet)
+async def input_selected_cell_text(browser: BrowserContext, text: str):
+	"""Type text into the selected cell and commit it."""
+	sheet_page = await browser.get_current_page()
+	keyboard = sheet_page.keyboard
+
+	# NOTE(review): Playwright documents `delay` in milliseconds, so 0.1 is effectively no delay - confirm intent
+	await keyboard.type(text, delay=0.1)
+	# Enter commits the input so the next action cannot overwrite it;
+	# ArrowUp presumably moves back to the edited cell afterwards - confirm
+	for key in ('Enter', 'ArrowUp'):
+		await keyboard.press(key)
+
+	return ActionResult(extracted_content=f'Inputted text {text}', include_in_memory=False)
+
+
+@controller.registry.action('Google Sheets: Batch update a range of cells', page_filter=is_google_sheet)
+async def update_range_contents(browser: BrowserContext, range: str, new_contents_tsv: str):
+	"""Select a range and paste TSV content into it through a synthetic paste event.
+
+	Args:
+		browser: Browser context whose current page is the sheet.
+		range: Cell or range to select before pasting (name kept for interface compatibility).
+		new_contents_tsv: Tab-separated values to paste.
+
+	Returns:
+		ActionResult describing the update; not kept in memory.
+	"""
+	page = await browser.get_current_page()
+
+	await select_cell_or_range(browser, range)
+
+	# simulate paste event from clipboard with TSV content
+	# The TSV is passed as an argument rather than interpolated into the script:
+	# cell data containing backticks, `${...}` or backslashes would otherwise
+	# corrupt the JavaScript template literal or run as code in the page.
+	await page.evaluate(
+		"""(tsv) => {
+			const clipboardData = new DataTransfer();
+			clipboardData.setData('text/plain', tsv);
+			document.activeElement.dispatchEvent(new ClipboardEvent('paste', {clipboardData}));
+		}""",
+		new_contents_tsv,
+	)
+
+	return ActionResult(extracted_content=f'Updated cell {range} with {new_contents_tsv}', include_in_memory=False)
+
+
+# many more snippets for keyboard-shortcut based Google Sheets automation can be found here, see:
+# - https://github.com/philc/sheetkeys/blob/master/content_scripts/sheet_actions.js
+# - https://github.com/philc/sheetkeys/blob/master/content_scripts/commands.js
+# - https://support.google.com/docs/answer/181110?hl=en&co=GENIE.Platform%3DDesktop#zippy=%2Cmac-shortcuts
+
+# Tip: LLM is bad at spatial reasoning, don't make it navigate with arrow keys relative to current cell
+# if given arrow keys, it will try to jump from G1 to A2 by pressing Down, without realizing needs to go Down+LeftLeftLeftLeft
+
+
+async def main():
+	"""Run four agents one after another in a single browser context on the same sheet.
+
+	Order: clear the sheet, research and fill it, extend it by three rows, fact-check it.
+	"""
+	# The odd indentation inside the strings is part of the prompt text and is kept as is.
+	sheet_tasks = [
+		# eraser
+		"""
+				Clear all the existing values in columns A through F in this Google Sheet:
+				https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+			""",
+		# researcher
+		"""
+				Google to find the full name, nationality, and date of birth of the CEO of the top 10 Fortune 100 companies.
+				For each company, append a row to this existing Google Sheet: https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Make sure column headers are present and all existing values in the sheet are formatted correctly.
+				Columns:
+					A: Company Name
+					B: CEO Full Name
+					C: CEO Country of Birth
+					D: CEO Date of Birth (YYYY-MM-DD)
+					E: Source URL where the information was found
+			""",
+		# improvised continuer
+		"""
+				Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Add 3 more rows to the bottom continuing the existing pattern, make sure any data you add is sourced correctly.
+			""",
+		# final fact checker
+		"""
+				Read the Google Sheet https://docs.google.com/spreadsheets/d/1INaIcfpYXlMRWO__de61SHFCaqt1lfHlcvtXZPItlpI/edit
+				Fact-check every entry, add a new column F with your findings for each row.
+				Make sure to check the source URL for each row, and make sure the information is correct.
+			""",
+	]
+
+	async with await browser.new_context() as context:
+		model = ChatOpenAI(model='gpt-4o')
+
+		# each agent is created only after the previous one has finished
+		for sheet_task in sheet_tasks:
+			worker = Agent(
+				task=sheet_task,
+				llm=model,
+				browser_context=context,
+				controller=controller,
+			)
+			await worker.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/online_coding_agent.py
+++ b/browser-use/examples/use-cases/online_coding_agent.py
@ -0,0 +1,49 @@
+# Goal: Implements a multi-agent system for online code editors, with separate agents for coding and execution.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+async def main():
+	"""Run three agents in one shared browser context: open the editor, write code, execute it.
+
+	The coder runs before the executor, because the executor's task is to run
+	the code the coder wrote. The browser is closed even if an agent fails.
+	"""
+	browser = Browser()
+	try:
+		async with await browser.new_context() as context:
+			model = ChatOpenAI(model='gpt-4o')
+
+			# Initialize browser agent
+			agent1 = Agent(
+				task='Open an online code editor programiz.',
+				llm=model,
+				browser_context=context,
+			)
+			executor = Agent(
+				task='Executor. Execute the code written by the coder and suggest some updates if there are errors.',
+				llm=model,
+				browser_context=context,
+			)
+
+			coder = Agent(
+				task='Coder. Your job is to write and complete code. You are an expert coder. Code a simple calculator. Write the code on the coding interface after agent1 has opened the link.',
+				llm=model,
+				browser_context=context,
+			)
+			await agent1.run()
+			# the code has to exist before the executor can run it
+			await coder.run()
+			await executor.run()
+	finally:
+		await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/post-twitter.py
+++ b/browser-use/examples/use-cases/post-twitter.py
@ -0,0 +1,127 @@
+"""
+Goal: Provides a template for automated posting on X (Twitter), including new tweets, tagging, and replies.
+
+X Posting Template using browser-use
+----------------------------------------
+
+This template allows you to automate posting on X using browser-use.
+It supports:
+- Posting new tweets
+- Tagging users
+- Replying to tweets
+
+Add your target user and message in the config section.
+
+target_user="XXXXX"
+message="XXXXX"
+reply_url="XXXXX"
+
+Any issues, contact me on X @defichemist95
+"""
+
+import asyncio
+import os
+import sys
+from dataclasses import dataclass
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Controller
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set. Please add it to your environment variables.')
+
+
+# ============ Configuration Section ============
+@dataclass
+class TwitterConfig:
+	"""Configuration for Twitter posting
+
+	Consumed by create_twitter_agent to build the LLM, the browser and the task prompt.
+	"""
+
+	# API key handed to ChatOpenAI
+	openai_api_key: str
+	# Path of the browser binary to launch
+	chrome_path: str
+	target_user: str  # Twitter handle without @
+	# Text of the post; the prompt prefixes it with '@<target_user> '
+	message: str
+	# URL of the tweet the agent should reply to
+	reply_url: str
+	# Passed through to BrowserConfig(headless=...)
+	headless: bool = False
+	# Model name passed to ChatOpenAI
+	model: str = 'gpt-4o-mini'
+	# Page the agent is told to open first
+	base_url: str = 'https://x.com/home'
+
+
+# Customize these settings
+config = TwitterConfig(
+	openai_api_key=os.getenv('OPENAI_API_KEY'),
+	chrome_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',  # This is for MacOS (Chrome)
+	target_user='XXXXX',
+	message='XXXXX',
+	reply_url='XXXXX',
+	headless=False,
+)
+
+
+def create_twitter_agent(config: TwitterConfig) -> Agent:
+	"""Build an Agent that posts a tagged message on X and then replies to a tweet.
+
+	Args:
+		config: Supplies the model, API key, browser binary, target handle,
+			message, reply URL and start page.
+
+	Returns:
+		An Agent wired to a fresh LLM client, Browser and Controller; it is not run here.
+	"""
+	llm = ChatOpenAI(model=config.model, api_key=config.openai_api_key)
+
+	browser_settings = BrowserConfig(
+		headless=config.headless,
+		browser_binary_path=config.chrome_path,
+	)
+	browser = Browser(config=browser_settings)
+
+	controller = Controller()
+
+	# the post always starts by tagging the target user
+	full_message = f'@{config.target_user} {config.message}'
+
+	# step-by-step instructions handed to the agent as its task
+	instructions = f"""Navigate to Twitter and create a post and reply to a tweet.
+
+        Here are the specific steps:
+
+        1. Go to {config.base_url}. See the text input field at the top of the page that says "What's happening?"
+        2. Look for the text input field at the top of the page that says "What's happening?"
+        3. Click the input field and type exactly this message:
+        "{full_message}"
+        4. Find and click the "Post" button (look for attributes: 'button' and 'data-testid="tweetButton"')
+        5. Do not click on the '+' button which will add another tweet.
+
+        6. Navigate to {config.reply_url}
+        7. Before replying, understand the context of the tweet by scrolling down and reading the comments.
+        8. Reply to the tweet under 50 characters.
+
+        Important:
+        - Wait for each element to load before interacting
+        - Make sure the message is typed exactly as shown
+        - Verify the post button is clickable before clicking
+        - Do not click on the '+' button which will add another tweet
+        """
+
+	return Agent(task=instructions, llm=llm, controller=controller, browser=browser)
+
+
+async def post_tweet(agent: Agent):
+	"""Run the agent for at most 100 steps, save a history GIF, and report the outcome.
+
+	Any exception is caught and printed rather than propagated. The success
+	message only means no exception was raised.
+	"""
+	step_limit = 100
+	try:
+		await agent.run(max_steps=step_limit)
+		agent.create_history_gif()
+		print('Tweet posted successfully!')
+	except Exception as exc:
+		print(f'Error posting tweet: {exc}')
+
+
+async def main():
+	"""Build the posting agent from the module-level config and run it through post_tweet."""
+	agent = create_twitter_agent(config)
+	# go through post_tweet so its step limit, history GIF and error reporting
+	# are actually used instead of being dead code
+	await post_tweet(agent)
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/scrolling_page.py
+++ b/browser-use/examples/use-cases/scrolling_page.py
@ -0,0 +1,43 @@
+# Goal: Automates webpage scrolling with various scrolling actions and text search functionality.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+
+if not os.getenv('OPENAI_API_KEY'):
+	raise ValueError('OPENAI_API_KEY is not set')
+
+"""
+Example: Using the 'Scroll down' action.
+
+This script demonstrates how the agent can navigate to a webpage and scroll down the content.
+If no amount is specified, the agent will scroll down by one page height.
+"""
+
+llm = ChatOpenAI(model='gpt-4o')
+
+agent = Agent(
+	# task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll down by one page - then scroll up by 100 pixels - then scroll down by 100 pixels - then scroll down by 10000 pixels.",
+	task="Navigate to 'https://en.wikipedia.org/wiki/Internet' and scroll to the string 'The vast majority of computer'",
+	llm=llm,
+	browser=Browser(config=BrowserConfig(headless=False)),
+)
+
+
+async def main():
+	"""Run the module-level scrolling agent."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/shopping.py
+++ b/browser-use/examples/use-cases/shopping.py
@ -0,0 +1,128 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent, Browser
+
+# Login credentials come from the environment (or .env, loaded above) so that
+# no real account details live in the source file.
+# NOTE: they are still sent to the LLM as part of the prompt.
+MIGROS_USERNAME = os.getenv('MIGROS_USERNAME', '<set MIGROS_USERNAME>')
+MIGROS_PASSWORD = os.getenv('MIGROS_PASSWORD', '<set MIGROS_PASSWORD>')
+
+# Prompt for the shopping agent; an f-string only to insert the credentials above.
+task = f"""
+   ### Prompt for Shopping Agent – Migros Online Grocery Order
+
+**Objective:**
+Visit [Migros Online](https://www.migros.ch/en), search for the required grocery items, add them to the cart, select an appropriate delivery window, and complete the checkout process using TWINT.
+
+**Important:**
+- Make sure that you don't buy more than it's needed for each article.
+- After your search, if you click  the "+" button, it adds the item to the basket.
+- if you open the basket sidewindow menu, you can close it by clicking the X button on the top right. This will help you navigate easier.
+---
+
+### Step 1: Navigate to the Website
+- Open [Migros Online](https://www.migros.ch/en).
+- You should be logged in as Nikolaos Kaliorakis
+
+---
+
+### Step 2: Add Items to the Basket
+
+#### Shopping List:
+
+**Meat & Dairy:**
+- Beef Minced meat (1 kg)
+- Gruyère cheese (grated preferably)
+- 2 liters full-fat milk
+- Butter (cheapest available)
+
+**Vegetables:**
+- Carrots (1kg pack)
+- Celery
+- Leeks (1 piece)
+- 1 kg potatoes
+
+At this stage, check the basket on the top right (indicates the price) and check if you bought the right items.
+
+**Fruits:**
+- 2 lemons
+- Oranges (for snacking)
+
+**Pantry Items:**
+- Lasagna sheets
+- Tahini
+- Tomato paste (below CHF2)
+- Black pepper refill (not with the mill)
+- 2x 1L Oatly Barista(oat milk)
+- 1 pack of eggs (10 egg package)
+
+#### Ingredients I already have (DO NOT purchase):
+- Olive oil, garlic, canned tomatoes, dried oregano, bay leaves, salt, chili flakes, flour, nutmeg, cumin.
+
+---
+
+### Step 3: Handling Unavailable Items
+- If an item is **out of stock**, find the best alternative.
+- Use the following recipe contexts to choose substitutions:
+  - **Pasta Bolognese & Lasagna:** Minced meat, tomato paste, lasagna sheets, milk (for béchamel), Gruyère cheese.
+  - **Hummus:** Tahini, chickpeas, lemon juice, olive oil.
+  - **Chickpea Curry Soup:** Chickpeas, leeks, curry, lemons.
+  - **Crispy Slow-Cooked Pork Belly with Vegetables:** Potatoes, butter.
+- Example substitutions:
+  - If Gruyère cheese is unavailable, select another semi-hard cheese.
+  - If Tahini is unavailable, a sesame-based alternative may work.
+
+---
+
+### Step 4: Adjusting for Minimum Order Requirement
+- If the total order **is below CHF 99**, add **a liquid soap refill** to reach the minimum. If it;s still you can buy some bread, dark chockolate.
+- At this step, check if you have bought MORE items than needed. If the price is more then CHF200, you MUST remove items.
+- If an item is not available, choose an alternative.
+- if an age verification is needed, remove alcoholic products, we haven't verified yet.
+
+---
+
+### Step 5: Select Delivery Window
+- Choose a **delivery window within the current week**. It's ok to pay up to CHF2 for the window selection.
+- Preferably select a slot within the workweek.
+
+---
+
+### Step 6: Checkout
+- Proceed to checkout.
+- Select **TWINT** as the payment method.
+- Check out.
+- 
+- if it's needed the username is: {MIGROS_USERNAME} 
+- and the password is : {MIGROS_PASSWORD}
+---
+
+### Step 7: Confirm Order & Output Summary
+- Once the order is placed, output a summary including:
+  - **Final list of items purchased** (including any substitutions).
+  - **Total cost**.
+  - **Chosen delivery time**.
+
+**Important:** Ensure efficiency and accuracy throughout the process."""
+
+browser = Browser()
+
+agent = Agent(
+	task=task,
+	llm=ChatOpenAI(model='gpt-4o'),
+	browser=browser,
+)
+
+
+async def main():
+	"""Run the shopping agent, then wait for Enter before closing the browser."""
+	await agent.run()
+	# keep the browser window open until the user confirms
+	input('Press Enter to close the browser...')
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/twitter_post_using_cookies.py
+++ b/browser-use/examples/use-cases/twitter_post_using_cookies.py
@ -0,0 +1,48 @@
+# Goal: Automates posting on X (Twitter) using stored authentication cookies.
+
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_google_genai import ChatGoogleGenerativeAI
+from pydantic import SecretStr
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext, BrowserContextConfig
+
+api_key = os.getenv('GOOGLE_API_KEY')
+if not api_key:
+	raise ValueError('GOOGLE_API_KEY is not set')
+
+llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))
+
+
+browser = Browser(
+	config=BrowserConfig(
+		# browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
+	)
+)
+file_path = os.path.join(os.path.dirname(__file__), 'twitter_cookies.txt')
+context = BrowserContext(browser=browser, config=BrowserContextConfig(cookies_file=file_path))
+
+
+async def main():
+	"""Post on X using the cookie-authenticated context, then close the browser on Enter."""
+	agent = Agent(
+		browser_context=context,
+		task=('go to https://x.com. write a new post with the text "browser-use ftw", and submit it'),
+		llm=llm,
+		max_actions_per_step=4,
+	)
+	await agent.run(max_steps=25)
+	input('Press Enter to close the browser...')
+	# actually close the browser the prompt refers to, as the shopping example does
+	await browser.close()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/web_voyager_agent.py
+++ b/browser-use/examples/use-cases/web_voyager_agent.py
@ -0,0 +1,77 @@
+# Goal: A general-purpose web navigation agent for tasks like flight booking and course searching.
+
+import asyncio
+import os
+import sys
+
+# Adjust Python path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import AzureChatOpenAI, ChatOpenAI
+from pydantic import SecretStr
+
+from browser_use.agent.service import Agent
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+# Set LLM based on defined environment variables
+# OpenAI takes precedence when OPENAI_API_KEY is set; Azure OpenAI is used only
+# when both its key and its endpoint are set; otherwise the script refuses to start.
+if os.getenv('OPENAI_API_KEY'):
+	llm = ChatOpenAI(
+		model='gpt-4o',
+	)
+elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'):
+	llm = AzureChatOpenAI(
+		model='gpt-4o',
+		api_version='2024-10-21',
+		# the '' defaults are never used here (the branch condition guarantees both are set)
+		azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
+		api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
+	)
+else:
+	raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')
+
+
+browser = Browser(
+	config=BrowserConfig(
+		headless=False,  # This is True in production
+		disable_security=True,
+		new_context_config=BrowserContextConfig(
+			disable_security=True,
+			minimum_wait_page_load_time=1,  # 3 on prod
+			maximum_wait_page_load_time=10,  # 20 on prod
+			# Set no_viewport=False to constrain the viewport to the specified dimensions
+			# This is useful for specific cases where you need a fixed viewport size
+			no_viewport=False,
+			window_width=1280,
+			window_height=1100,
+			# trace_path='./tmp/web_voyager_agent',
+		),
+	)
+)
+
+# TASK = """
+# Find the lowest-priced one-way flight from Cairo to Montreal on February 21, 2025, including the total travel time and number of stops. on https://www.google.com/travel/flights/
+# """
+# TASK = """
+# Browse Coursera, which universities offer Master of Advanced Study in Engineering degrees? Tell me what is the latest application deadline for this degree? on https://www.coursera.org/"""
+TASK = """
+Find and book a hotel in Paris with suitable accommodations for a family of four (two adults and two children) offering free cancellation for the dates of February 14-21, 2025. on https://www.booking.com/
+"""
+
+
+async def main():
+	"""Run the web-voyager agent on TASK for up to 50 steps and save its history as JSON."""
+	voyager = Agent(task=TASK, llm=llm, browser=browser, validate_output=True, enable_memory=False)
+
+	run_history = await voyager.run(max_steps=50)
+	# NOTE(review): whether save_to_file creates ./tmp when it is missing is not visible here - confirm
+	run_history.save_to_file('./tmp/history.json')
+
+
+if __name__ == '__main__':
+	asyncio.run(main())
--- a/browser-use/examples/use-cases/wikipedia_banana_to_quantum.py
+++ b/browser-use/examples/use-cases/wikipedia_banana_to_quantum.py
@ -0,0 +1,39 @@
+import asyncio
+import os
+import sys
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from langchain_openai import ChatOpenAI
+
+from browser_use import Agent
+from browser_use.browser.browser import Browser, BrowserConfig, BrowserContextConfig
+
+# video https://preview.screen.studio/share/vuq91Ej8
+llm = ChatOpenAI(
+	model='gpt-4o',
+	temperature=0.0,
+)
+task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'
+
+browser = Browser(
+	config=BrowserConfig(
+		new_context_config=BrowserContextConfig(
+			viewport_expansion=-1,
+			highlight_elements=False,
+		),
+	),
+)
+agent = Agent(task=task, llm=llm, browser=browser, use_vision=False)
+
+
+async def main():
+	"""Run the module-level Wikipedia navigation agent."""
+	await agent.run()
+
+
+if __name__ == '__main__':
+	asyncio.run(main())