[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
94
browser-use/examples/custom-functions/action_filters.py
Normal file
94
browser-use/examples/custom-functions/action_filters.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""
|
||||
Action filters (domains and page_filter) let you limit actions available to the Agent on a step-by-step/page-by-page basis.
|
||||
|
||||
@registry.action(..., domains=['*'], page_filter=lambda page: True)
|
||||
async def some_action(browser: BrowserContext):
|
||||
...
|
||||
|
||||
This helps prevent the LLM from deciding to use an action that is not compatible with the current page.
|
||||
It helps limit decision fatigue by scoping actions only to pages where they make sense.
|
||||
It also helps prevent mis-triggering stateful actions or actions that could break other programs or leak secrets.
|
||||
|
||||
For example:
|
||||
- only run on certain domains @registry.action(..., domains=['example.com', '*.example.com', 'example.co.*']) (supports globs, but no regex)
|
||||
- only fill in a password on a specific login page url
|
||||
- only run if this action has not run before on this page (e.g. by looking up the url in a file on disk)
|
||||
|
||||
During each step, the agent recalculates the actions available specifically for that page, and informs the LLM.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from playwright.async_api import Page
|
||||
|
||||
from browser_use.agent.service import Agent, Browser, BrowserContext, Controller
|
||||
|
||||
# Initialize controller and registry
|
||||
controller = Controller()
|
||||
registry = controller.registry
|
||||
|
||||
|
||||
# Action will only be available to Agent on Google domains because of the domain filter
|
||||
@registry.action(description='Trigger disco mode', domains=['google.com', '*.google.com'])
async def disco_mode(browser: BrowserContext):
    """Make every element on the current page wiggle.

    Registered with a domain filter for google.com and its subdomains.
    """
    wiggle_script = """() => {
        // define the wiggle animation
        document.styleSheets[0].insertRule('@keyframes wiggle { 0% { transform: rotate(0deg); } 50% { transform: rotate(10deg); } 100% { transform: rotate(0deg); } }');

        document.querySelectorAll("*").forEach(element => {
            element.style.animation = "wiggle 0.5s infinite";
        });
    }"""
    current_page = await browser.get_current_page()
    await current_page.evaluate(wiggle_script)
|
||||
|
||||
|
||||
# you can create a custom page filter function that determines if the action should be available for a given page
|
||||
def is_login_page(page: Page) -> bool:
    """Return True when the page URL contains 'login' or 'signin', ignoring case."""
    lowered_url = page.url.lower()
    return any(marker in lowered_url for marker in ('login', 'signin'))
|
||||
|
||||
|
||||
# then use it in the action decorator to limit the action to only be available on pages where the filter returns True
|
||||
@registry.action(description='Use the force, luke', page_filter=is_login_page)
async def use_the_force(browser: BrowserContext):
    """Replace the body of the current page with a fixed message."""
    current_page = await browser.get_current_page()
    # this will only ever run on pages that matched the filter
    assert is_login_page(current_page)

    script = """() => { document.querySelector('body').innerHTML = 'These are not the droids you are looking for';}"""
    await current_page.evaluate(script)
|
||||
|
||||
|
||||
async def main():
    """Main function to run the example.

    Builds the browser, LLM and agent, runs the agent for at most 10 steps,
    and closes the browser even when the run raises.
    """
    browser = Browser()
    llm = ChatOpenAI(model_name='gpt-4o')

    # Create the agent
    # disco mode will not be triggered on apple.com because the LLM won't be able to see
    # that action available, it should work on Google.com though.
    agent = Agent(
        task="""
            Go to apple.com and trigger disco mode (if dont know how to do that, then just move on).
            Then go to google.com and trigger disco mode.
            After that, go to the Google login page and Use the force, luke.
        """,
        llm=llm,
        browser=browser,
        controller=controller,
    )

    try:
        # Run the agent
        await agent.run(max_steps=10)
    finally:
        # Cleanup must happen on failure too, otherwise the browser process is left running
        await browser.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
98
browser-use/examples/custom-functions/advanced_search.py
Normal file
98
browser-use/examples/custom-functions/advanced_search.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import httpx
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
|
||||
|
||||
class Person(BaseModel):
    """A person identified by name, with an optional email address."""

    name: str
    # Defaults to None when no email is supplied
    email: str | None = None
|
||||
|
||||
|
||||
class PersonList(BaseModel):
    """Wrapper holding a list of Person entries; passed to the Controller as its output model."""

    people: list[Person]
|
||||
|
||||
|
||||
controller = Controller(exclude_actions=['search_google'], output_model=PersonList)
|
||||
BEARER_TOKEN = os.getenv('BEARER_TOKEN')
|
||||
|
||||
if not BEARER_TOKEN:
|
||||
# use the api key for ask tessa
|
||||
# you can also use other apis like exa, xAI, perplexity, etc.
|
||||
raise ValueError('BEARER_TOKEN is not set - go to https://www.heytessa.ai/ and create an api key')
|
||||
|
||||
|
||||
@controller.registry.action('Search the web for a specific query')
async def search_web(query: str):
    """Query the search API and return the high-scoring sources as JSON text.

    :param query: The search query sent to the API.
    :return: ActionResult whose extracted_content is a JSON array of the sources
        scoring at least 0.8, reduced to the keys in ``keys_to_use``.
    :raises httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    keys_to_use = ['url', 'title', 'content', 'author', 'score']
    headers = {'Authorization': f'Bearer {BEARER_TOKEN}'}
    async with httpx.AsyncClient() as client:
        response = await client.post('https://asktessa.ai/api/search', headers=headers, json={'query': query})

    # Surface auth/quota/server errors directly instead of failing later on a missing key
    response.raise_for_status()

    # Every key is treated as optional: a source without a score is simply filtered out
    final_results = [
        {key: source[key] for key in keys_to_use if key in source}
        for source in response.json().get('sources', [])
        if (source.get('score') or 0) >= 0.8
    ]
    result_text = json.dumps(final_results, indent=4)
    print(result_text)
    return ActionResult(extracted_content=result_text, include_in_memory=True)
|
||||
|
||||
|
||||
names = [
|
||||
'Ruedi Aebersold',
|
||||
'Bernd Bodenmiller',
|
||||
'Eugene Demler',
|
||||
'Erich Fischer',
|
||||
'Pietro Gambardella',
|
||||
'Matthias Huss',
|
||||
'Reto Knutti',
|
||||
'Maksym Kovalenko',
|
||||
'Antonio Lanzavecchia',
|
||||
'Maria Lukatskaya',
|
||||
'Jochen Markard',
|
||||
'Javier Pérez-Ramírez',
|
||||
'Federica Sallusto',
|
||||
'Gisbert Schneider',
|
||||
'Sonia I. Seneviratne',
|
||||
'Michael Siegrist',
|
||||
'Johan Six',
|
||||
'Tanja Stadler',
|
||||
'Shinichi Sunagawa',
|
||||
'Michael Bruce Zimmermann',
|
||||
]
|
||||
|
||||
|
||||
async def main():
    """Ask the agent to look up an email for every name and print the parsed result."""
    instructions = 'use search_web with "find email address of the following ETH professor:" for each of the following persons in a list of actions. Finally return the list with name and email if provided'
    task = instructions + '\n' + '\n'.join(names)

    llm = ChatOpenAI(model='gpt-4o')
    agent = Agent(task=task, llm=llm, controller=controller, max_actions_per_step=20)

    history = await agent.run()
    result = history.final_result()

    # Guard clause: nothing to parse when the agent produced no final result
    if not result:
        print('No result')
        return

    parsed: PersonList = PersonList.model_validate_json(result)
    for entry in parsed.people:
        print(f'{entry.name} - {entry.email}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
60
browser-use/examples/custom-functions/clipboard.py
Normal file
60
browser-use/examples/custom-functions/clipboard.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import pyperclip
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
)
|
||||
)
|
||||
controller = Controller()
|
||||
|
||||
|
||||
@controller.registry.action('Copy text to clipboard')
def copy_to_clipboard(text: str):
    """Copy ``text`` to the clipboard and return it as the action's extracted content."""
    pyperclip.copy(text)
    outcome = ActionResult(extracted_content=text)
    return outcome
|
||||
|
||||
|
||||
@controller.registry.action('Paste text from clipboard')
async def paste_from_clipboard(browser: BrowserContext):
    """Type the current clipboard text into the active page and return that text."""
    clipboard_text = pyperclip.paste()

    # send text to browser
    active_page = await browser.get_current_page()
    await active_page.keyboard.type(clipboard_text)

    return ActionResult(extracted_content=clipboard_text)
|
||||
|
||||
|
||||
async def main():
    """Run the clipboard example, closing the browser even if the agent run fails."""
    task = 'Copy the text "Hello, world!" to the clipboard, then go to google.com and paste the text'
    model = ChatOpenAI(model='gpt-4o')
    agent = Agent(
        task=task,
        llm=model,
        controller=controller,
        browser=browser,
    )

    try:
        await agent.run()
    finally:
        # Without the finally, an exception in agent.run() would leave the browser open
        await browser.close()

    input('Press Enter to close...')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,236 @@
|
|||
"""
|
||||
Description: These Python modules are designed to capture detailed
|
||||
browser usage data for analysis, with both server and client
|
||||
components working together to record and store the information.
|
||||
|
||||
Author: Carlos A. Planchón
|
||||
https://github.com/carlosplanchon/
|
||||
|
||||
Adapt this code to your needs.
|
||||
|
||||
Feedback is appreciated!
|
||||
"""
|
||||
|
||||
#####################
|
||||
# #
|
||||
# --- UTILS --- #
|
||||
# #
|
||||
#####################
|
||||
|
||||
import base64
|
||||
|
||||
|
||||
def b64_to_png(b64_string: str, output_file):
    """
    Convert a Base64-encoded string to a PNG file.

    The payload is decoded before the output file is opened, so invalid
    Base64 raises without truncating or creating ``output_file``.

    :param b64_string: A string containing Base64-encoded data
    :param output_file: The path to the output PNG file
    :raises binascii.Error: If ``b64_string`` is not valid Base64
    """
    png_bytes = base64.b64decode(b64_string)
    with open(output_file, 'wb') as f:
        f.write(png_bytes)
|
||||
|
||||
|
||||
###################################################################
|
||||
# #
|
||||
# --- FASTAPI API TO RECORD AND SAVE Browser-Use ACTIVITY --- #
|
||||
# #
|
||||
###################################################################
|
||||
|
||||
# Save to api.py and run with `python api.py`
|
||||
|
||||
# ! pip install uvicorn
|
||||
# ! pip install fastapi
|
||||
# ! pip install prettyprinter
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import prettyprinter
|
||||
from fastapi import FastAPI, Request
|
||||
|
||||
prettyprinter.install_extras()
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@app.post('/post_agent_history_step')
async def post_agent_history_step(request: Request):
    """Pretty-print the posted JSON and store it as the next numbered file in ``recordings/``."""
    payload = await request.json()
    prettyprinter.cpprint(payload)

    # Ensure the "recordings" folder exists using pathlib
    target_dir = Path('recordings')
    target_dir.mkdir(exist_ok=True)

    # Collect the numbers already used by <number>.json files
    used_numbers = []
    for entry in target_dir.iterdir():
        if not (entry.is_file() and entry.suffix == '.json'):
            continue
        try:
            used_numbers.append(int(entry.stem))
        except ValueError:
            # In case the file name isn't just a number
            continue

    # The first recording is 1; afterwards one past the highest existing number
    next_number = max(used_numbers, default=0) + 1

    # Save the JSON data to the file
    file_path = target_dir / f'{next_number}.json'
    with file_path.open('w') as out:
        json.dump(payload, out, indent=2)

    return {'status': 'ok', 'message': f'Saved to {file_path}'}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import uvicorn
|
||||
|
||||
uvicorn.run(app, host='0.0.0.0', port=9000)
|
||||
|
||||
|
||||
##############################################################
|
||||
# #
|
||||
# --- CLIENT TO RECORD AND SAVE Browser-Use ACTIVITY --- #
|
||||
# #
|
||||
##############################################################
|
||||
|
||||
"""
|
||||
pyobjtojson:
|
||||
|
||||
A Python library to safely and recursively serialize any Python object
|
||||
(including Pydantic models and dataclasses) into JSON-ready structures,
|
||||
gracefully handling circular references.
|
||||
"""
|
||||
|
||||
# ! pip install -U pyobjtojson
|
||||
# ! pip install -U prettyprinter
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import requests
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pyobjtojson import obj_to_json
|
||||
|
||||
from browser_use import Agent
|
||||
|
||||
# import prettyprinter
|
||||
# prettyprinter.install_extras()
|
||||
|
||||
|
||||
def send_agent_history_step(data, timeout: float = 10.0):
    """POST one step summary to the local recording API and return its JSON reply.

    :param data: JSON-serializable payload for the step.
    :param timeout: Seconds to wait for the server; requests has no default
        timeout, so without one an unresponsive server would block forever.
    :return: The decoded JSON body of the response.
    """
    url = 'http://127.0.0.1:9000/post_agent_history_step'
    response = requests.post(url, json=data, timeout=timeout)
    return response.json()
|
||||
|
||||
|
||||
def _last_as_json(items):
    """Serialize ``items`` with obj_to_json and return the last element, or None when empty."""
    serialized = obj_to_json(obj=items, check_circular=False)
    return serialized[-1] if len(serialized) > 0 else None


async def record_activity(agent_obj):
    """Step hook: capture the page and the latest history entries and send them to the API.

    The summary always contains the current page HTML and screenshot. The
    history-derived fields (url, thoughts, outputs, actions, extracted content)
    stay None when the agent exposes no ``state.history``.

    :param agent_obj: The agent passed in by the ``on_step_start`` hook.
    """
    print('--- ON_STEP_START HOOK ---')
    website_html: str = await agent_obj.browser_context.get_page_html()
    website_screenshot: str = await agent_obj.browser_context.take_screenshot()

    print('--> History:')
    # Resolve the history once; previously the None fallback was dereferenced right away
    state = getattr(agent_obj, 'state', None)
    history = getattr(state, 'history', None)

    model_step_summary = {
        'website_html': website_html,
        'website_screenshot': website_screenshot,
        'url': None,
        'model_thoughts': None,
        'model_outputs': None,
        'model_actions': None,
        'extracted_content': None,
    }

    if history is not None:
        model_step_summary['model_thoughts'] = _last_as_json(history.model_thoughts())
        model_step_summary['model_outputs'] = _last_as_json(history.model_outputs())
        model_step_summary['model_actions'] = _last_as_json(history.model_actions())
        model_step_summary['extracted_content'] = _last_as_json(history.extracted_content())
        model_step_summary['url'] = _last_as_json(history.urls())

    print('--- MODEL STEP SUMMARY ---')
    send_agent_history_step(data=model_step_summary)
|
||||
|
||||
# response = send_agent_history_step(data=history)
|
||||
# print(response)
|
||||
|
||||
# print("--> Website HTML:")
|
||||
# print(website_html[:200])
|
||||
# print("--> Website Screenshot:")
|
||||
# print(website_screenshot[:200])
|
||||
|
||||
|
||||
agent = Agent(
|
||||
task='Compare the price of gpt-4o and DeepSeek-V3',
|
||||
llm=ChatOpenAI(model='gpt-4o'),
|
||||
)
|
||||
|
||||
|
||||
async def run_agent():
    """Run the agent with record_activity as the on_step_start hook; print any exception instead of raising."""
    try:
        await agent.run(on_step_start=record_activity, max_steps=30)
    except Exception as error:
        print(error)
|
||||
|
||||
|
||||
asyncio.run(run_agent())
|
||||
112
browser-use/examples/custom-functions/file_upload.py
Normal file
112
browser-use/examples/custom-functions/file_upload.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
import anyio
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Initialize controller first
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
|
||||
)
|
||||
)
|
||||
controller = Controller()
|
||||
|
||||
|
||||
@controller.action(
    'Upload file to interactive element with file path ',
)
async def upload_file(index: int, path: str, browser: BrowserContext, available_file_paths: list[str]):
    """Set ``path`` as the input file of the upload element found at DOM index ``index``.

    Returns an ActionResult with ``error`` set when the path is not in
    ``available_file_paths``, the file does not exist, no upload element can
    be located, or setting the file fails.
    """
    if path not in available_file_paths:
        return ActionResult(error=f'File path {path} is not available')
    if not os.path.exists(path):
        return ActionResult(error=f'File {path} does not exist')

    dom_node = await browser.get_dom_element_by_index(index)
    upload_node = dom_node.get_file_upload_element()

    # Only try to locate the element handle when the DOM lookup found an upload node
    upload_handle = None
    if upload_node is not None:
        upload_handle = await browser.get_locate_element(upload_node)

    if upload_handle is None:
        not_found = f'No file upload element found at index {index}'
        logger.info(not_found)
        return ActionResult(error=not_found)

    try:
        await upload_handle.set_input_files(path)
        success = f'Successfully uploaded file to index {index}'
        logger.info(success)
        return ActionResult(extracted_content=success, include_in_memory=True)
    except Exception as exc:
        failure = f'Failed to upload file to index {index}: {str(exc)}'
        logger.info(failure)
        return ActionResult(error=failure)
|
||||
|
||||
|
||||
@controller.action('Read the file content of a file given a path')
async def read_file(path: str, available_file_paths: list[str]):
    """Read a whitelisted text file and return its content.

    :param path: Path of the file to read; must be in ``available_file_paths``.
    :param available_file_paths: Paths the action is allowed to read.
    :return: ActionResult with the content in ``extracted_content``, or with
        ``error`` set when the path is not allowed or the file cannot be read.
    """
    if path not in available_file_paths:
        return ActionResult(error=f'File path {path} is not available')

    # Report I/O and decoding problems as an action error, matching upload_file,
    # instead of letting the exception escape the action
    try:
        async with await anyio.open_file(path, 'r') as f:
            content = await f.read()
    except (OSError, UnicodeDecodeError) as e:
        msg = f'Failed to read file {path}: {e}'
        logger.info(msg)
        return ActionResult(error=msg)

    msg = f'File content: {content}'
    logger.info(msg)
    return ActionResult(extracted_content=msg, include_in_memory=True)
|
||||
|
||||
|
||||
def create_file(file_type: str = 'txt'):
    """Write 'test' to ``tmp.<file_type>`` in the working directory and return its absolute path as a string."""
    file_name = f'tmp.{file_type}'
    with open(file_name, 'w') as handle:
        handle.write('test')

    created_path = Path.cwd() / file_name
    logger.info(f'Created file: {created_path}')
    return str(created_path)
|
||||
|
||||
|
||||
async def main():
    """Create three temporary files, let the agent read and upload them, then close the browser."""
    task = 'Go to https://kzmpmkh2zfk1ojnpxfn1.lite.vusercontent.net/ and - read the file content and upload them to fields'

    available_file_paths = [create_file('txt'), create_file('pdf'), create_file('csv')]

    model = ChatOpenAI(model='gpt-4o')
    agent = Agent(
        task=task,
        llm=model,
        controller=controller,
        browser=browser,
        available_file_paths=available_file_paths,
    )

    try:
        await agent.run()
    finally:
        # Close the browser even when the run raises
        await browser.close()

    input('Press Enter to close...')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
97
browser-use/examples/custom-functions/hover_element.py
Normal file
97
browser-use/examples/custom-functions/hover_element.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use import Agent, Controller
|
||||
from browser_use.agent.views import ActionResult
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
from browser_use.browser.context import BrowserContext
|
||||
|
||||
|
||||
class HoverAction(BaseModel):
    """Parameters for hover_element; each field is an alternative way to identify the element."""

    # Key into the cached selector map
    index: int | None = None
    # XPath expression locating the element
    xpath: str | None = None
    # CSS selector locating the element
    selector: str | None = None
|
||||
|
||||
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
headless=False,
|
||||
)
|
||||
)
|
||||
controller = Controller()
|
||||
|
||||
|
||||
@controller.registry.action(
    'Hover over an element',
    param_model=HoverAction,  # Define this model with at least "index: int" field
)
async def hover_element(params: HoverAction, browser: BrowserContext):
    """
    Hovers over the element specified by XPath, CSS selector, or index from the cached selector map.

    The locators are tried in that order and the first one provided is used.
    The result message names the locator that was actually used.

    :raises Exception: If no locator is provided, the element cannot be located, or the hover fails.
    """
    session = await browser.get_session()
    state = session.cached_state

    if params.xpath:
        # Use XPath to locate the element
        located_by = f'with XPath {params.xpath}'
        element_handle = await browser.get_locate_element_by_xpath(params.xpath)
        if element_handle is None:
            raise Exception(f'Failed to locate element with XPath {params.xpath}')
    elif params.selector:
        # Use CSS selector to locate the element
        located_by = f'with CSS Selector {params.selector}'
        element_handle = await browser.get_locate_element_by_css_selector(params.selector)
        if element_handle is None:
            raise Exception(f'Failed to locate element with CSS Selector {params.selector}')
    elif params.index is not None:
        # Use index to locate the element
        located_by = f'at index {params.index}'
        if state is None or params.index not in state.selector_map:
            raise Exception(f'Element index {params.index} does not exist - retry or use alternative actions')
        element_node = state.selector_map[params.index]
        element_handle = await browser.get_locate_element(element_node)
        if element_handle is None:
            raise Exception(f'Failed to locate element with index {params.index}')
    else:
        raise Exception('Either index, xpath or selector must be provided')

    try:
        await element_handle.hover()
    except Exception as e:
        # Chain the cause so the original traceback is kept
        raise Exception(f'❌ Failed to hover over element: {str(e)}') from e

    msg = f'🖱️ Hovered over element {located_by}'
    return ActionResult(extracted_content=msg, include_in_memory=True)
|
||||
|
||||
|
||||
async def main():
    """Run the hover example, closing the browser even if the agent run fails."""
    task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the css selector #hoverdivpara, then click on "Can you click me?"'
    # Alternative task using an XPath locator instead of a CSS selector:
    # task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the xpath //*[@id="hoverdivpara"], then click on "Can you click me?"'
    model = ChatOpenAI(model='gpt-4o')
    agent = Agent(
        task=task,
        llm=model,
        controller=controller,
        browser=browser,
    )

    try:
        await agent.run()
    finally:
        # Close the browser even when the run raises
        await browser.close()

    input('Press Enter to close...')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
45
browser-use/examples/custom-functions/notification.py
Normal file
45
browser-use/examples/custom-functions/notification.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
@controller.registry.action('Done with task ')
async def done(text: str):
    """Email ``text`` as the task result and mark the task as done."""
    import yagmail

    # To send emails use
    # STEP 1: go to https://support.google.com/accounts/answer/185833
    # STEP 2: Create an app password (you can't use here your normal gmail password)
    # STEP 3: Use the app password in the code below for the password
    mailer = yagmail.SMTP('your_email@gmail.com', 'your_app_password')
    message_body = f'result\n: {text}'
    mailer.send(
        to='recipient@example.com',
        subject='Test Email',
        contents=message_body,
    )

    return ActionResult(is_done=True, extracted_content='Email sent!')
|
||||
|
||||
|
||||
async def main():
    """Run an agent that visits browser-use.com and then calls the custom done action."""
    # The domain was misspelled as "brower-use.com", which sent the agent to the wrong site
    task = 'go to browser-use.com and then done'
    model = ChatOpenAI(model='gpt-4o')
    agent = Agent(task=task, llm=model, controller=controller)

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
57
browser-use/examples/custom-functions/onepassword_2fa.py
Normal file
57
browser-use/examples/custom-functions/onepassword_2fa.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from onepassword.client import Client # pip install onepassword-sdk
|
||||
|
||||
from browser_use import ActionResult, Agent, Controller
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OP_SERVICE_ACCOUNT_TOKEN = os.getenv('OP_SERVICE_ACCOUNT_TOKEN')
|
||||
OP_ITEM_ID = os.getenv('OP_ITEM_ID') # Go to 1Password, right click on the item, click "Copy Secret Reference"
|
||||
|
||||
|
||||
controller = Controller()
|
||||
|
||||
|
||||
@controller.registry.action('Get 2FA code from 1Password for Google Account', domains=['*.google.com', 'google.com'])
async def get_1password_2fa() -> ActionResult:
    """
    Custom action to retrieve 2FA/MFA code from 1Password using onepassword.client SDK.

    :return: ActionResult whose extracted_content is the resolved one-time passcode.
    :raises ValueError: If OP_SERVICE_ACCOUNT_TOKEN or OP_ITEM_ID is not set.
    """
    # Fail fast: an unset variable would otherwise authenticate with None or
    # resolve the literal reference "op://Private/None/One-time passcode"
    if not OP_SERVICE_ACCOUNT_TOKEN or not OP_ITEM_ID:
        raise ValueError('OP_SERVICE_ACCOUNT_TOKEN and OP_ITEM_ID must be set to read the 2FA code from 1Password')

    client = await Client.authenticate(
        # setup instructions: https://github.com/1Password/onepassword-sdk-python/#-get-started
        auth=OP_SERVICE_ACCOUNT_TOKEN,
        integration_name='Browser-Use',
        integration_version='v1.0.0',
    )

    mfa_code = await client.secrets.resolve(f'op://Private/{OP_ITEM_ID}/One-time passcode')

    return ActionResult(extracted_content=mfa_code)
|
||||
|
||||
|
||||
async def main():
    """Run an agent on the Google sign-in task and print the value returned by the run."""
    # Example task using the 1Password 2FA action
    task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it'

    llm = ChatOpenAI(model='gpt-4o')
    agent = Agent(task=task, llm=llm, controller=controller)

    outcome = await agent.run()
    print(f'Task completed with result: {outcome}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel
|
||||
|
||||
from browser_use.agent.service import Agent
|
||||
from browser_use.controller.service import Controller
|
||||
|
||||
# Initialize controller first
|
||||
controller = Controller()
|
||||
|
||||
|
||||
class Model(BaseModel):
    """One model entry; save_models writes each of these fields to the output file."""

    title: str
    url: str
    likes: int
    license: str
|
||||
|
||||
|
||||
class Models(BaseModel):
    """Wrapper holding a list of Model entries; used as the parameter model of save_models."""

    models: list[Model]
|
||||
|
||||
|
||||
@controller.action('Save models', param_model=Models)
def save_models(params: Models):
    """Append one line per model to ``models.txt``.

    The file is written as UTF-8 so non-ASCII titles do not depend on the
    platform's default encoding.

    :param params: The models to save.
    """
    with open('models.txt', 'a', encoding='utf-8') as f:
        for model in params.models:
            f.write(f'{model.title} ({model.url}): {model.likes} likes, {model.license}\n')
|
||||
|
||||
|
||||
# video: https://preview.screen.studio/share/EtOhIk0P
|
||||
async def main():
    """Run an agent on the Hugging Face lookup-and-save task."""
    task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.'

    llm = ChatOpenAI(model='gpt-4o')
    agent = Agent(task=task, llm=llm, controller=controller)

    await agent.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue