[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
64
browser-use/tests/test_vision.py
Normal file
64
browser-use/tests/test_vision.py
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
"""
|
||||
Simple try of the agent.
|
||||
|
||||
@dev You need to add OPENAI_API_KEY to your environment variables.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pprint import pprint
|
||||
|
||||
import pytest
|
||||
|
||||
from browser_use.browser.browser import Browser, BrowserConfig
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import asyncio
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
from browser_use import Agent, AgentHistoryList, Controller
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
controller = Controller()
|
||||
|
||||
# use this test to ask the model questions about the page like
|
||||
# which color do you see for bbox labels, list all with their label
|
||||
# what's the smallest bboxes with labels and
|
||||
|
||||
|
||||
@controller.registry.action(description='explain what you see on the screen and ask user for input')
|
||||
async def explain_screen(text: str) -> str:
|
||||
pprint(text)
|
||||
answer = input('\nuser input next question: \n')
|
||||
return answer
|
||||
|
||||
|
||||
@controller.registry.action(description='done')
|
||||
async def done(text: str) -> str:
|
||||
# pprint(text)
|
||||
return 'call explain_screen'
|
||||
|
||||
|
||||
@pytest.fixture(scope='function')
|
||||
def event_loop():
|
||||
"""Create an instance of the default event loop for each test case."""
|
||||
loop = asyncio.get_event_loop_policy().new_event_loop()
|
||||
yield loop
|
||||
loop.close()
|
||||
|
||||
|
||||
@pytest.mark.skip(reason='this is for local testing only')
|
||||
async def test_vision():
|
||||
agent = Agent(
|
||||
task='call explain_screen all the time the user asks you questions e.g. about the page like bbox which you see are labels - your task is to explain it and get the next question',
|
||||
llm=llm,
|
||||
controller=controller,
|
||||
browser=Browser(config=BrowserConfig(disable_security=True, headless=False)),
|
||||
)
|
||||
try:
|
||||
history: AgentHistoryList = await agent.run(20)
|
||||
finally:
|
||||
# Make sure to close the browser
|
||||
await agent.browser.close()
|
||||
Loading…
Add table
Add a link
Reference in a new issue