[Add] browser-use and main.py

This commit is contained in:
tv0924@icloud.com 2025-05-18 21:57:54 +09:00
commit 96914d44ac
221 changed files with 30952 additions and 1 deletions

View file

@ -0,0 +1,2 @@
# Gemini
Detailed video on how to integrate browser-use with Gemini: https://www.youtube.com/watch?v=JluZiWBV_Tc

View file

@ -0,0 +1,42 @@
# Optional: Disable telemetry
# os.environ["ANONYMIZED_TELEMETRY"] = "false"
# Optional: Set the OLLAMA host to a remote server
# os.environ["OLLAMA_HOST"] = "http://x.x.x.x:11434"
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_ollama import ChatOllama
from browser_use import Agent
from browser_use.agent.views import AgentHistoryList
async def run_search() -> AgentHistoryList:
    """Run the r/LocalLLaMA search task with an Ollama-served model.

    Returns:
        Whatever `Agent.run()` returns (annotated as `AgentHistoryList`).
    """
    ollama_llm = ChatOllama(
        model='qwen2.5:32b-instruct-q4_K_M',
        num_ctx=32000,
    )
    search_agent = Agent(
        task="Search for a 'browser use' post on the r/LocalLLaMA subreddit and open it.",
        llm=ollama_llm,
    )
    return await search_agent.run()
async def main():
    """Await `run_search()` and print its result preceded by two newlines."""
    history = await run_search()
    print('\n\n', history)


# Script entry point.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,49 @@
"""
Simple try of the agent.
@dev You need to add AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import AzureChatOpenAI
from browser_use import Agent
# Read the Azure credentials from the environment; abort if either is missing or empty.
azure_openai_api_key = os.getenv('AZURE_OPENAI_KEY')
azure_openai_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')

if not (azure_openai_api_key and azure_openai_endpoint):
    raise ValueError('AZURE_OPENAI_KEY or AZURE_OPENAI_ENDPOINT is not set')

# Azure OpenAI chat client; model and deployment are both named 'gpt-4o'.
llm = AzureChatOpenAI(
    model_name='gpt-4o',
    deployment_name='gpt-4o',
    api_version='2024-08-01-preview',  # API version is pinned explicitly
    azure_endpoint=azure_openai_endpoint,  # azure_endpoint is used rather than openai_api_base
    openai_api_key=azure_openai_api_key,
)

# Agent for the Amazon laptop task, created with enable_memory=True.
agent = Agent(
    llm=llm,
    enable_memory=True,
    task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
)
async def main():
    """Run the module-level agent for at most 10 steps, then wait for Enter."""
    await agent.run(max_steps=10)
    # Keep the process alive until the user presses Enter.
    input('Press Enter to continue...')


# Start the run only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,75 @@
"""
Automated news analysis and sentiment scoring using Bedrock.
Ensure you have browser-use installed with the `examples` extra, i.e. `uv pip install 'browser-use[examples]'`
@dev Ensure AWS environment variables are set correctly for Bedrock access.
"""
import argparse
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
import boto3
from botocore.config import Config
from langchain_aws import ChatBedrockConverse
from browser_use import Agent
from browser_use.browser.browser import Browser, BrowserConfig
from browser_use.controller.service import Controller
def get_llm():
    """Build the Bedrock chat model used by the agent.

    Creates a `bedrock-runtime` boto3 client for region us-east-1 with
    adaptive retries (max_attempts=10) and passes it to `ChatBedrockConverse`.

    Returns:
        The configured `ChatBedrockConverse` instance.
    """
    retry_config = Config(retries={'max_attempts': 10, 'mode': 'adaptive'})
    runtime_client = boto3.client(
        'bedrock-runtime',
        region_name='us-east-1',
        config=retry_config,
    )
    chat_model = ChatBedrockConverse(
        model_id='us.anthropic.claude-3-5-sonnet-20241022-v2:0',
        temperature=0.0,
        max_tokens=None,
        client=runtime_client,
    )
    return chat_model
# Default task; used when --query is not supplied on the command line.
task = (
    "Visit cnn.com, navigate to the 'World News' section, and identify the latest headline. "
    'Open the first article and summarize its content in 3-4 sentences. '
    'Additionally, analyze the sentiment of the article (positive, neutral, or negative) '
    'and provide a confidence score for the sentiment. Present the result in a tabular format.'
)

# Command line: a single optional --query argument that overrides the default task.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--query',
    default=task,
    type=str,
    help='The query for the agent to execute',
)
args = parser.parse_args()

llm = get_llm()

# Browser with an otherwise empty configuration.
browser = Browser(
    config=BrowserConfig(
        # browser_binary_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    ),
)

# Agent wired to the Bedrock model, a fresh Controller and the browser above.
agent = Agent(
    llm=llm,
    task=args.query,
    browser=browser,
    controller=Controller(),
    validate_output=True,
)
async def main():
    """Run the module-level agent for at most 30 steps, then close the browser.

    The browser is closed in a `finally` clause so it is released even when
    the agent run raises.
    """
    try:
        await agent.run(max_steps=30)
    finally:
        await browser.close()


# Start the run only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,32 @@
"""
Simple script that runs the task of opening amazon and searching.
@dev Ensure we have a `ANTHROPIC_API_KEY` variable in our `.env` file.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_anthropic import ChatAnthropic
from browser_use import Agent
# Anthropic chat model configured with temperature 0.0, timeout 30 and no stop sequences.
llm = ChatAnthropic(
    model_name='claude-3-7-sonnet-20250219',
    temperature=0.0,
    timeout=30,
    stop=None,
)

# Agent for the Amazon laptop task.
agent = Agent(
    llm=llm,
    task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
)
async def main():
    """Run the module-level agent for at most 10 steps."""
    await agent.run(max_steps=10)


# Start the run only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,38 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_deepseek import ChatDeepSeek
from pydantic import SecretStr
from browser_use import Agent
# Fail at import time when the DeepSeek credential is missing or empty.
api_key = os.environ.get('DEEPSEEK_API_KEY', '')
if api_key == '':
    raise ValueError('DEEPSEEK_API_KEY is not set')
async def run_search():
    """Run the Amazon laptop task with the `deepseek-reasoner` model.

    The agent is created with vision disabled, at most 2 failures and one
    action per step.
    """
    reasoner_llm = ChatDeepSeek(
        base_url='https://api.deepseek.com/v1',
        model='deepseek-reasoner',
        api_key=SecretStr(api_key),
    )
    search_agent = Agent(
        task='go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
        llm=reasoner_llm,
        use_vision=False,
        max_failures=2,
        max_actions_per_step=1,
    )
    await search_agent.run()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())

View file

@ -0,0 +1,41 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_deepseek import ChatDeepSeek
from pydantic import SecretStr
from browser_use import Agent
# Fail at import time when the DeepSeek credential is missing or empty.
api_key = os.environ.get('DEEPSEEK_API_KEY', '')
if api_key == '':
    raise ValueError('DEEPSEEK_API_KEY is not set')
async def run_search():
    """Run the four-step r/LocalLLaMA task with the `deepseek-chat` model (vision disabled)."""
    agent = Agent(
        # Adjacent string literals are concatenated with nothing in between,
        # so each step ends with a space to keep the steps from fusing.
        task=(
            '1. Go to https://www.reddit.com/r/LocalLLaMA '
            "2. Search for 'browser use' in the search bar "
            '3. Click on first result '
            '4. Return the first comment'
        ),
        llm=ChatDeepSeek(
            base_url='https://api.deepseek.com/v1',
            model='deepseek-chat',
            api_key=SecretStr(api_key),
        ),
        use_vision=False,
    )
    await agent.run()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())

View file

@ -0,0 +1,45 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import Agent, BrowserConfig
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContextConfig
# Fail at import time when the Google credential is missing or empty.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError('GOOGLE_API_KEY is not set')

# Gemini chat model; the key is wrapped in SecretStr before being passed on.
llm = ChatGoogleGenerativeAI(
    model='gemini-2.0-flash-exp',
    api_key=SecretStr(api_key),
)

# Browser whose new contexts are created with viewport_expansion=0.
browser = Browser(
    config=BrowserConfig(
        new_context_config=BrowserContextConfig(viewport_expansion=0),
    ),
)
async def run_search():
    """Run the Amazon laptop task with Gemini, then close the shared browser.

    The agent runs for at most 25 steps with up to 4 actions per step. The
    module-level browser is closed in a `finally` clause so it is released
    even when the run raises.
    """
    agent = Agent(
        task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
        llm=llm,
        max_actions_per_step=4,
        browser=browser,
    )
    try:
        await agent.run(max_steps=25)
    finally:
        await browser.close()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())

View file

@ -0,0 +1,33 @@
"""
Simple try of the agent.
@dev You need to add OPENAI_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from browser_use import Agent
# OpenAI chat model used by the agent.
llm = ChatOpenAI(model='gpt-4o')

# Agent for the Amazon laptop task.
agent = Agent(
    llm=llm,
    task='Go to amazon.com, search for laptop, sort by best rating, and give me the price of the first result',
)
async def main():
    """Run the module-level agent for at most 10 steps, then wait for Enter."""
    await agent.run(max_steps=10)
    # Keep the process alive until the user presses Enter.
    input('Press Enter to continue...')


# Start the run only when executed as a script, not on import.
if __name__ == '__main__':
    asyncio.run(main())

View file

@ -0,0 +1,41 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from pydantic import SecretStr
from browser_use import Agent
# Fail at import time when the Grok credential is missing or empty.
api_key = os.environ.get('GROK_API_KEY', '')
if api_key == '':
    raise ValueError('GROK_API_KEY is not set')
async def run_search():
    """Run the four-step Amazon headphones task with `grok-3-beta` (vision disabled).

    The model is reached through `ChatOpenAI` pointed at https://api.x.ai/v1.
    """
    agent = Agent(
        # Adjacent string literals are concatenated with nothing in between,
        # so each step ends with a space; otherwise the URL becomes
        # 'https://www.amazon.com2. Search ...'.
        task=(
            '1. Go to https://www.amazon.com '
            '2. Search for "wireless headphones" '
            '3. Filter by "Highest customer rating" '
            '4. Return the title and price of the first product'
        ),
        llm=ChatOpenAI(
            base_url='https://api.x.ai/v1',
            model='grok-3-beta',
            api_key=SecretStr(api_key),
        ),
        use_vision=False,
    )
    await agent.run()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())

View file

@ -0,0 +1,47 @@
"""
Simple try of the agent.
@dev You need to add NOVITA_API_KEY to your environment variables.
"""
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from pydantic import SecretStr
from browser_use import Agent
# Fail at import time when the Novita credential is missing or empty.
api_key = os.environ.get('NOVITA_API_KEY', '')
if api_key == '':
    raise ValueError('NOVITA_API_KEY is not set')
async def run_search():
    """Run the four-step r/LocalLLaMA task with `deepseek/deepseek-v3-0324` (vision disabled).

    The model is reached through `ChatOpenAI` pointed at https://api.novita.ai/v3/openai.
    """
    agent = Agent(
        # Adjacent string literals are concatenated with nothing in between,
        # so each step ends with a space to keep the steps from fusing.
        task=(
            '1. Go to https://www.reddit.com/r/LocalLLaMA '
            "2. Search for 'browser use' in the search bar "
            '3. Click on first result '
            '4. Return the first comment'
        ),
        llm=ChatOpenAI(
            base_url='https://api.novita.ai/v3/openai',
            model='deepseek/deepseek-v3-0324',
            api_key=SecretStr(api_key),
        ),
        use_vision=False,
    )
    await agent.run()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())

View file

@ -0,0 +1,34 @@
import asyncio
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_ollama import ChatOllama
from browser_use import Agent
async def run_search():
    """Run the four-step r/LocalLLaMA task with an Ollama-served `qwen2.5:latest` model.

    The agent is limited to one action per step.
    """
    agent = Agent(
        # The steps were fused into the URL and into each other
        # ('.../r/LocalLLaMA2. Search ...'); keep a space between them.
        task=(
            '1. Go to https://www.reddit.com/r/LocalLLaMA '
            "2. Search for 'browser use' in the search bar "
            '3. Click search '
            '4. Call done'
        ),
        llm=ChatOllama(
            # model='qwen2.5:32b-instruct-q4_K_M',
            # model='qwen2.5:14b',
            model='qwen2.5:latest',
            num_ctx=128000,
        ),
        max_actions_per_step=1,
    )
    await agent.run()


# Script entry point.
if __name__ == '__main__':
    asyncio.run(run_search())