[Add] browser-use and main.py

This commit is contained in:
tv0924@icloud.com 2025-05-18 21:57:54 +09:00
commit 96914d44ac
221 changed files with 30952 additions and 1 deletions

View file

@ -0,0 +1,123 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
from dotenv import load_dotenv
load_dotenv()
import discord
from discord.ext import commands
from langchain_core.language_models.chat_models import BaseChatModel
from browser_use import BrowserConfig
from browser_use.agent.service import Agent, Browser
class DiscordBot(commands.Bot):
    """Discord bot that runs Browser-Use tasks requested in chat messages.

    A message that starts with the configured prefix followed by a space
    (default: "$bu ") is treated as a task description. The task is run
    asynchronously and the result is sent back as a reply to that message.

    Args:
        llm (BaseChatModel): Language model instance to use for task processing.
        prefix (str, optional): Message prefix that triggers a browser task.
            Surrounding whitespace is stripped. Defaults to "$bu".
        ack (bool, optional): Whether to acknowledge task receipt with a
            message before the task starts. Defaults to False.
        browser_config (BrowserConfig, optional): Browser configuration
            settings. Defaults to headless mode.

    Usage:
        ```python
        from langchain_openai import ChatOpenAI

        llm = ChatOpenAI()
        bot = DiscordBot(llm=llm, prefix='$bu', ack=True)
        bot.run('YOUR_DISCORD_TOKEN')
        ```

    Discord Usage:
        Send messages starting with the prefix:
        "$bu search for python tutorials"
    """

    def __init__(
        self,
        llm: BaseChatModel,
        prefix: str = '$bu',
        ack: bool = False,
        # NOTE: this default is built once, when the class is defined, and is
        # shared by every bot that does not pass its own configuration.
        browser_config: BrowserConfig = BrowserConfig(headless=True),
    ):
        self.llm = llm
        self.prefix = prefix.strip()
        self.ack = ack
        self.browser_config = browser_config

        # Message content is needed to read the task text; members is enabled
        # for user info.
        intents = discord.Intents.default()
        intents.message_content = True
        intents.members = True

        # commands.Bot requires a command prefix; it is independent of
        # self.prefix, which is what triggers browser tasks.
        super().__init__(command_prefix='!', intents=intents)

    async def on_ready(self):
        """Log the login and sync the application command tree with Discord."""
        try:
            print(f'We have logged in as {self.user}')
            await self.tree.sync()
        except Exception as e:
            print(f'Error during bot startup: {e}')

    async def on_message(self, message):
        """Run a browser task when a message starts with the trigger prefix.

        Messages sent by the bot itself and messages without the prefix are
        ignored. Failures are reported back to the channel where possible and
        never propagate out of this handler.

        NOTE: process_commands() is not called here, so commands registered on
        this bot are not dispatched.
        """
        try:
            if message.author == self.user:  # Ignore the bot's own messages
                return

            content = message.content.strip()
            trigger = f'{self.prefix} '
            if not content.startswith(trigger):
                return

            # Cut off only the leading prefix. str.replace() would also remove
            # any later occurrence of the prefix inside the task description.
            task = content[len(trigger):].strip()

            if self.ack:
                try:
                    await message.reply(
                        'Starting browser use task...',
                        mention_author=True,  # Ping the author in the reply
                    )
                except Exception as e:
                    # The acknowledgement is best-effort; still run the task.
                    print(f'Error sending start message: {e}')

            try:
                agent_message = await self.run_agent(task)
                await message.channel.send(content=f'{agent_message}', reference=message, mention_author=True)
            except Exception as e:
                await message.channel.send(
                    content=f'Error during task execution: {str(e)}',
                    reference=message,
                    mention_author=True,
                )
        except Exception as e:
            print(f'Error in message handling: {e}')

    async def run_agent(self, task: str) -> str:
        """Run one Browser-Use task and return the text to send back.

        Args:
            task: Task description handed to the agent.

        Returns:
            The extracted content of the first result of the last history
            entry when the run is done, otherwise a generic failure message.

        Raises:
            Exception: If creating or running the agent fails; the original
                error is attached as the cause.
        """
        browser = None
        try:
            browser = Browser(config=self.browser_config)
            agent = Agent(task=task, llm=self.llm, browser=browser)
            result = await agent.run()

            agent_message = None
            if result.is_done():
                agent_message = result.history[-1].result[0].extracted_content
            if agent_message is None:
                agent_message = 'Oops! Something went wrong while running Browser-Use.'
            return agent_message
        except Exception as e:
            raise Exception(f'Browser-use task failed: {str(e)}') from e
        finally:
            # A new browser is created for every task; close it so repeated
            # tasks do not leave browsers running. Closing is best-effort so a
            # cleanup failure does not hide the task result.
            if browser is not None:
                try:
                    await browser.close()
                except Exception as close_error:
                    print(f'Error closing browser: {close_error}')

View file

@ -0,0 +1,72 @@
"""
This example requires you to have a Discord bot token and the bot already added to a server.
Steps to create, invite, and run a Discord bot:
1. Create a Discord Application:
* Go to the Discord Developer Portal: https://discord.com/developers/applications
* Log in to the Discord website.
* Click on "New Application".
* Give the application a name and click "Create".
2. Configure the Bot:
* Navigate to the "Bot" tab on the left side of the screen.
* Make sure "Public Bot" is ticked if you want others to invite your bot.
* Generate your bot token by clicking on "Reset Token", Copy the token and save it securely.
* Do not share the bot token. Treat it like a password. If the token is leaked, regenerate it.
3. Enable Privileged Intents:
* Scroll down to the "Privileged Gateway Intents" section.
* Enable the necessary intents (e.g., "Server Members Intent" and "Message Content Intent").
--> Note: Enabling privileged intents for bots in over 100 guilds requires bot verification. You may need to contact Discord support to enable privileged intents for verified bots.
4. Generate Invite URL:
* Go to "OAuth2" tab and "OAuth2 URL Generator" section.
* Under "scopes", tick the "bot" checkbox.
* Tick the permissions required for your bot to function under Bot Permissions.
* e.g. "Send Messages", "Send Messages in Threads", "Read Message History", "Mention Everyone".
* Copy the generated URL under the "GENERATED URL" section at the bottom.
5. Invite the Bot:
* Paste the URL into your browser.
* Choose a server to invite the bot to.
* Click Authorize.
--> Note: The person adding the bot needs "Manage Server" permissions.
6. Run the code below to start the bot with your bot token.
7. Write e.g. "$bu what's the weather in Tokyo?" to start a browser-use task and get a response inside the Discord channel.
"""
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import BrowserConfig
from examples.integrations.discord.discord_api import DiscordBot
# Read the credentials from the environment; fail fast when one is missing.
bot_token = os.environ.get('DISCORD_BOT_TOKEN')
if not bot_token:
    raise ValueError('Discord bot token not found in .env file.')

api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise ValueError('GOOGLE_API_KEY is not set')

# Language model that drives the browser-use agent.
llm = ChatGoogleGenerativeAI(api_key=SecretStr(api_key), model='gemini-2.0-flash-exp')

# llm is required; prefix, ack and browser_config are optional.
# The browser is started with a visible window (headless=False).
bot = DiscordBot(
    browser_config=BrowserConfig(headless=False),
    ack=True,
    prefix='$bu',
    llm=llm,
)

# Start the bot with the Discord token.
bot.run(token=bot_token)

View file

@ -0,0 +1,76 @@
# Slack Integration
Steps to create and configure a Slack bot:
1. Create a Slack App:
* Go to the Slack API: https://api.slack.com/apps
* Click on "Create New App".
* Choose "From scratch" and give your app a name and select the workspace.
* Provide a name and description for your bot (these are required fields).
2. Configure the Bot:
* Navigate to the "OAuth & Permissions" tab on the left side of the screen.
* Under "Scopes", add the necessary bot token scopes (add these "chat:write", "channels:history", "im:history").
3. Enable Event Subscriptions:
* Navigate to the "Event Subscriptions" tab.
* Enable events and add the necessary bot events (add these "message.channels", "message.im").
* Add your request URL (you can use ngrok to expose your local server if needed). [See how to set up ngrok](#installing-and-starting-ngrok).
* **Note:** The URL provided by ngrok is ephemeral and will change each time ngrok is started. You will need to update the request URL in the bot's settings each time you restart ngrok. [See how to update the request URL](#updating-the-request-url-in-bots-settings).
4. Add the bot to your Slack workspace:
* Navigate to the "OAuth & Permissions" tab.
* Under "OAuth Tokens for Your Workspace", click on "Install App to Workspace".
* Follow the prompts to authorize the app and add it to your workspace.
5. Set up environment variables:
* Obtain the `SLACK_SIGNING_SECRET`:
* Go to the Slack API: https://api.slack.com/apps
* Select your app.
* Navigate to the "Basic Information" tab.
* Copy the "Signing Secret".
* Obtain the `SLACK_BOT_TOKEN`:
* Go to the Slack API: https://api.slack.com/apps
* Select your app.
* Navigate to the "OAuth & Permissions" tab.
* Copy the "Bot User OAuth Token".
* Create a `.env` file in the root directory of your project and add the following lines:
```env
SLACK_SIGNING_SECRET=your-signing-secret
SLACK_BOT_TOKEN=your-bot-token
```
6. Invite the bot to a channel:
* Use the `/invite @your-bot-name` command in the Slack channel where you want the bot to be active.
7. Run the code in `examples/slack_example.py` to start the bot with your bot token and signing secret.
8. Write e.g. "$bu what's the weather in Tokyo?" to start a browser-use task and get a response inside the Slack channel.
## Installing and Starting ngrok
To expose your local server to the internet, you can use ngrok. Follow these steps to install and start ngrok:
1. Download ngrok from the official website: https://ngrok.com/download
2. Create a free account and follow the official steps to install ngrok.
3. Start ngrok by running the following command in your terminal:
```sh
ngrok http 3000
```
Replace `3000` with the port number your local server is running on.
## Updating the Request URL in Bot's Settings
If you need to update the request URL (e.g., when the ngrok URL changes), follow these steps:
1. Go to the Slack API: https://api.slack.com/apps
2. Select your app.
3. Navigate to the "Event Subscriptions" tab.
4. Update the "Request URL" field with the new ngrok URL. The URL should be something like: `https://<ngrok-id>.ngrok-free.app/slack/events`
5. Save the changes.
## Installing Required Packages
To run this example, you need to install the following packages:
- `fastapi`
- `uvicorn`
- `slack_sdk`
You can install these packages using pip:
```sh
pip install fastapi uvicorn slack_sdk
```

View file

@ -0,0 +1,130 @@
import logging
import os
import sys
from typing import Annotated
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from fastapi import Depends, FastAPI, HTTPException, Request
from langchain_core.language_models.chat_models import BaseChatModel
from slack_sdk.errors import SlackApiError
from slack_sdk.signature import SignatureVerifier
from slack_sdk.web.async_client import AsyncWebClient
from browser_use import BrowserConfig
from browser_use.agent.service import Agent, Browser
from browser_use.logging_config import setup_logging
# Apply browser-use's logging configuration before the module logger is created.
setup_logging()
# Logger shared by the SlackBot class and the request handler in this module.
logger = logging.getLogger('slack')
# FastAPI application on which the Slack events endpoint is registered.
app = FastAPI()
class SlackBot:
    """Slack bot that runs Browser-Use tasks requested in Slack messages.

    A message whose text starts with "$bu " is treated as a task description.
    The task is run and the result is posted back in the thread of the
    triggering message.

    Args:
        llm (BaseChatModel): Language model instance to use for task processing.
        bot_token (str): Slack bot token used to post messages.
        signing_secret (str): Slack signing secret used to verify requests.
        ack (bool, optional): Whether to acknowledge task receipt with a
            message before the task starts. Defaults to False.
        browser_config (BrowserConfig, optional): Browser configuration
            settings. Defaults to headless mode.

    Raises:
        ValueError: If the bot token or the signing secret is empty.
    """

    def __init__(
        self,
        llm: BaseChatModel,
        bot_token: str,
        signing_secret: str,
        ack: bool = False,
        browser_config: BrowserConfig = BrowserConfig(headless=True),
    ):
        if not bot_token or not signing_secret:
            raise ValueError('Bot token and signing secret must be provided')
        self.llm = llm
        self.ack = ack
        self.browser_config = browser_config
        self.client = AsyncWebClient(token=bot_token)
        self.signature_verifier = SignatureVerifier(signing_secret)
        # IDs of events already handled, so a repeated delivery of the same
        # event is processed only once.
        # NOTE(review): this set is never pruned and grows for the lifetime of
        # the process.
        self.processed_events = set()
        logger.info('SlackBot initialized')

    async def handle_event(self, event, event_id):
        """Process one Slack event and run a browser task if it asks for one.

        Events without an ID, events already seen, bot messages, and messages
        that do not start with "$bu " are ignored. Errors are logged and never
        propagate to the caller.

        Args:
            event: The Slack event payload (a mapping).
            event_id: Identifier of the event, used for de-duplication.
        """
        try:
            logger.info(f'Received event id: {event_id}')
            if not event_id:
                logger.warning('Event ID missing in event data')
                return
            if event_id in self.processed_events:
                logger.info(f'Event {event_id} already processed')
                return
            self.processed_events.add(event_id)

            if event.get('subtype') == 'bot_message':
                return

            text = event.get('text')
            user_id = event.get('user')
            if not text or not text.startswith('$bu '):
                return

            task = text[len('$bu '):].strip()
            channel = event['channel']
            thread_ts = event.get('ts')  # Reply in the triggering message's thread

            if self.ack:
                try:
                    await self.send_message(channel, f'<@{user_id}> Starting browser use task...', thread_ts=thread_ts)
                except Exception as e:
                    # The acknowledgement is best-effort; still run the task.
                    logger.error(f'Error sending start message: {e}')

            try:
                agent_message = await self.run_agent(task)
                await self.send_message(channel, f'<@{user_id}> {agent_message}', thread_ts=thread_ts)
            except Exception as e:
                await self.send_message(channel, f'Error during task execution: {str(e)}', thread_ts=thread_ts)
        except Exception as e:
            logger.error(f'Error in handle_event: {str(e)}')

    async def run_agent(self, task: str) -> str:
        """Run one Browser-Use task and return the text to post back.

        Args:
            task: Task description handed to the agent.

        Returns:
            The extracted content of the first result of the last history
            entry when the run is done, a generic failure message when there
            is no such content, or an error description if the run raised.
        """
        browser = None
        try:
            browser = Browser(config=self.browser_config)
            agent = Agent(task=task, llm=self.llm, browser=browser)
            result = await agent.run()

            agent_message = None
            if result.is_done():
                agent_message = result.history[-1].result[0].extracted_content
            if agent_message is None:
                agent_message = 'Oops! Something went wrong while running Browser-Use.'
            return agent_message
        except Exception as e:
            # Failures are reported as message text instead of being raised.
            logger.error(f'Error during task execution: {str(e)}')
            return f'Error during task execution: {str(e)}'
        finally:
            # A new browser is created for every task; close it so repeated
            # tasks do not leave browsers running. Closing is best-effort so a
            # cleanup failure does not hide the task result.
            if browser is not None:
                try:
                    await browser.close()
                except Exception as close_error:
                    logger.error(f'Error closing browser: {close_error}')

    async def send_message(self, channel, text, thread_ts=None):
        """Post a message to a Slack channel, optionally inside a thread.

        Slack API errors are logged and not raised.

        Args:
            channel: Channel to post to.
            text: Message text.
            thread_ts: Timestamp of the message to reply to, if any.
        """
        try:
            await self.client.chat_postMessage(channel=channel, text=text, thread_ts=thread_ts)
        except SlackApiError as e:
            logger.error(f'Error sending message: {e.response["error"]}')
@app.post('/slack/events')
async def slack_events(request: Request, slack_bot: Annotated[SlackBot, Depends()]):
    """Receive Slack event callbacks and hand them to the bot.

    Args:
        request: Incoming HTTP request from Slack.
        slack_bot: SlackBot instance supplied through FastAPI dependency
            injection.

    Returns:
        The challenge value when the payload contains one, otherwise an empty
        JSON object.

    Raises:
        HTTPException: 400 if the request signature is invalid, 500 on any
            other unexpected failure.
    """
    try:
        if not slack_bot.signature_verifier.is_valid_request(await request.body(), dict(request.headers)):
            logger.warning('Request verification failed')
            raise HTTPException(status_code=400, detail='Request verification failed')

        event_data = await request.json()
        logger.info(f'Received event data: {event_data}')

        # A payload with a challenge is answered by echoing the value back.
        if 'challenge' in event_data:
            return {'challenge': event_data['challenge']}

        if 'event' in event_data:
            try:
                await slack_bot.handle_event(event_data.get('event'), event_data.get('event_id'))
            except Exception as e:
                logger.error(f'Error handling event: {str(e)}')
        return {}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged; the
        # generic handler below would otherwise turn them into a 500.
        raise
    except Exception as e:
        logger.error(f'Error in slack_events: {str(e)}')
        raise HTTPException(status_code=500, detail='Internal Server Error') from e

View file

@ -0,0 +1,46 @@
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
from dotenv import load_dotenv
load_dotenv()
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import SecretStr
from browser_use import BrowserConfig
from examples.integrations.slack.slack_api import SlackBot, app
# Load credentials from environment variables; fail fast when one is missing.
bot_token = os.getenv('SLACK_BOT_TOKEN')
if not bot_token:
    raise ValueError('Slack bot token not found in .env file.')

signing_secret = os.getenv('SLACK_SIGNING_SECRET')
if not signing_secret:
    raise ValueError('Slack signing secret not found in .env file.')

api_key = os.getenv('GOOGLE_API_KEY')
if not api_key:
    raise ValueError('GOOGLE_API_KEY is not set')

llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp', api_key=SecretStr(api_key))

slack_bot = SlackBot(
    llm=llm,  # required; instance of BaseChatModel
    bot_token=bot_token,  # required; Slack bot token
    signing_secret=signing_secret,  # required; Slack signing secret
    ack=True,  # optional; whether to acknowledge task receipt with a message, defaults to False
    browser_config=BrowserConfig(
        headless=True
    ),  # optional; useful for changing headless mode or other browser configs, defaults to headless mode
)

# Make the endpoint's SlackBot dependency resolve to the instance built above.
app.dependency_overrides[SlackBot] = lambda: slack_bot

if __name__ == '__main__':
    import uvicorn

    # Serve the app object configured above. An import string such as
    # 'integrations.slack.slack_api:app' makes uvicorn import the module under
    # a different name than the import at the top of this file, which yields a
    # second app object that does not carry the dependency override.
    uvicorn.run(app, host='0.0.0.0', port=3000)