[Add] browser-use and main.py
This commit is contained in:
parent
08e64bdf45
commit
96914d44ac
221 changed files with 30952 additions and 1 deletions
198
browser-use/docs/cloud/implementation.mdx
Normal file
198
browser-use/docs/cloud/implementation.mdx
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
---
|
||||
title: "Implementing the API"
|
||||
description: "Learn how to implement the Browser Use API in Python"
|
||||
icon: "code"
|
||||
---
|
||||
|
||||
This guide shows how to implement common API patterns using Python. We'll create a complete example that creates and monitors a browser automation task.
|
||||
|
||||
## Basic Implementation
|
||||
|
||||
For all settings see [Run Task](/cloud/api-v1/run-task).
|
||||
|
||||
Here's a simple implementation using the `requests` library that polls the task and prints each new step as it appears:
|
||||
|
||||
```python
|
||||
import json
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
API_KEY = 'your_api_key_here'
|
||||
BASE_URL = 'https://api.browser-use.com/api/v1'
|
||||
HEADERS = {'Authorization': f'Bearer {API_KEY}'}
|
||||
|
||||
|
||||
def create_task(instructions: str):
    """Create a new browser automation task and return its ID.

    Args:
        instructions: Natural-language description of the task; sent as the
            ``task`` field of the JSON request body.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    response = requests.post(f'{BASE_URL}/run-task', headers=HEADERS, json={'task': instructions})
    # Fail with the real HTTP error instead of a confusing KeyError when an
    # error response carries no 'id' field.
    response.raise_for_status()
    return response.json()['id']
|
||||
|
||||
|
||||
def get_task_status(task_id: str):
    """Get the current status of a task.

    Args:
        task_id: ID of the task to query.

    Returns:
        The decoded JSON body of the status endpoint.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    response = requests.get(f'{BASE_URL}/task/{task_id}/status', headers=HEADERS)
    # Surface HTTP errors here rather than handing an error payload to the caller.
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def get_task_details(task_id: str):
    """Get the full task details, including its output.

    Args:
        task_id: ID of the task to query.

    Returns:
        The decoded JSON body of the task endpoint.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
    """
    response = requests.get(f'{BASE_URL}/task/{task_id}', headers=HEADERS)
    # Surface HTTP errors here; callers index into the result and would
    # otherwise hit a KeyError on an error payload.
    response.raise_for_status()
    return response.json()
|
||||
|
||||
|
||||
def wait_for_completion(task_id: str, poll_interval: int = 2):
    """Poll a task until it ends, printing each newly reported step.

    Args:
        task_id: ID of the task to follow.
        poll_interval: Seconds to sleep between two polls.

    Returns:
        The task details from the poll in which the status was
        ``'finished'``, ``'failed'`` or ``'stopped'``.
    """
    seen_steps = []
    while True:
        details = get_task_details(task_id)
        steps = details['steps']

        # Print only the steps that were absent from the previous poll.
        for step in steps:
            if step not in seen_steps:
                print(json.dumps(step, indent=4))
        seen_steps = steps

        if details['status'] in ('finished', 'failed', 'stopped'):
            return details
        time.sleep(poll_interval)
|
||||
|
||||
|
||||
def main():
    """Run the example end to end: create a task, follow it, print its output."""
    new_task_id = create_task('Open https://www.google.com and search for openai')
    print(f'Task created with ID: {new_task_id}')

    final_details = wait_for_completion(new_task_id)
    final_output = final_details['output']
    print(f"Final output: {final_output}")


if __name__ == '__main__':
    main()
|
||||
|
||||
```
|
||||
|
||||
## Task Control Example
|
||||
|
||||
Here's how to implement task control with pause/resume functionality:
|
||||
|
||||
```python
|
||||
def control_task():
    """Create a task, pause it, wait for the user, then resume it and print the output.

    Raises:
        requests.HTTPError: If the pause or resume request returns a 4xx/5xx status.
    """
    # Create a new task
    task_id = create_task("Go to google.com and search for Browser Use")

    # Wait for 5 seconds
    time.sleep(5)

    # Pause the task; check the response so "paused" is never reported after a failed call
    pause_response = requests.put(f"{BASE_URL}/pause-task", params={"task_id": task_id}, headers=HEADERS)
    pause_response.raise_for_status()
    print("Task paused! Check the live preview.")

    # Wait for user input
    input("Press Enter to resume...")

    # Resume the task; a failed resume would otherwise leave the wait below polling a paused task
    resume_response = requests.put(f"{BASE_URL}/resume-task", params={"task_id": task_id}, headers=HEADERS)
    resume_response.raise_for_status()

    # Wait for completion
    result = wait_for_completion(task_id)
    print(f"Task completed with output: {result['output']}")
|
||||
```
|
||||
|
||||
## Structured Output Example
|
||||
|
||||
Here's how to implement a task with structured JSON output:
|
||||
|
||||
```python
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import requests
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
|
||||
API_KEY = os.getenv("API_KEY")
|
||||
BASE_URL = 'https://api.browser-use.com/api/v1'
|
||||
HEADERS = {
|
||||
"Authorization": f"Bearer {API_KEY}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
|
||||
# Define output schema using Pydantic
|
||||
# One company entry in the structured result.
class SocialMediaCompany(BaseModel):
    name: str
    market_cap: float
    headquarters: str
    founded_year: int
|
||||
|
||||
|
||||
# Top-level result model: a list of SocialMediaCompany entries under "companies".
class SocialMediaCompanies(BaseModel):
    companies: List[SocialMediaCompany]
|
||||
|
||||
|
||||
def create_structured_task(instructions: str, schema: dict):
    """Create a task that expects structured output.

    The schema is serialized to a JSON string and sent as
    ``structured_output_json`` alongside the ``task`` instructions. Returns the
    ``id`` field of the response; an HTTP error status raises through
    ``raise_for_status``.
    """
    serialized_schema = json.dumps(schema)
    request_body = dict(task=instructions, structured_output_json=serialized_schema)
    run_task_url = f"{BASE_URL}/run-task"

    reply = requests.post(run_task_url, headers=HEADERS, json=request_body)
    reply.raise_for_status()
    return reply.json()["id"]
|
||||
|
||||
|
||||
def wait_for_task_completion(task_id: str, poll_interval: int = 5):
    """Block until the task's status is ``"finished"``.

    Queries the status endpoint, sleeping ``poll_interval`` seconds between
    attempts. Returns ``None`` once the status equals ``"finished"`` and raises
    ``RuntimeError`` when it is ``"failed"`` or ``"stopped"``. An HTTP error
    status raises through ``raise_for_status``.
    """
    status_url = f"{BASE_URL}/task/{task_id}/status"
    failure_states = ("failed", "stopped")

    while True:
        reply = requests.get(status_url, headers=HEADERS)
        reply.raise_for_status()
        current = reply.json()

        if current == "finished":
            return
        if current in failure_states:
            raise RuntimeError(f"Task {task_id} ended with status: {current}")

        print("Waiting for task to finish...")
        time.sleep(poll_interval)
|
||||
|
||||
|
||||
def fetch_task_output(task_id: str):
    """Return the ``output`` field of the task's detail record.

    An HTTP error status raises through ``raise_for_status``.
    """
    details_url = f"{BASE_URL}/task/{task_id}"
    reply = requests.get(details_url, headers=HEADERS)
    reply.raise_for_status()
    task_record = reply.json()
    return task_record["output"]
|
||||
|
||||
|
||||
def main():
    """Run a structured-output task and try to parse its result into the model."""
    output_schema = SocialMediaCompanies.model_json_schema()
    new_task_id = create_structured_task(
        "Get me the top social media companies by market cap",
        output_schema,
    )
    print(f"Task created with ID: {new_task_id}")

    wait_for_task_completion(new_task_id)
    print("Task completed!")

    raw_output = fetch_task_output(new_task_id)
    print("Raw output:", raw_output)

    # Validation failures are reported, not raised, so the raw output above
    # remains the last useful thing the user sees.
    try:
        companies = SocialMediaCompanies.model_validate_json(raw_output)
        print("Parsed output:")
        print(companies)
    except Exception as exc:
        print(f"Failed to parse structured output: {exc}")


if __name__ == "__main__":
    main()
|
||||
```
|
||||
|
||||
<Note>
|
||||
Remember to handle your API key securely and implement proper error handling
|
||||
in production code.
|
||||
</Note>
|
||||
124
browser-use/docs/cloud/quickstart.mdx
Normal file
124
browser-use/docs/cloud/quickstart.mdx
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
---
|
||||
title: "Quickstart"
|
||||
description: "Learn how to get started with the Browser Use Cloud API"
|
||||
icon: "cloud"
|
||||
---
|
||||
|
||||
The Browser Use Cloud API lets you create and manage browser automation agents programmatically. Each agent can execute tasks and provide real-time feedback through a live preview URL.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
<Note>
|
||||
You need an active subscription and an API key from
|
||||
[cloud.browser-use.com/billing](https://cloud.browser-use.com/billing)
|
||||
</Note>
|
||||
|
||||
## Pricing
|
||||
|
||||
The Browser Use Cloud API pricing consists of two components:
|
||||
|
||||
1. **Task Initialization Cost**: $0.01 per started task
|
||||
2. **Task Step Cost**: Additional cost based on the specific model used for each step
|
||||
|
||||
### LLM Model Step Pricing
|
||||
|
||||
The following table shows the total cost per step for each available LLM model:
|
||||
|
||||
| Model | Cost per Step |
|
||||
| ------------------------------ | ------------- |
|
||||
| GPT-4o | $0.03 |
|
||||
| GPT-4.1 | $0.03 |
|
||||
| Claude 3.7 Sonnet (2025-02-19) | $0.03 |
|
||||
| GPT-4o mini | $0.01 |
|
||||
| GPT-4.1 mini | $0.01 |
|
||||
| Gemini 2.0 Flash | $0.01 |
|
||||
| Gemini 2.0 Flash Lite | $0.01 |
|
||||
| Llama 4 Maverick | $0.01 |
|
||||
|
||||
### Example Cost Calculation
|
||||
|
||||
For example, using GPT-4o for a 10 step task:
|
||||
|
||||
- Task initialization: $0.01
|
||||
- 10 steps × $0.03 per step = $0.30
|
||||
- Total cost: $0.31
|
||||
|
||||
## Creating Your First Agent
|
||||
|
||||
Create a new browser automation task by providing instructions in natural language:
|
||||
|
||||
```bash
|
||||
curl --request POST https://api.browser-use.com/api/v1/run-task \
  --header "Authorization: Bearer your_api_key_here" \
  --header "Content-Type: application/json" \
  --data '{
    "task": "Go to google.com and search for Browser Use"
  }'
|
||||
```
|
||||
|
||||
The API returns a task ID that you can use to manage the task and check the live preview URL.
|
||||
|
||||
<Note>
|
||||
The task response includes a `live_url` that you can embed in an iframe to
|
||||
watch and control the agent in real-time.
|
||||
</Note>
|
||||
|
||||
## Managing Tasks
|
||||
|
||||
Control running tasks with these operations:
|
||||
|
||||
<AccordionGroup>
|
||||
<Accordion title="Pause/Resume Tasks">
|
||||
Temporarily pause task execution with [`/api/v1/pause-task`](/cloud/api-v1/pause-task) and resume with
|
||||
[`/api/v1/resume-task`](/cloud/api-v1/resume-task). Useful for manual inspection or intervention.
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Stop Tasks">
|
||||
Permanently stop a task using [`/api/v1/stop-task`](/cloud/api-v1/stop-task). The task cannot be
|
||||
resumed after being stopped.
|
||||
</Accordion>
|
||||
</AccordionGroup>
|
||||
|
||||
For detailed API documentation, see the tabs on the left, which cover the full API.
|
||||
|
||||
## Building your own client (OpenAPI)
|
||||
|
||||
<Note>
|
||||
We recommend this only if you don't need control and only need to run simple
|
||||
tasks.
|
||||
</Note>
|
||||
|
||||
The best way to build your own client is to use our [OpenAPI specification](https://api.browser-use.com/openapi.json) to generate a type-safe client library.
|
||||
|
||||
### Python
|
||||
|
||||
Use [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) to generate a modern Python client:
|
||||
|
||||
```bash
|
||||
# Install the generator
pipx install openapi-python-client --include-deps

# Generate the client (fetch the spec over HTTPS so it cannot be altered in transit)
openapi-python-client generate --url https://api.browser-use.com/openapi.json
|
||||
```
|
||||
|
||||
This will create a Python package with full type hints, modern dataclasses, and async support.
|
||||
|
||||
### TypeScript/JavaScript
|
||||
|
||||
For TypeScript projects, use [openapi-typescript](https://www.npmjs.com/package/openapi-typescript) to generate type definitions:
|
||||
|
||||
```bash
|
||||
# Install the generator
npm install -D openapi-typescript

# Generate the types (fetch the spec over HTTPS so it cannot be altered in transit)
npx openapi-typescript https://api.browser-use.com/openapi.json -o browser-use-api.ts
|
||||
```
|
||||
|
||||
This will create TypeScript definitions you can use with your preferred HTTP client.
|
||||
|
||||
<Note>
|
||||
Need help? Contact our support team at support@browser-use.com or join our
|
||||
[Discord community](https://link.browser-use.com/discord)
|
||||
</Note>
|
||||
Loading…
Add table
Add a link
Reference in a new issue