[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/docs/cloud/implementation.mdx
+++ b/browser-use/docs/cloud/implementation.mdx
@ -0,0 +1,198 @@
+---
+title: "Implementing the API"
+description: "Learn how to implement the Browser Use API in Python"
+icon: "code"
+---
+
+This guide shows how to implement common API patterns using Python. We'll build a complete example that creates and monitors a browser automation task.
+
+## Basic Implementation
+
+For all settings, see [Run Task](/cloud/api-v1/run-task).
+
+Here's a simple implementation using Python's `requests` library to stream the task steps:
+
+```python
+import json
+import time
+
+import requests
+
+# Placeholder credential: replace with your own API key before running.
+API_KEY = 'your_api_key_here'
+# Root of the v1 REST API; the endpoint paths below are appended to it.
+BASE_URL = 'https://api.browser-use.com/api/v1'
+# Bearer-token header sent with every request in this example.
+HEADERS = {'Authorization': f'Bearer {API_KEY}'}
+
+
+def create_task(instructions: str):
+	"""Create a new browser automation task"""
+	response = requests.post(f'{BASE_URL}/run-task', headers=HEADERS, json={'task': instructions})
+	return response.json()['id']
+
+
+def get_task_status(task_id: str):
+	"""Get current task status"""
+	response = requests.get(f'{BASE_URL}/task/{task_id}/status', headers=HEADERS)
+	return response.json()
+
+
+def get_task_details(task_id: str):
+	"""Get full task details including output"""
+	response = requests.get(f'{BASE_URL}/task/{task_id}', headers=HEADERS)
+	return response.json()
+
+
+def wait_for_completion(task_id: str, poll_interval: int = 2):
+	"""Poll task status until completion"""
+	count = 0
+	unique_steps = []
+	while True:
+		details = get_task_details(task_id)
+		new_steps = details['steps']
+		# use only the new steps that are not in unique_steps.
+		if new_steps != unique_steps:
+			for step in new_steps:
+				if step not in unique_steps:
+					print(json.dumps(step, indent=4))
+			unique_steps = new_steps
+		count += 1
+		status = details['status']
+
+		if status in ['finished', 'failed', 'stopped']:
+			return details
+		time.sleep(poll_interval)
+
+
+def main():
+	task_id = create_task('Open https://www.google.com and search for openai')
+	print(f'Task created with ID: {task_id}')
+	task_details = wait_for_completion(task_id)
+	print(f"Final output: {task_details['output']}")
+
+
+if __name__ == '__main__':
+	main()
+
+```
+
+## Task Control Example
+
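+Here's how to implement task control with pause/resume functionality. This example reuses the imports, `BASE_URL`, `HEADERS`, `create_task`, and `wait_for_completion` from the basic implementation above: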
+
+```python
+def control_task():
+    # Create a new task
+    task_id = create_task("Go to google.com and search for Browser Use")
+
+    # Wait for 5 seconds
+    time.sleep(5)
+
+    # Pause the task
+    requests.put(f"{BASE_URL}/pause-task?task_id={task_id}", headers=HEADERS)
+    print("Task paused! Check the live preview.")
+
+    # Wait for user input
+    input("Press Enter to resume...")
+
+    # Resume the task
+    requests.put(f"{BASE_URL}/resume-task?task_id={task_id}", headers=HEADERS)
+
+    # Wait for completion
+    result = wait_for_completion(task_id)
+    print(f"Task completed with output: {result['output']}")
+```
+
+## Structured Output Example
+
+Here's how to implement a task with structured JSON output:
+
+```python
+import json
+import os
+import time
+import requests
+from pydantic import BaseModel
+from typing import List
+
+
+# Read the key from the API_KEY environment variable (None when it is unset).
+API_KEY = os.getenv("API_KEY")
+# Root of the v1 REST API; the endpoint paths below are appended to it.
+BASE_URL = 'https://api.browser-use.com/api/v1'
+# Headers sent with every request: bearer authentication plus a JSON content type.
+HEADERS = {
+    "Authorization": f"Bearer {API_KEY}",
+    "Content-Type": "application/json"
+}
+
+
+# Define output schema using Pydantic
+# A single company entry in the structured result requested from the task.
+class SocialMediaCompany(BaseModel):
+    name: str
+    market_cap: float  # NOTE(review): unit/currency is not specified here — confirm
+    headquarters: str
+    founded_year: int
+
+
+# Top-level result model; its JSON schema is what main() sends with the task.
+class SocialMediaCompanies(BaseModel):
+    companies: List[SocialMediaCompany]
+
+
+def create_structured_task(instructions: str, schema: dict):
+    """Create a task that expects structured output"""
+    payload = {
+        "task": instructions,
+        "structured_output_json": json.dumps(schema)
+    }
+    response = requests.post(f"{BASE_URL}/run-task", headers=HEADERS, json=payload)
+    response.raise_for_status()
+    return response.json()["id"]
+
+
+def wait_for_task_completion(task_id: str, poll_interval: int = 5):
+    """Poll task status until it completes"""
+    while True:
+        response = requests.get(f"{BASE_URL}/task/{task_id}/status", headers=HEADERS)
+        response.raise_for_status()
+        status = response.json()
+        if status == "finished":
+            break
+        elif status in ["failed", "stopped"]:
+            raise RuntimeError(f"Task {task_id} ended with status: {status}")
+        print("Waiting for task to finish...")
+        time.sleep(poll_interval)
+
+
+def fetch_task_output(task_id: str):
+    """Retrieve the final task result"""
+    response = requests.get(f"{BASE_URL}/task/{task_id}", headers=HEADERS)
+    response.raise_for_status()
+    return response.json()["output"]
+
+
+def main():
+    schema = SocialMediaCompanies.model_json_schema()
+    task_id = create_structured_task(
+        "Get me the top social media companies by market cap",
+        schema
+    )
+    print(f"Task created with ID: {task_id}")
+
+    wait_for_task_completion(task_id)
+    print("Task completed!")
+
+    output = fetch_task_output(task_id)
+    print("Raw output:", output)
+
+    try:
+        parsed = SocialMediaCompanies.model_validate_json(output)
+        print("Parsed output:")
+        print(parsed)
+    except Exception as e:
+        print(f"Failed to parse structured output: {e}")
+
+
+if __name__ == "__main__":
+    main()
+```
+
+<Note>
+  Remember to handle your API key securely and implement proper error handling
+  in production code.
+</Note>
--- a/browser-use/docs/cloud/quickstart.mdx
+++ b/browser-use/docs/cloud/quickstart.mdx
@ -0,0 +1,124 @@
+---
+title: "Quickstart"
+description: "Learn how to get started with the Browser Use Cloud API"
+icon: "cloud"
+---
+
+The Browser Use Cloud API lets you create and manage browser automation agents programmatically. Each agent can execute tasks and provide real-time feedback through a live preview URL.
+
+## Prerequisites
+
+<Note>
+  You need an active subscription and an API key from
+  [cloud.browser-use.com/billing](https://cloud.browser-use.com/billing).
+</Note>
+
+## Pricing
+
+The Browser Use Cloud API pricing consists of two components:
+
+1. **Task Initialization Cost**: $0.01 per started task
+2. **Task Step Cost**: Additional cost based on the specific model used for each step
+
+### LLM Model Step Pricing
+
+The following table shows the total cost per step for each available LLM model:
+
+| Model                          | Cost per Step |
+| ------------------------------ | ------------- |
+| GPT-4o                         | $0.03         |
+| GPT-4.1                        | $0.03         |
+| Claude 3.7 Sonnet (2025-02-19) | $0.03         |
+| GPT-4o mini                    | $0.01         |
+| GPT-4.1 mini                   | $0.01         |
+| Gemini 2.0 Flash               | $0.01         |
+| Gemini 2.0 Flash Lite          | $0.01         |
+| Llama 4 Maverick               | $0.01         |
+
+### Example Cost Calculation
+
+For example, using GPT-4o for a 10 step task:
+
+- Task initialization: $0.01
+- 10 steps × $0.03 per step = $0.30
+- Total cost: $0.31
+
+## Creating Your First Agent
+
+Create a new browser automation task by providing instructions in natural language:
+
+```bash
+curl -X POST https://api.browser-use.com/api/v1/run-task \
+  -H "Authorization: Bearer your_api_key_here" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "task": "Go to google.com and search for Browser Use"
+  }'
+```
+
+The API returns a task ID that you can use to manage the task and check the live preview URL.
+
+<Note>
+  The task response includes a `live_url` that you can embed in an iframe to
+  watch and control the agent in real-time.
+</Note>
+
+## Managing Tasks
+
+Control running tasks with these operations:
+
+<AccordionGroup>
+  <Accordion title="Pause/Resume Tasks">
+    Temporarily pause task execution with [`/api/v1/pause-task`](/cloud/api-v1/pause-task) and resume with
+    [`/api/v1/resume-task`](/cloud/api-v1/resume-task). Useful for manual inspection or intervention.
+  </Accordion>
+
+  <Accordion title="Stop Tasks">
+    Permanently stop a task using [`/api/v1/stop-task`](/cloud/api-v1/stop-task). The task cannot be
+    resumed after being stopped.
+  </Accordion>
+</AccordionGroup>
+
+For detailed API documentation, see the tabs on the left, which cover the full API.
+
+## Building your own client (OpenAPI)
+
+<Note>
+  We recommend this only if you don't need control and only need to run simple
+  tasks.
+</Note>
+
+The best way to build your own client is to use our [OpenAPI specification](http://api.browser-use.com/openapi.json) to generate a type-safe client library.
+
+### Python
+
+Use [openapi-python-client](https://github.com/openapi-generators/openapi-python-client) to generate a modern Python client:
+
+```bash
+# Install the generator
+pipx install openapi-python-client --include-deps
+
+# Generate the client
+openapi-python-client generate --url http://api.browser-use.com/openapi.json
+```
+
+This will create a Python package with full type hints, modern dataclasses, and async support.
+
+### TypeScript/JavaScript
+
+For TypeScript projects, use [openapi-typescript](https://www.npmjs.com/package/openapi-typescript) to generate type definitions:
+
+```bash
+# Install the generator
+npm install -D openapi-typescript
+
+# Generate the types
+npx openapi-typescript http://api.browser-use.com/openapi.json -o browser-use-api.ts
+```
+
+This will create TypeScript definitions you can use with your preferred HTTP client.
+
+<Note>
+  Need help? Contact our support team at support@browser-use.com or join our
+  [Discord community](https://link.browser-use.com/discord).
+</Note>