[Add] browser-use and main.py

2025-05-18 21:57:54 +09:00 · 2025-05-18 21:57:54 +09:00 · 96914d44ac
commit 96914d44ac
parent 08e64bdf45
221 changed files with 30952 additions and 1 deletions
--- a/browser-use/.github/CONTRIBUTING.md
+++ b/browser-use/.github/CONTRIBUTING.md
@ -0,0 +1,7 @@
+# Contributing to browser-use
+
+We love contributions! Please read through these links to get started:
+
+ - 🔢 [Contribution Guidelines](https://docs.browser-use.com/development/contribution-guide)
+ - 👾 [Local Development Setup Guide](https://docs.browser-use.com/development/local-setup)
+ - 🏷️ [Issues Tagged: `#help-wanted`](https://github.com/browser-use/browser-use/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22help%20wanted%22)
--- a/browser-use/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml
+++ b/browser-use/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml
@ -0,0 +1,116 @@
+name: 🎯 Agent Page Interaction Issue
+description: Agent fails to detect, click, scroll, input, or otherwise interact with some type of element on some page(s)
+labels: ["bug", "element-detection"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report! Please fill out the form below to help us reproduce and fix the issue.
+
+  - type: input
+    id: version
+    attributes:
+      label: Browser Use Version
+      description: What version of the `browser-use` library are you using? (Run `uv pip show browser-use` or `git log -n 1` to find out) **DO NOT JUST WRITE `latest version` or `main`**
+      placeholder: "e.g. 0.4.45 or 62760baaefd"
+    validations:
+      required: true
+
+  - type: dropdown
+    id: model
+    attributes:
+      label: LLM Model
+      description: Which LLM model(s) are you using?
+      multiple: true
+      options:
+        - gpt-4o
+        - gpt-4o-mini
+        - gpt-4
+        - gpt-4.1
+        - gpt-4.1-mini
+        - gpt-4.1-nano
+        - claude-3.7-sonnet
+        - claude-3.5-sonnet
+        - gemini-2.5-flash-preview
+        - gemini-2.5-pro
+        - gemini-2.0-flash
+        - gemini-2.0-flash-lite
+        - gemini-1.5-flash
+        - deepseek-chat
+        - Local Model (Specify model in description)
+        - Other (specify in description)
+    validations:
+      required: true
+
+  - type: textarea
+    id: prompt
+    attributes:
+      label: Screenshots, Description, and Task Prompt Given to Agent
+      description: The full task prompt you're giving the agent (redact any sensitive data) + a description of the issue and screenshots.
+      placeholder: |
+        1. go to https://example.com and click the xyz button...
+        2. type "abc" in the dropdown search to find the "abc" option  <- agent fails to click dropdown here
+        3. Click the "Submit" button, then extract the result as JSON
+        ...
+        include relevant URLs and/or redacted screenshots of the relevant page(s) if possible
+    validations:
+      required: true
+
+  - type: textarea
+    id: html
+    attributes:
+      label: HTML around where it's failing
+      description: A snippet of the HTML from the failing page around where the Agent is failing to interact.
+      render: html
+      placeholder: |
+        <form na-someform="abc">
+          <div class="element-to-click">
+            <div data-isbutton="true">Click me</div>
+          </div>
+          <input id="someinput" name="someinput" type="text" />
+          ...
+        </form>
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: Operating System
+      description: What operating system are you using?
+      placeholder: "e.g., macOS 13.1, Windows 11, Ubuntu 22.04"
+    validations:
+      required: true
+
+  - type: textarea
+    id: code
+    attributes:
+      label: Python Code Sample
+      description: Include some python code that reproduces the issue
+      render: python
+      placeholder: |
+        from dotenv import load_dotenv
+        load_dotenv()
+        from browser_use import Agent, Browser, Controller
+        from langchain_openai import ChatOpenAI
+
+        llm = ChatOpenAI(model="gpt-4o")
+        browser = Browser(chrome_binary_path='/usr/bin/google-chrome')
+        agent = Agent(llm=llm, browser=browser)
+        ...
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Full DEBUG Log Output
+      description: Please copy and paste the *full* log output *from the start of the run*. Make sure to set `BROWSER_USE_LOG_LEVEL=DEBUG` in your `.env` or shell environment.
+      render: shell
+      placeholder: |
+        $ python /app/browser-use/examples/browser/real_browser.py
+        DEBUG    [browser] 🌎  Initializing new browser
+        DEBUG    [agent] Version: 0.1.46-9-g62760ba, Source: git
+        INFO     [agent] 🧠 Starting an agent with main_model=gpt-4o +tools +vision +memory, planner_model=None, extraction_model=gpt-4o
+        DEBUG    [agent] Verifying the ChatOpenAI LLM knows the capital of France...
+        DEBUG    [langsmith.client] Sending multipart request with context: trace=91282a01-6667-48a1-8cd7-21aa9337a580,id=91282a01-6667-48a1-8cd7-21aa9337a580
+        DEBUG    [agent] 🪪 LLM API keys OPENAI_API_KEY work, ChatOpenAI model is connected & responding correctly.
+        ...
--- a/browser-use/.github/ISSUE_TEMPLATE/2_bug_report.yml
+++ b/browser-use/.github/ISSUE_TEMPLATE/2_bug_report.yml
@ -0,0 +1,98 @@
+name: 🐛 Library Bug Report
+description: Report a bug in the browser-use Python library
+labels: ["bug", "triage"]
+body:
+  # - type: markdown
+  #   attributes:
+  #     value: |
+  #       Thanks for taking the time to fill out this bug report! Please fill out the form below to help us reproduce and fix the issue.
+
+  - type: input
+    id: version
+    attributes:
+      label: Browser Use Version
+      description: What version of the `browser-use` library are you using? (Run `uv pip show browser-use` or `git log -n 1` to find out) **DO NOT JUST WRITE `latest version` or `main`**
+      placeholder: "e.g. 0.4.45 or 62760baaefd"
+    validations:
+      required: true
+
+  - type: textarea
+    id: description
+    attributes:
+      label: Bug Description, Steps to Reproduce, Screenshots
+      description: A clear and concise description of what the bug is + steps taken, drag screenshots in showing any error messages and relevant pages.
+      placeholder: |
+        1. Installed browser-use library by running: `uv pip install browser-use`
+        2. Installed the browser by running: `playwright install chromium --with-deps`
+        3. Ran the code below with the following prompt: `go to example.com and do xyz...`
+        4. Agent crashed and showed the following error: ...
+    validations:
+      required: true
+
+  - type: textarea
+    id: code
+    attributes:
+      label: Failing Python Code
+      description: Include the exact python code you ran that encountered the issue, redact any sensitive URLs and API keys.
+      render: python
+      placeholder: |
+        from dotenv import load_dotenv
+        load_dotenv()
+        from browser_use import Agent, Browser, Controller
+        from langchain_openai import ChatOpenAI
+
+        llm = ChatOpenAI(model="gpt-4o")
+        browser = Browser(chrome_binary_path='/usr/bin/google-chrome')
+        agent = Agent(llm=llm, browser=browser)
+        ...
+
+  - type: dropdown
+    id: model
+    attributes:
+      label: LLM Model
+      description: Which LLM model(s) are you using?
+      multiple: true
+      options:
+        - gpt-4o
+        - gpt-4o-mini
+        - gpt-4
+        - gpt-4.1
+        - gpt-4.1-mini
+        - gpt-4.1-nano
+        - claude-3.7-sonnet
+        - claude-3.5-sonnet
+        - gemini-2.5-flash-preview
+        - gemini-2.5-pro
+        - gemini-2.0-flash
+        - gemini-2.0-flash-lite
+        - gemini-1.5-flash
+        - deepseek-chat
+        - Local Model (Specify model in description)
+        - Other (specify in description)
+    validations:
+      required: true
+
+  - type: input
+    id: os
+    attributes:
+      label: Operating System
+      description: What operating system are you using?
+      placeholder: "e.g., macOS 13.1, Windows 11, Ubuntu 22.04"
+    validations:
+      required: true
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Full DEBUG Log Output
+      description: Please copy and paste the *full* log output *from the start of the run*. Make sure to set `BROWSER_USE_LOG_LEVEL=DEBUG` in your `.env` or shell environment.
+      render: shell
+      placeholder: |
+        $ python /app/browser-use/examples/browser/real_browser.py
+        DEBUG    [browser] 🌎  Initializing new browser
+        DEBUG    [agent] Version: 0.1.46-9-g62760ba, Source: git
+        INFO     [agent] 🧠 Starting an agent with main_model=gpt-4o +tools +vision +memory, planner_model=None, extraction_model=gpt-4o
+        DEBUG    [agent] Verifying the ChatOpenAI LLM knows the capital of France...
+        DEBUG    [langsmith.client] Sending multipart request with context: trace=91282a01-6667-48a1-8cd7-21aa9337a580,id=91282a01-6667-48a1-8cd7-21aa9337a580
+        DEBUG    [agent] 🪪 LLM API keys OPENAI_API_KEY work, ChatOpenAI model is connected & responding correctly.
+        ...
--- a/browser-use/.github/ISSUE_TEMPLATE/3_feature_request.yml
+++ b/browser-use/.github/ISSUE_TEMPLATE/3_feature_request.yml
@ -0,0 +1,43 @@
+name: 💡 Feature Request
+description: Suggest a new feature for browser-use
+labels: ["enhancement"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to suggest a new feature! Please fill out the form below to help us understand your suggestion.
+
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem Description
+      description: Is your feature request related to a problem? Please describe.
+      placeholder: I'm always frustrated when...
+    validations:
+      required: true
+
+  - type: textarea
+    id: solution
+    attributes:
+      label: Proposed Solution
+      description: Describe the solution you'd like to see
+      placeholder: It would be great if...
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternative Solutions
+      description: Describe any alternative solutions or features you've considered
+      placeholder: I've also thought about...
+
+  - type: textarea
+    id: context
+    attributes:
+      label: Additional Context
+      description: Add any other context or examples about the feature request here
+      placeholder: |
+        - Example use cases
+        - Screenshots or mockups
+        - Related issues or discussions
--- a/browser-use/.github/ISSUE_TEMPLATE/4_docs_issue.yml
+++ b/browser-use/.github/ISSUE_TEMPLATE/4_docs_issue.yml
@ -0,0 +1,55 @@
+name: 📚 Documentation Issue
+description: Report an issue in the browser-use documentation
+labels: ["documentation"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to improve our documentation! Please fill out the form below to help us understand the issue.
+
+  - type: dropdown
+    id: type
+    attributes:
+      label: Type of Documentation Issue
+      description: What type of documentation issue is this?
+      options:
+        - Missing documentation
+        - Incorrect documentation
+        - Unclear documentation
+        - Broken link
+        - Other (specify in description)
+    validations:
+      required: true
+
+  - type: input
+    id: page
+    attributes:
+      label: Documentation Page
+      description: Which page or section of the documentation is this about?
+      placeholder: "e.g., https://docs.browser-use.com/getting-started or Installation Guide"
+    validations:
+      required: true
+
+  - type: textarea
+    id: description
+    attributes:
+      label: Issue Description
+      description: Describe what's wrong or missing in the documentation
+      placeholder: The documentation should...
+    validations:
+      required: true
+
+  - type: textarea
+    id: suggestion
+    attributes:
+      label: Suggested Changes
+      description: If you have specific suggestions for how to improve the documentation, please share them
+      placeholder: |
+        The documentation could be improved by...
+
+        Example:
+        ```python
+        # Your suggested code example or text here
+        ```
+    validations:
+      required: true
--- a/browser-use/.github/ISSUE_TEMPLATE/config.yml
+++ b/browser-use/.github/ISSUE_TEMPLATE/config.yml
@ -0,0 +1,11 @@
+blank_issues_enabled: false  # Set to true if you want to allow blank issues
+contact_links:
+  - name: 🤔 Quickstart Guide
+    url: https://docs.browser-use.com/quickstart
+    about: Most common issues can be resolved by following our quickstart guide
+  - name: 🤔 Questions and Help
+    url: https://link.browser-use.com/discord
+    about: Please ask questions in our Discord community
+  - name: 📖 Documentation
+    url: https://docs.browser-use.com
+    about: Check our documentation for answers first
--- a/browser-use/.github/workflows/cloud_evals.yml
+++ b/browser-use/.github/workflows/cloud_evals.yml
@ -0,0 +1,28 @@
+name: cloud_evals
+
+# Asks the browser-use/cloud repository to build a Cloud eval image for a
+# commit of this library by sending it a dispatch event.
+on:
+  push:
+    branches:
+      - main
+      - 'releases/*'
+  workflow_dispatch:
+    inputs:
+      commit_hash:
+        description: Commit hash of the library to build the Cloud eval image for
+        required: false
+
+jobs:
+  trigger_cloud_eval_image_build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/github-script@v7
+        env:
+          # Manual runs may supply a commit; otherwise fall back to the SHA
+          # that triggered the workflow. The value is handed to the script via
+          # the environment instead of being interpolated into the script
+          # source, so it is always treated as data and never as JavaScript.
+          COMMIT_HASH: ${{ github.event.inputs.commit_hash || github.sha }}
+        with:
+          # Token with access to the target repository (the default
+          # GITHUB_TOKEN is scoped to this repository only).
+          github-token: ${{ secrets.TRIGGER_CLOUD_BUILD_GH_KEY }}
+          script: |
+            const result = await github.rest.repos.createDispatchEvent({
+              owner: 'browser-use',
+              repo: 'cloud',
+              event_type: 'trigger-workflow',
+              client_payload: {"commit_hash": process.env.COMMIT_HASH}
+            })
+            console.log(result)
--- a/browser-use/.github/workflows/docker.yml
+++ b/browser-use/.github/workflows/docker.yml
@ -0,0 +1,64 @@
+name: docker
+
+# Builds the linux/amd64 + linux/arm64 Docker image and pushes it to Docker Hub
+# and the GitHub Container Registry. `push:` carries no branch/tag filter, so
+# the workflow runs on every push as well as on published releases.
+on:
+  push:
+  release:
+    types: [published]
+
+jobs:
+  build_publish_image:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      # NOTE(review): QEMU is presumably here to emulate the non-native
+      # architecture listed under `platforms` below — confirm.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # No `registry` input: credentials are for the action's default registry.
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      # Produces the tag and label lists consumed by the build step via
+      # steps.meta.outputs: one tag set per image name, derived from the
+      # branch / PR ref, the PEP 440 version (full and major.minor), and the
+      # commit SHA.
+      - name: Compute Docker tags based on tag/branch
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            browseruse/browseruse
+            ghcr.io/browser-use/browser-use
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=pep440,pattern={{version}}
+            type=pep440,pattern={{major}}.{{minor}}
+            type=sha
+
+      # Build layers are cached in, and restored from, the
+      # browseruse/browseruse:buildcache registry reference.
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@v6
+        with:
+          platforms: linux/amd64,linux/arm64
+          context: .
+          file: ./Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=registry,ref=browseruse/browseruse:buildcache
+          cache-to: type=registry,ref=browseruse/browseruse:buildcache,mode=max
--- a/browser-use/.github/workflows/lint.yml
+++ b/browser-use/.github/workflows/lint.yml
@ -0,0 +1,42 @@
+name: lint
+# Three independent lint jobs: ruff error-class checks, pre-commit hooks, and
+# pyright. Runs on pushes to main / stable / releases/** and any tag, on pull
+# requests, and on manual dispatch.
+on:
+  push:
+    branches:
+      - main
+      - stable
+      - 'releases/**'
+    tags:
+      - '*'
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  lint-syntax:
+    name: syntax-errors
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+      # Report only (no auto-fix), restricted to ruff's PLE rule selection
+      - run: uv run ruff check --no-fix --select PLE
+
+  lint-style:
+    name: code-style
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+      # Run every configured pre-commit hook over the whole tree and print the
+      # diff of anything a hook would have changed
+      - run: uv run pre-commit run --all-files --show-diff-on-failure
+
+  lint-typecheck:
+    name: type-checker
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # NOTE(review): this job pins setup-uv@v6 while the two jobs above pin
+      # @v5 — confirm whether the mismatch is intentional.
+      - uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+      - run: uv run pyright
--- a/browser-use/.github/workflows/package.yaml
+++ b/browser-use/.github/workflows/package.yaml
@ -0,0 +1,54 @@
+name: package
+# Builds the distributions once, then checks that the wheel installs and its
+# public names import on every OS / Python version in the matrix.
+on:
+  push:
+    branches:
+      - main
+      - stable
+      - 'releases/**'
+    tags:
+      - '*'
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  build:
+    name: pip-build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      - run: uv build --python 3.12
+      # Hand the built wheel and sdist to the build_test job below
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist-artifact
+          path: |
+            dist/*.whl
+            dist/*.tar.gz
+
+  build_test:
+    name: pip-install-on-${{ matrix.os }}-py-${{ matrix.python-version }}
+    needs: build
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        # Quoted so each version stays a string
+        python-version: ["3.11", "3.12", "3.13"]
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      # No `path` input, so the later `*.whl` glob is resolved relative to the
+      # step's working directory
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist-artifact
+
+      # Create a throwaway venv for the matrix Python, activate it (the
+      # activate script is under Scripts/ on Windows and bin/ elsewhere),
+      # install the downloaded wheel, and verify the public names import.
+      # `shell: bash` is forced so the same script runs on all three OSes.
+      - name: Set up venv and test for OS/Python versions
+        shell: bash
+        run: |
+          uv venv /tmp/testenv --python ${{ matrix.python-version }}
+          if [[ "$RUNNER_OS" == "Windows" ]]; then
+            . /tmp/testenv/Scripts/activate
+          else
+            source /tmp/testenv/bin/activate
+          fi
+          uv pip install *.whl
+          python -c 'from browser_use import Agent, Browser, Controller, ActionModel, ActionResult'
--- a/browser-use/.github/workflows/publish.yml
+++ b/browser-use/.github/workflows/publish.yml
@ -0,0 +1,66 @@
+# This workflow uploads the Python package to PyPI with `uv publish` when a release is published
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: publish
+
+# Two entry points: a published GitHub release, and a weekly schedule. Each
+# job below checks github.event_name to decide whether it applies.
+on:
+  release:
+    types: [published]     # publish full release to PyPI when a release is created on Github
+  schedule:
+    - cron: "0 17 * * FRI" # tag a pre-release on Github every Friday at 5 PM UTC
+
+# contents: write lets the jobs push the new tag and the `stable` branch back
+# to the repository.
+# NOTE(review): id-token: write is presumably what
+# `uv publish --trusted-publishing` relies on to obtain an OIDC token — confirm.
+permissions:
+  contents: write
+  id-token: write
+
+jobs:
+  # Scheduled runs only: create and push the next release-candidate tag.
+  tag_pre_release:
+    if: github.event_name == 'schedule'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # The script:
+      #   1. fetches all tags;
+      #   2. picks the first tag, in descending version order, that matches
+      #      vX.Y.ZrcN;
+      #   3. starts at v0.1.0rc1 if none exists, otherwise splits the tag on
+      #      "rc" and increments the trailing number (v0.1.46rc3 -> v0.1.46rc4);
+      #   4. creates the new tag on the checked-out commit and pushes it.
+      # Only the rc counter is bumped; the X.Y.Z part is carried over from the
+      # selected rc tag.
+      - name: Create pre-release tag
+        run: |
+          git fetch --tags
+          latest_tag=$(git tag --list --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+rc[0-9]+$' | head -n 1)
+          if [ -z "$latest_tag" ]; then
+            new_tag="v0.1.0rc1"
+          else
+            new_tag=$(echo $latest_tag | awk -F'rc' '{print $1 "rc" $2+1}')
+          fi
+          git tag $new_tag
+          git push origin $new_tag
+
+  # Release events only: lint, build, exercise the built artifacts, publish to
+  # PyPI, and advance the `stable` branch for non-rc releases.
+  publish_to_pypi:
+    if: github.event_name == 'release'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv run ruff check --no-fix --select PLE # check only for syntax errors
+      - run: uv build
+      # Run tests/conftest.py in an isolated environment with the built wheel
+      # installed, then again with the sdist installed.
+      - run: uv run --isolated --no-project --with pytest --with dist/*.whl tests/conftest.py
+      - run: uv run --isolated --no-project --with pytest --with dist/*.tar.gz tests/conftest.py
+      # Literal block scalar so each trailing backslash is a real shell line
+      # continuation. Written as a plain multi-line scalar, YAML folds the
+      # lines into one and leaves "\ " in front of every option, which the
+      # shell reads as an escaped space glued onto the argument, so pytest
+      # never received a valid --ignore flag.
+      # `|| true` keeps this test run advisory: failures do not block publishing.
+      - run: |
+          uv run --with=dotenv pytest \
+            --ignore=tests/test_dropdown_error.py \
+            --ignore=tests/test_gif_path.py \
+            --ignore=tests/test_models.py \
+            --ignore=tests/test_react_dropdown.py \
+            --ignore=tests/test_save_conversation.py \
+            --ignore=tests/test_vision.py \
+            --ignore=tests/test_wait_for_element.py || true
+      - run: uv publish --trusted-publishing always
+      # Stable release = ref name starts with "v" and contains no "rc".
+      # Creates a local `stable` branch at the released commit and pushes it.
+      - name: Push to stable branch (if stable release)
+        if: startsWith(github.ref_name, 'v') && !contains(github.ref_name, 'rc')
+        run: |
+          git checkout -b stable
+          git push origin stable
--- a/browser-use/.github/workflows/test.yaml
+++ b/browser-use/.github/workflows/test.yaml
@ -0,0 +1,68 @@
+name: test
+
+# Runs each listed pytest module as its own matrix job on pushes to
+# main / stable / releases/** and any tag, on pull requests, and on manual
+# dispatch.
+on:
+  push:
+    branches:
+      - main
+      - stable
+      - 'releases/**'
+    tags:
+      - '*'
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+  tests:
+    name: ${{matrix.test}}
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Each entry is a module name under tests/ (without the .py suffix)
+        test:
+        # TODO:
+        # - browser/patchright
+        # - browser/playwright
+        # - browser/user_binary
+        # - browser/remote_cdp
+        # - models/openai
+        # - models/google
+        # - models/anthropic
+        # - models/azure
+        # - models/deepseek
+        # - models/grok
+        # - functionality/click
+        # - functionality/tabs
+        # - functionality/input
+        # - functionality/scroll
+        # - functionality/upload
+        # - functionality/download
+        # - functionality/save
+        # - functionality/vision
+        # - functionality/memory
+        # - functionality/planner
+        # - functionality/hooks
+        - test_controller
+        - test_tab_management
+        - test_sensitive_data
+        - test_url_allowlist_security
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+          activate-environment: true
+
+      - run: uv sync
+
+      # Export the installed `playwright` package version so it can key the
+      # browser-binary cache below. Only the package named "playwright" is
+      # matched by the jq filter.
+      - name: Detect installed Playwright or Patchright version
+        run: echo "PLAYWRIGHT_VERSION=$(uv pip list --format json | jq -r '.[] | select(.name == "playwright") | .version')" >> $GITHUB_ENV
+
+      # actions/cache@v4 is the current major; it takes the same `path` / `key`
+      # inputs as v3.
+      - name: Cache playwright binaries
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cache/ms-playwright
+          key: ${{ runner.os }}-playwright-${{ env.PLAYWRIGHT_VERSION }}
+
+      - run: playwright install --no-shell chromium
+
+      - run: pytest tests/${{ matrix.test }}.py