mirror of
https://github.com/j93es/browser-use-oauth.git
synced 2026-06-04 02:21:52 +09:00
Compare commits
83 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
20578d7f7b |
||
| 2d3677fe4a | |||
| ba1e81177b | |||
|
9858d7acd2 |
|||
| d6803ad20e | |||
| 0f5ab6dea1 | |||
|
|
657d5370b9 |
||
|
|
628c994f22 |
||
|
|
a8165e9cdc |
||
|
|
90f0487dd7 |
||
|
|
0904f282ff | ||
|
|
d01a78d442 | ||
|
|
52b10446f7 | ||
|
|
ff54e8d4e3 | ||
|
08e7e34b9f |
|||
|
1742afcb5e |
|||
| 68eedc3fa4 | |||
| 2b805df001 | |||
|
|
264f29ffb9 | ||
|
|
8d1aa3df1a |
||
|
952db8d2b9 |
|||
|
2fdd187f9a |
|||
| a9f3bc7233 | |||
| 735e90739c | |||
| 72f784dec5 | |||
|
b3b5f05697 |
|||
|
c5e0a8c2f1 |
|||
|
c3a0132489 |
|||
|
b45daebc81 |
|||
| f5ee676468 | |||
| 8cfb6488d1 | |||
| 3199a53a44 | |||
| 146c187b05 | |||
|
92eea9c0c9 |
|||
|
|
b2aedf53db | ||
|
65c865b620 |
|||
|
54e923ae95 |
|||
|
13a2798fa4 |
|||
|
3e2b598298 |
|||
| 18a575a8af | |||
|
|
1c43e63cca | ||
|
|
d1737f27a3 | ||
|
|
ba0bb61ef3 | ||
|
|
fe003013f4 | ||
|
|
91c18e12c7 | ||
|
|
686333e75c |
||
|
|
79c9dffa08 | ||
|
|
1d7d9c8ad5 | ||
|
|
812a26a103 | ||
|
|
b7df8cffcd | ||
|
|
bcca364021 | ||
|
|
e1c07c4a1e | ||
|
|
b26c47d1ad | ||
|
2202a1a2d8 |
|||
| bc9b598993 | |||
| 353e98e28c | |||
|
|
54682cdb72 | ||
|
|
ce70191d49 | ||
| 20601cec76 | |||
| 70e8bdbbde | |||
|
|
d8ec21c61b |
||
| 20ac0ccc06 | |||
| 069dbf446d | |||
|
1ddc3c41bc |
|||
|
4261bdc7d6 |
|||
| 5c816baf67 | |||
| 3d55b6275e | |||
| 5535515dbd | |||
| c2e610ec54 | |||
| 32f2ce486e | |||
| 27192dab3a | |||
| b0a4727e13 | |||
| a5d8d674b4 | |||
| 9af67fbd49 | |||
| e85c128ed8 | |||
| 1767cd5861 | |||
| 6ddeed2173 | |||
| e1319a108d | |||
| 4b3637b762 | |||
|
c1ade99b8a |
|||
|
46a169f1d0 |
|||
| 4f90285bdd | |||
| 3dd86982d7 |
72 changed files with 2836 additions and 4608 deletions
11
.env.example
11
.env.example
|
|
@ -1,16 +1,20 @@
|
|||
# 권장 (다른 모델로 교체 가능) [다른 모델로 교체시 성능 보장 불가]
|
||||
ANONYMIZED_TELEMETRY=false
|
||||
|
||||
# ========== LLM ==========
|
||||
|
||||
GOOGLE_API_KEY=
|
||||
# 권장 (다른 모델로 교체 가능) [다른 모델로 교체시 성능 보장 불가]
|
||||
GOOGLE_MODEL=gemini-2.5-flash-preview-05-20
|
||||
GOOGLE_PLANNER_MODEL=gemini-2.5-flash-preview-05-20
|
||||
GOOGLE_MODEL=gemini-2.5-flash
|
||||
#GOOGLE_PLANNER_MODEL=gemini-2.5-flash # 왜 비활성화 되었나요? // Planner 모델이 오히려 문제를 일으키는 경우가 있어 비활성화했습니다. 필요시 활성화하세요.
|
||||
|
||||
# min(INITIAL_BACKOFF * (2 ** try_cnt), MAX_BACKOFF)만큼 API가 실패시 대기합니다.
|
||||
INITIAL_BACKOFF=60
|
||||
MAX_BACKOFF=600
|
||||
|
||||
#ENABLE_PLANNER_MODEL_OAUTH_LOGIN=true # OAuth 로그인 시 Planner 모델을 활성화합니다.
|
||||
#ENABLE_PLANNER_MODEL_OAUTH_LIST=true # OAuth List를 찾을 때 Planner 모델을 활성화합니다.
|
||||
|
||||
# ========== Monitoring ==========
|
||||
|
||||
# 선택
|
||||
|
|
@ -18,13 +22,14 @@ PROXY_HOST=127.0.0.1
|
|||
PROXY_PORT=11080
|
||||
BACKEND_URL=http://localhost:11081
|
||||
|
||||
# https://docs.browser-use.com/development/observability
|
||||
# https://docs.browser-use.com/development/observability - 선택
|
||||
# Lmnr 계정이 필요합니다.
|
||||
# https://lmnr.ai/
|
||||
LMNR_PROJECT_API_KEY=
|
||||
|
||||
# 브라우저 언어 설정
|
||||
LANG=en_US
|
||||
HEADLESS=False # 브라우저를 헤드리스 모드로 실행할지 여부. True로 설정하면 브라우저가 보이지 않습니다.
|
||||
|
||||
# ========= Account ==========
|
||||
|
||||
|
|
|
|||
345
.github/instructions/agent-settings.instructions.md
vendored
345
.github/instructions/agent-settings.instructions.md
vendored
|
|
@ -1,345 +0,0 @@
|
|||
---
|
||||
description: "Learn how to configure the agent"
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The `Agent` class is the core component of Browser Use that handles browser automation. Here are the main configuration options you can use when initializing an agent.
|
||||
|
||||
## Basic Settings
|
||||
|
||||
```python
|
||||
from browser_use import Agent
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
agent = Agent(
|
||||
task="Search for latest news about AI",
|
||||
llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
```
|
||||
|
||||
### Required Parameters
|
||||
|
||||
- `task`: The instruction for the agent to execute
|
||||
- `llm`: A LangChain chat model instance. See <a href="/customize/supported-models">LangChain Models</a> for supported models.
|
||||
|
||||
## Agent Behavior
|
||||
|
||||
Control how the agent operates:
|
||||
|
||||
```python
|
||||
agent = Agent(
|
||||
task="your task",
|
||||
llm=llm,
|
||||
controller=custom_controller, # For custom tool calling
|
||||
use_vision=True, # Enable vision capabilities
|
||||
save_conversation_path="logs/conversation" # Save chat logs
|
||||
)
|
||||
```
|
||||
|
||||
### Behavior Parameters
|
||||
|
||||
- `controller`: Registry of functions the agent can call. Defaults to base Controller. See <a href="/customize/custom-functions">Custom Functions</a> for details.
|
||||
- `use_vision`: Enable/disable vision capabilities. Defaults to `True`.
|
||||
- When enabled, the model processes visual information from web pages
|
||||
- Disable to reduce costs or use models without vision support
|
||||
- For GPT-4o, image processing costs approximately 800-1000 tokens (~$0.002 USD) per image (but this depends on the defined screen size)
|
||||
- `save_conversation_path`: Path to save the complete conversation history. Useful for debugging.
|
||||
- `override_system_message`: Completely replace the default system prompt with a custom one.
|
||||
- `extend_system_message`: Add additional instructions to the default system prompt.
|
||||
|
||||
<Note>
|
||||
Vision capabilities are recommended for better web interaction understanding,
|
||||
but can be disabled to reduce costs or when using models without vision
|
||||
support.
|
||||
</Note>
|
||||
|
||||
|
||||
### Reuse Existing Browser Context
|
||||
|
||||
By default browser-use launches its own builtin browser using playwright chromium.
|
||||
You can also connect to a remote browser or pass any of the following
|
||||
existing playwright objects to the Agent: `page`, `browser_context`, `browser`, `browser_session`, or `browser_profile`.
|
||||
|
||||
These all get passed down to create a `BrowserSession` for the `Agent`:
|
||||
|
||||
|
||||
```python
|
||||
agent = Agent(
|
||||
task='book a flight to fiji',
|
||||
llm=llm,
|
||||
browser_profile=browser_profile, # use this profile to create a BrowserSession
|
||||
browser_session=BrowserSession( # use an existing BrowserSession
|
||||
cdp_url=..., # remote CDP browser to connect to
|
||||
# or
|
||||
wss_url=..., # remote wss playwright server provider
|
||||
# or
|
||||
browser_pid=... # pid of a locally running browser process to attach to
|
||||
# or
|
||||
executable_path=... # provide a custom chrome binary path
|
||||
# or
|
||||
channel=... # specify chrome, chromium, ms-edge, etc.
|
||||
# or
|
||||
page=page, # use an existing playwright Page object
|
||||
# or
|
||||
browser_context=browser_context, # use an existing playwright BrowserContext object
|
||||
# or
|
||||
browser=browser, # use an existing playwright Browser object
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
For example, to connect to an existing browser over CDP you could do:
|
||||
```python
|
||||
agent = Agent(
|
||||
...
|
||||
browser_session=BrowserSession(cdp_url='http://localhost:9222'),
|
||||
)
|
||||
```
|
||||
|
||||
For example, to connect to a local running chrome instance you can do:
|
||||
```python
|
||||
agent = Agent(
|
||||
...
|
||||
browser_session=BrowserSession(browser_pid=1234),
|
||||
)
|
||||
```
|
||||
|
||||
See <a href="/customize/real-browser">Connect to your Browser</a> for more info.
|
||||
|
||||
<Note>
|
||||
You can reuse the same `BrowserSession` after an agent has completed running. If you do nothing, the
|
||||
browser will be automatically closed on `run()` completion only if it was launched by us.
|
||||
</Note>
|
||||
|
||||
## Running the Agent
|
||||
|
||||
The agent is executed using the async `run()` method:
|
||||
|
||||
- `max_steps` (default: `100`)
|
||||
Maximum number of steps the agent can take during execution. This prevents infinite loops and helps control execution time.
|
||||
|
||||
## Agent History
|
||||
|
||||
The method returns an `AgentHistoryList` object containing the complete execution history. This history is invaluable for debugging, analysis, and creating reproducible scripts.
|
||||
|
||||
```python
|
||||
# Example of accessing history
|
||||
history = await agent.run()
|
||||
|
||||
# Access (some) useful information
|
||||
history.urls() # List of visited URLs
|
||||
history.screenshots() # List of screenshot paths
|
||||
history.action_names() # Names of executed actions
|
||||
history.extracted_content() # Content extracted during execution
|
||||
history.errors() # Any errors that occurred
|
||||
history.model_actions() # All actions with their parameters
|
||||
```
|
||||
|
||||
The `AgentHistoryList` provides many helper methods to analyze the execution:
|
||||
|
||||
- `final_result()`: Get the final extracted content
|
||||
- `is_done()`: Check if the agent completed successfully
|
||||
- `has_errors()`: Check if any errors occurred
|
||||
- `model_thoughts()`: Get the agent's reasoning process
|
||||
- `action_results()`: Get results of all actions
|
||||
|
||||
<Note>
|
||||
For a complete list of helper methods and detailed history analysis
|
||||
capabilities, refer to the [AgentHistoryList source
|
||||
code](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/views.py#L111).
|
||||
</Note>
|
||||
|
||||
## Run initial actions without LLM
|
||||
With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/initial_actions.py) you can run initial actions without the LLM.
|
||||
Specify the action as a dictionary where the key is the action name and the value is the action parameters. You can find all our actions in the [Controller](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py) source code.
|
||||
```python
|
||||
|
||||
initial_actions = [
|
||||
{'open_tab': {'url': 'https://www.google.com'}},
|
||||
{'open_tab': {'url': 'https://en.wikipedia.org/wiki/Randomness'}},
|
||||
{'scroll_down': {'amount': 1000}},
|
||||
]
|
||||
agent = Agent(
|
||||
task='What theories are displayed on the page?',
|
||||
initial_actions=initial_actions,
|
||||
llm=llm,
|
||||
)
|
||||
```
|
||||
|
||||
## Run with message context
|
||||
|
||||
You can configure the agent and provide a separate message to help the LLM understand the task better.
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
agent = Agent(
|
||||
task="your task",
|
||||
message_context="Additional information about the task",
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
)
|
||||
```
|
||||
|
||||
## Run with planner model
|
||||
|
||||
You can configure the agent to use a separate planner model for high-level task planning:
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
# Initialize models
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
planner_llm = ChatOpenAI(model='o3-mini')
|
||||
|
||||
agent = Agent(
|
||||
task="your task",
|
||||
llm=llm,
|
||||
planner_llm=planner_llm, # Separate model for planning
|
||||
use_vision_for_planner=False, # Disable vision for planner
|
||||
planner_interval=4 # Plan every 4 steps
|
||||
)
|
||||
```
|
||||
|
||||
### Planner Parameters
|
||||
|
||||
- `planner_llm`: A LangChain chat model instance used for high-level task planning. Can be a smaller/cheaper model than the main LLM.
|
||||
- `use_vision_for_planner`: Enable/disable vision capabilities for the planner model. Defaults to `True`.
|
||||
- `planner_interval`: Number of steps between planning phases. Defaults to `1`.
|
||||
|
||||
Using a separate planner model can help:
|
||||
- Reduce costs by using a smaller model for high-level planning
|
||||
- Improve task decomposition and strategic thinking
|
||||
- Better handle complex, multi-step tasks
|
||||
|
||||
<Note>
|
||||
The planner model is optional. If not specified, the agent will not use the planner model.
|
||||
</Note>
|
||||
|
||||
### Optional Parameters
|
||||
|
||||
- `message_context`: Additional information about the task to help the LLM understand the task better.
|
||||
- `initial_actions`: List of initial actions to run before the main task.
|
||||
- `max_actions_per_step`: Maximum number of actions to run in a step. Defaults to `10`.
|
||||
- `max_failures`: Maximum number of failures before giving up. Defaults to `3`.
|
||||
- `retry_delay`: Time to wait between retries in seconds when rate limited. Defaults to `10`.
|
||||
- `generate_gif`: Enable/disable GIF generation. Defaults to `False`. Set to `True` or a string path to save the GIF.
|
||||
## Memory Management
|
||||
|
||||
Browser Use includes a procedural memory system using [Mem0](https://mem0.ai) that automatically summarizes the agent's conversation history at regular intervals to optimize context window usage during long tasks.
|
||||
|
||||
```python
|
||||
from browser_use.agent.memory import MemoryConfig
|
||||
|
||||
agent = Agent(
|
||||
task="your task",
|
||||
llm=llm,
|
||||
enable_memory=True,
|
||||
memory_config=MemoryConfig( # Ensure llm_instance is passed if not using default LLM config
|
||||
llm_instance=llm, # Important: Pass the agent's LLM instance here
|
||||
agent_id="my_custom_agent",
|
||||
memory_interval=15
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### Memory Parameters
|
||||
|
||||
- `enable_memory`: Enable/disable the procedural memory system. Defaults to `True`.
|
||||
- `memory_config`: A `MemoryConfig` Pydantic model instance (required if `enable_memory` is `True`). Dictionary format is not supported.
|
||||
|
||||
### Using MemoryConfig
|
||||
|
||||
You must configure the memory system using the `MemoryConfig` Pydantic model for a type-safe approach:
|
||||
|
||||
```python
|
||||
from browser_use.agent.memory import MemoryConfig
|
||||
from langchain_openai import ChatOpenAI # Assuming llm is an instance of ChatOpenAI
|
||||
|
||||
llm_for_agent = ChatOpenAI(model="gpt-4o")
|
||||
|
||||
agent = Agent(
|
||||
task=task_description,
|
||||
llm=llm_for_agent,
|
||||
enable_memory=True, # This is True by default
|
||||
memory_config=MemoryConfig(
|
||||
llm_instance=llm_for_agent, # Pass the LLM instance for Mem0
|
||||
agent_id="my_agent",
|
||||
memory_interval=15, # Summarize every 15 steps
|
||||
embedder_provider="openai",
|
||||
embedder_model="text-embedding-3-large",
|
||||
embedder_dims=1536,
|
||||
# --- Vector Store Customization ---
|
||||
vector_store_provider="qdrant", # e.g., Qdrant, Pinecone, Chroma, etc.
|
||||
vector_store_collection_name="my_browser_use_memories", # Optional: custom collection name
|
||||
vector_store_config_override={ # Provider-specific config
|
||||
"host": "localhost",
|
||||
"port": 6333
|
||||
# Add other Qdrant specific configs here if needed, e.g., api_key for cloud
|
||||
}
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
The `MemoryConfig` model provides these configuration options:
|
||||
|
||||
#### Memory Settings
|
||||
- `agent_id`: Unique identifier for the agent (default: `"browser_use_agent"`). Essential for persistent memory sessions if using a persistent vector store.
|
||||
- `memory_interval`: Number of steps between memory summarization (default: `10`)
|
||||
|
||||
#### LLM Settings (for Mem0's internal operations)
|
||||
- `llm_instance`: The LangChain `BaseChatModel` instance that Mem0 will use for its internal summarization and processing. You must pass the same LLM instance used by the main agent, or another compatible one, here.
|
||||
|
||||
#### Embedder Settings
|
||||
- `embedder_provider`: Provider for embeddings (`'openai'`, `'gemini'`, `'ollama'`, or `'huggingface'`)
|
||||
- `embedder_model`: Model name for the embedder
|
||||
- `embedder_dims`: Dimensions for the embeddings
|
||||
|
||||
#### Vector Store Settings
|
||||
- `vector_store_provider`: Choose the vector store backend. Supported options include:
|
||||
`'faiss'` (default), `'qdrant'`, `'pinecone'`, `'supabase'`, `'elasticsearch'`, `'chroma'`, `'weaviate'`, `'milvus'`, `'pgvector'`, `'upstash_vector'`, `'vertex_ai_vector_search'`, `'azure_ai_search'`, `'lancedb'`, `'mongodb'`, `'redis'`, `'memory'` (in-memory, non-persistent).
|
||||
- `vector_store_collection_name`: (Optional) Specify a custom name for the collection or index in your vector store. If not provided, a default name is generated (especially for local stores like FAISS/Chroma) or used by Mem0.
|
||||
- `vector_store_base_path`: Path for local vector stores like FAISS or Chroma (e.g., `/tmp/mem0`). Default is `/tmp/mem0`.
|
||||
- `vector_store_config_override`: (Optional) A dictionary to provide or override specific configuration parameters required by Mem0 for the chosen `vector_store_provider`. This is where you'd put connection details like `host`, `port`, `api_key`, `url`, `environment`, etc., for cloud-based or server-based vector stores.
|
||||
|
||||
The model automatically sets appropriate defaults based on the LLM being used:
|
||||
- For `ChatOpenAI`: Uses OpenAI's `text-embedding-3-small` embeddings
|
||||
- For `ChatGoogleGenerativeAI`: Uses Gemini's `models/text-embedding-004` embeddings
|
||||
- For `ChatOllama`: Uses Ollama's `nomic-embed-text` embeddings
|
||||
- Default: Uses Hugging Face's `all-MiniLM-L6-v2` embeddings
|
||||
|
||||
<Note>
|
||||
**Important:**
|
||||
- Always pass a properly constructed `MemoryConfig` object to the `memory_config` parameter.
|
||||
- Ensure the `llm_instance` is provided to `MemoryConfig` so Mem0 can perform its operations.
|
||||
- For persistent memory across agent runs or for shared memory, choose a scalable vector store provider (like Qdrant, Pinecone, etc.) and configure it correctly using `vector_store_provider` and `vector_store_config_override`. The default 'faiss' provider stores data locally in `vector_store_base_path`.
|
||||
</Note>
|
||||
|
||||
### How Memory Works
|
||||
|
||||
When enabled, the agent periodically compresses its conversation history into concise summaries:
|
||||
|
||||
1. Every `memory_interval` steps, the agent reviews its recent interactions.
|
||||
2. It uses Mem0 (configured with your chosen LLM and vector store) to create a procedural memory summary.
|
||||
3. The original messages in the agent's active context are replaced with this summary, reducing token usage.
|
||||
4. This process helps maintain important context while freeing up the context window for new information.
|
||||
|
||||
|
||||
### Disabling Memory
|
||||
|
||||
If you want to disable the memory system (for debugging or for shorter tasks), set `enable_memory` to `False`:
|
||||
|
||||
```python
|
||||
agent = Agent(
|
||||
task="your task",
|
||||
llm=llm,
|
||||
enable_memory=False
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
Disabling memory may be useful for debugging or short tasks, but for longer
|
||||
tasks, it can lead to context window overflow as the conversation history
|
||||
grows. The memory system helps maintain performance during extended sessions.
|
||||
</Note>
|
||||
|
|
@ -1,968 +0,0 @@
|
|||
---
|
||||
description: "Launch or connect to an existing browser and configure it to your needs."
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
Browser Use uses [playwright](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context) (or [patchright](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright)) to manage its connection with a real browser.
|
||||
|
||||
---
|
||||
|
||||
**To launch or connect to a browser**, pass any playwright / browser-use configuration arguments you want to `BrowserSession(...)`:
|
||||
|
||||
```python
|
||||
from browser_use import BrowserSession, Agent
|
||||
|
||||
browser_session = BrowserSession(
|
||||
headless=True,
|
||||
viewport={'width': 964, 'height': 647},
|
||||
user_data_dir='~/.config/browseruse/profiles/default',
|
||||
)
|
||||
agent = Agent('fill out the form on this page', browser_session=browser_session)
|
||||
```
|
||||
|
||||
<Note>
|
||||
The new `BrowserSession` & `BrowserProfile` accept all the same arguments that Playwright's [`launch_persistent_context(...)`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context) takes, giving you full control over browser settings at launch. (see below for the full list)
|
||||
</Note>
|
||||
|
||||
|
||||
---
|
||||
|
||||
## `BrowserSession`
|
||||
|
||||
- 🎭 `BrowserSession(**params)` is Browser Use's object that tracks a playwright connection to a running browser. It sets up:
|
||||
- the `playwright` library, `browser` and/or `browser_context`, and `page` objects and tracks which tabs the agent & human are focused on
|
||||
- methods to interact with the browser window, apply config needed by the Agent, and run the `DOMService` for element detection
|
||||
- it can take a `browser_profile=BrowserProfile(...)` template containing some config defaults, and `**kwargs` session-specific config overrides
|
||||
|
||||
### Browser Connection Parameters
|
||||
|
||||
Provide any one of these options to connect to an existing browser. These options are session-specific and cannot be stored in a `BrowserProfile(...)` template.
|
||||
|
||||
#### `wss_url`
|
||||
|
||||
```python
|
||||
wss_url: str | None = None
|
||||
```
|
||||
|
||||
WSS URL of the playwright-protocol browser server to connect to. See here for [WSS connection instructions](https://docs.browser-use.com/customize/real-browser#method-d%3A-connect-to-remote-playwright-node-js-browser-server-via-wss-url).
|
||||
|
||||
#### `cdp_url`
|
||||
|
||||
```python
|
||||
cdp_url: str | None = None
|
||||
```
|
||||
|
||||
CDP URL of the browser to connect to (e.g. `http://localhost:9222`). See here for [CDP connection instructions](https://docs.browser-use.com/customize/real-browser#method-e%3A-connect-to-remote-browser-via-cdp-url).
|
||||
|
||||
#### `browser_pid`
|
||||
|
||||
```python
|
||||
browser_pid: int | None = None
|
||||
```
|
||||
|
||||
PID of a running chromium-based browser process to connect to on localhost. See here for [connection via pid](https://docs.browser-use.com/customize/real-browser#method-c%3A-connect-to-local-browser-using-browser-pid) instructions.
|
||||
|
||||
<Note>
|
||||
For web scraping tasks on sites that restrict automated access, we recommend
|
||||
using [our cloud](https://browser-use.com) or an external browser provider for better reliability.
|
||||
See the [Connect to your Browser](real-browser) guide for detailed connection instructions.
|
||||
</Note>
|
||||
|
||||
### Session-Specific Parameters
|
||||
|
||||
#### `browser_profile`
|
||||
|
||||
```python
|
||||
browser_profile: BrowserProfile = BrowserProfile()
|
||||
```
|
||||
|
||||
Optional `BrowserProfile` template containing default config to use for the `BrowserSession`. (see below for more info)
|
||||
|
||||
#### `playwright`
|
||||
|
||||
```python
|
||||
playwright: Playwright | None = None
|
||||
```
|
||||
|
||||
Optional playwright or patchright API client handle to use, the result of `(await async_playwright().start())` or `(await async_patchright().start())`, which spawns a node.js child subprocess that relays commands to the browser over CDP.
|
||||
|
||||
See here for [more detailed usage instructions](https://docs.browser-use.com/customize/real-browser#method-b%3A-connect-using-existing-playwright-objects).
|
||||
|
||||
#### `browser`
|
||||
|
||||
```python
|
||||
browser: Browser | None = None
|
||||
```
|
||||
|
||||
Playwright Browser object to use (optional). See here for [more detailed usage instructions](https://docs.browser-use.com/customize/real-browser#method-b%3A-connect-using-existing-playwright-objects).
|
||||
|
||||
#### `browser_context`
|
||||
|
||||
```python
|
||||
browser_context: BrowserContext | None = None
|
||||
```
|
||||
|
||||
Playwright BrowserContext object to use (optional). See here for [more detailed usage instructions](https://docs.browser-use.com/customize/real-browser#method-b%3A-connect-using-existing-playwright-objects).
|
||||
|
||||
#### `page` *aka* `agent_current_page`
|
||||
|
||||
<a name="page"></a><a name="agent-current-page"></a>
|
||||
|
||||
```python
|
||||
page: Page | None = None
|
||||
```
|
||||
|
||||
Foreground Page that the agent is focused on, can also be passed as `page=...` as a shortcut. See here for [more detailed usage instructions](https://docs.browser-use.com/customize/real-browser#method-b%3A-connect-using-existing-playwright-objects).
|
||||
|
||||
#### `human_current_page`
|
||||
|
||||
```python
|
||||
human_current_page: Page | None = None
|
||||
```
|
||||
|
||||
Foreground Page that the human is focused on to start, not necessary to set manually.
|
||||
|
||||
#### `initialized`
|
||||
|
||||
```python
|
||||
initialized: bool = False
|
||||
```
|
||||
|
||||
Mark BrowserSession as already initialized, skips launch/connection (not recommended)
|
||||
|
||||
|
||||
#### `**kwargs`
|
||||
|
||||
`BrowserSession` can also accept *all* of the parameters [below](#browserprofile).
|
||||
(the parameters *above* this point are specific to `BrowserSession` and cannot be stored in a `BrowserProfile` template)
|
||||
|
||||
Extra `**kwargs` passed to `BrowserSession(...)` act as session-specific overrides to the `BrowserProfile(...)` template.
|
||||
|
||||
```python
|
||||
base_iphone13 = BrowserProfile(
|
||||
storage_state='/tmp/auth.json', # share cookies between parallel browsers
|
||||
**playwright.devices['iPhone 13'],
|
||||
timezone_id='UTC',
|
||||
)
|
||||
usa_phone = BrowserSession(
|
||||
browser_profile=base_iphone13,
|
||||
timezone_id='America/New_York', # kwargs override values in base_iphone13
|
||||
)
|
||||
eu_phone = BrowserSession(
|
||||
browser_profile=base_iphone13,
|
||||
timezone_id='Europe/Paris',
|
||||
)
|
||||
|
||||
usa_agent = Agent(task='show me todays schedule...', browser_session=usa_phone)
|
||||
eu_agent = Agent(task='show me todays schedule...', browser_session=eu_phone)
|
||||
await asyncio.gather(usa_agent.run(), eu_agent.run())
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
## `BrowserProfile`
|
||||
|
||||
A `BrowserProfile` is a 📋 config template for a 🎭 `BrowserSession(...)`.
|
||||
|
||||
It's basically just a typed + validated version of a `dict` to hold config.
|
||||
|
||||
When you find yourself storing or re-using many browser configs, you can upgrade from:
|
||||
|
||||
```diff
|
||||
- config = {key: val, key: val, ...}
|
||||
- BrowserSession(**config)
|
||||
```
|
||||
To this instead:
|
||||
```diff
|
||||
+ config = BrowserProfile(key=val, key=val, ...)
|
||||
+ BrowserSession(browser_profile=config)
|
||||
```
|
||||
|
||||
<Tip>
|
||||
You don't ever *need* to use a `BrowserProfile`, you can always pass config parameters directly to `BrowserSession`:
|
||||
```python
|
||||
session = BrowserSession(headless=True, storage_state='auth.json', viewport={...}, ...)
|
||||
```
|
||||
</Tip>
|
||||
|
||||
`BrowserProfile` is optional, but it provides a number of benefits over a normal `dict` for holding config:
|
||||
|
||||
- has type hints and pydantic field descriptions that show up in your IDE
|
||||
- validates config at runtime quickly without having to start a browser
|
||||
- provides helper methods to autodetect screen size, set up local paths, save/load config as json, and more...
|
||||
|
||||
<Tip>
|
||||
`BrowserProfile`s are designed to easily be given 🆔 `uuid`s and put in a database + made editable by users.
|
||||
`BrowserSession`s get their own 🆔 `uuid`s and can be linked by 🖇 foreign key to whatever `BrowserProfile`s they use.
|
||||
|
||||
This cleanly separates the per-connection rows from the bulky re-usable config and avoids wasting space in your db.
|
||||
This is useful because a user may only have 2 or 3 profiles, but they could have 100k+ sessions within a few months.
|
||||
</Tip>
|
||||
|
||||
|
||||
`BrowserProfile` and `BrowserSession` can both take any of the:
|
||||
|
||||
- [Playwright parameters](#playwright)
|
||||
- [Browser-Use parameters](#browser-use-parameters) (extra options we provide on top of `playwright`)
|
||||
|
||||
The only parameters `BrowserProfile` can NOT take are the session-specific connection parameters and live playwright objects:
|
||||
`cdp_url`, `wss_url`, `browser_pid`, `page`, `browser`, `browser_context`, `playwright`, etc.
|
||||
|
||||
### Basic Example
|
||||
|
||||
```python
|
||||
from browser_use.browser import BrowserProfile, BrowserSession
|
||||
|
||||
profile = BrowserProfile(
|
||||
stealth=True,
|
||||
storage_state='/tmp/google_docs_cookies.json',
|
||||
allowed_domains=['docs.google.com', 'https://accounts.google.com'],
|
||||
viewport={'width': 396, 'height': 774},
|
||||
# ... playwright args / browser-use config args ...
|
||||
)
|
||||
|
||||
phone1 = BrowserSession(browser_profile=profile, device_scale_factor=1)
|
||||
phone2 = BrowserSession(browser_profile=profile, device_scale_factor=2)
|
||||
phone3 = BrowserSession(browser_profile=profile, device_scale_factor=3)
|
||||
```
|
||||
|
||||
### Browser-Use Parameters
|
||||
|
||||
These parameters control Browser Use-specific features, and are outside the standard playwright set. They can be passed to `BrowserSession(...)` and/or stored in a `BrowserProfile` template.
|
||||
|
||||
#### `keep_alive`
|
||||
|
||||
```python
|
||||
keep_alive: bool | None = None
|
||||
```
|
||||
|
||||
If `True`, it won't close the browser after the first `agent.run()` ends. Useful for running multiple tasks with the same browser instance. If this is left as `None` and the Agent launched its own browser, the default is to close the browser after the agent completes. If the agent connected to an existing browser, then it will leave it open.
|
||||
|
||||
#### `stealth`
|
||||
|
||||
```python
|
||||
stealth: bool = False
|
||||
```
|
||||
Set to `True` to use [`patchright`](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright) to avoid bot-blocking. (Might cause issues with some sites, requires manual testing.)
|
||||
|
||||
<a name="restrict-urls"></a>
|
||||
|
||||
#### `allowed_domains`
|
||||
|
||||
```python
|
||||
allowed_domains: list[str] | None = None
|
||||
```
|
||||
|
||||
List of allowed domains for navigation. If None, all domains are allowed.
|
||||
Example: `['google.com', '*.wikipedia.org']` - Here the agent will only be able to access `google.com` exactly and `wikipedia.org` + `*.wikipedia.org`.
|
||||
|
||||
Glob patterns are supported:
|
||||
- `['example.com']` ✅ will match only `https://example.com/*` exactly, subdomains will not be allowed.
|
||||
It's always the most secure to list all the domains you want to give the access to explicitly w/ schemes e.g.
|
||||
`['https://google.com', 'http*://www.google.com', 'https://myaccount.google.com', 'https://mail.google.com', 'https://docs.google.com']`
|
||||
- `['*.example.com']` ⚠️ **CAUTION** this will match `https://example.com` and *all* its subdomains.
|
||||
Make sure *all* the subdomains are safe for the agent! `abc.example.com`, `def.example.com`, ..., `useruploads.example.com`, `admin.example.com`
|
||||
|
||||
#### `disable_security`
|
||||
|
||||
```python
|
||||
disable_security: bool = False
|
||||
```
|
||||
|
||||
Completely disables all basic browser security features. Allows interacting across cross-site iframe boundaries, but it is very insecure: do not visit untrusted URLs or use real cookies while it is enabled.
|
||||
|
||||
<Warning>
|
||||
This option is very INSECURE and is only for niche use cases. DO NOT LET YOUR AGENT visit untrusted URLs or give it real cookies when `disable_security=True`.
|
||||
Visiting a single malicious site in this mode can trivially compromise *all* the cookies in the browser profile in under 1 second.
|
||||
</Warning>
|
||||
|
||||
#### `deterministic_rendering`
|
||||
|
||||
```python
|
||||
deterministic_rendering: bool = False
|
||||
```
|
||||
|
||||
Attempts to force more deterministic rendering for consistent screenshots across different host operating systems and hardware.
|
||||
|
||||
Disables OS-specific font hints, aliasing, GPU-accelerated rendering, normalizes DPI, and sets a specific JS random seed to try to avoid nondeterministic JS.
|
||||
|
||||
<Warning>
|
||||
This flag is for niche use cases (e.g. screenshot diffing) where pixel-perfect rendering across different server operating systems is more important than stability.
|
||||
It makes the agent more likely to be blocked as a bot and triggers some glitchy behavior in chrome occasionally, it's not recommended unless you know you need it.
|
||||
</Warning>
|
||||
|
||||
#### `highlight_elements`
|
||||
|
||||
```python
|
||||
highlight_elements: bool = True
|
||||
```
|
||||
|
||||
Highlight interactive elements on the screen with colorful bounding boxes.
|
||||
|
||||
#### `viewport_expansion`
|
||||
|
||||
```python
|
||||
viewport_expansion: int = 500
|
||||
```
|
||||
|
||||
Viewport expansion in pixels. With this you can control how much of the page is included in the context of the LLM:
|
||||
- `-1`: All elements from the entire page will be included, regardless of visibility (highest token usage but most complete).
|
||||
- `0`: Only elements which are currently visible in the viewport will be included.
|
||||
- `500` (default): Elements in the viewport plus an additional 500 pixels in each direction will be included, providing a balance between context and token usage.
|
||||
|
||||
#### `include_dynamic_attributes`
|
||||
|
||||
```python
|
||||
include_dynamic_attributes: bool = True
|
||||
```
|
||||
|
||||
Include dynamic attributes in selectors for better element targeting.
|
||||
|
||||
#### `minimum_wait_page_load_time`
|
||||
|
||||
```python
|
||||
minimum_wait_page_load_time: float = 0.25
|
||||
```
|
||||
|
||||
Minimum time to wait before capturing page state for LLM input.
|
||||
|
||||
#### `wait_for_network_idle_page_load_time`
|
||||
|
||||
```python
|
||||
wait_for_network_idle_page_load_time: float = 0.5
|
||||
```
|
||||
|
||||
Time to wait for network activity to cease. Increase to 3-5s for slower websites. This tracks essential content loading, not dynamic elements like videos.
|
||||
|
||||
#### `maximum_wait_page_load_time`
|
||||
|
||||
```python
|
||||
maximum_wait_page_load_time: float = 5.0
|
||||
```
|
||||
|
||||
Maximum time to wait for page load before proceeding.
|
||||
|
||||
#### `wait_between_actions`
|
||||
|
||||
```python
|
||||
wait_between_actions: float = 0.5
|
||||
```
|
||||
|
||||
Time to wait between agent actions.
|
||||
|
||||
#### `cookies_file`
|
||||
|
||||
```python
|
||||
cookies_file: str | None = None
|
||||
```
|
||||
|
||||
JSON file path to save cookies to.
|
||||
|
||||
<Warning>
|
||||
This option is DEPRECATED. Use [`storage_state`](#storage-state) instead, it's the standard playwright format and also supports `localStorage` and `indexedDB`!
|
||||
|
||||
The library will automatically save a new `storage_state.json` next to any `cookies_file` path you provide, just use `storage_state='path/to/storage_state.json'` to switch to the new format:
|
||||
|
||||
`cookies_file.json`: `[{cookie}, {cookie}, {cookie}]`
|
||||
⬇️
|
||||
`storage_state.json`: `{"cookies": [{cookie}, {cookie}, {cookie}], "origins": {... optional localstorage state ...}}`
|
||||
|
||||
Or run `playwright open https://example.com/ --save-storage=storage_state.json` and log into any sites you need to generate a fresh storage state file.
|
||||
</Warning>
|
||||
|
||||
|
||||
#### `profile_directory`
|
||||
|
||||
```python
|
||||
profile_directory: str = 'Default'
|
||||
```
|
||||
|
||||
Chrome profile subdirectory name inside of your `user_data_dir` (e.g. `Default`, `Profile 1`, `Work`, etc.).
|
||||
No need to set this unless you have multiple profiles set up in a single `user_data_dir` and need to use a specific one.
|
||||
|
||||
#### `window_position`
|
||||
|
||||
```python
|
||||
window_position: dict | None = {"width": 0, "height": 0}
|
||||
```
|
||||
|
||||
Window position from top-left.
|
||||
|
||||
|
||||
---
|
||||
|
||||
<a name="playwright-parameters"></a><a name="playwright"></a>
|
||||
|
||||
### Playwright Launch Options
|
||||
|
||||
|
||||
All the parameters below are standard playwright parameters and can be passed to both `BrowserSession` and `BrowserProfile`.
|
||||
They are defined in `browser_use/browser/profile.py`. See here for the [official Playwright documentation](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context) for all of these options.
|
||||
|
||||
#### `headless`
|
||||
|
||||
```python
|
||||
headless: bool | None = None
|
||||
```
|
||||
|
||||
Runs the browser without a visible UI. If None, auto-detects based on display availability. If you set `headless=False` on a server with no monitor attached, the browser will fail to launch (use `xvfb` + vnc to give a headless server a virtual display you can remote control).
|
||||
|
||||
`headless=False` is recommended for maximum stealth and is required for human-in-the-loop workflows.
|
||||
|
||||
#### `channel`
|
||||
|
||||
```python
|
||||
channel: BrowserChannel = 'chromium'
|
||||
```
|
||||
|
||||
Browser channel: `'chromium'` (default when `stealth=False`), `'chrome'` (default when `stealth=True`), `'chrome-beta'`, `'chrome-dev'`, `'chrome-canary'`, `'msedge'`, `'msedge-beta'`, `'msedge-dev'`, `'msedge-canary'`
|
||||
|
||||
Don't worry, other chromium-based browsers not in this list (e.g. `brave`) are still supported if you provide your own [`executable_path`](#executable_path), just set it to `chromium` for those.
|
||||
|
||||
#### `executable_path`
|
||||
|
||||
```python
|
||||
executable_path: str | Path | None = None
|
||||
```
|
||||
|
||||
Path to browser executable for custom installations.
|
||||
|
||||
#### `user_data_dir`
|
||||
|
||||
```python
|
||||
user_data_dir: str | Path | None = '~/.config/browseruse/profiles/default'
|
||||
```
|
||||
|
||||
Directory for browser profile data. Set to `None` to use an ephemeral temporary profile (aka incognito mode).
|
||||
|
||||
Multiple running browsers **cannot share a single `user_data_dir` at the same time**. You must set it to `None` or
|
||||
provide a unique `user_data_dir` per-session if you plan to run multiple browsers.
|
||||
|
||||
The browser version run must always be equal to or greater than the version used to create the `user_data_dir`.
|
||||
If you see errors like `Failed to parse Extensions` or similar and failures when launching, you're attempting to run an older browser with an incompatible `user_data_dir` that's already been migrated to a newer schema version.
|
||||
|
||||
#### `args`
|
||||
|
||||
```python
|
||||
args: list[str] = []
|
||||
```
|
||||
Additional command-line arguments to pass to the browser. See here for the [full list of available chrome launch options](https://peter.sh/experiments/chromium-command-line-switches/).
|
||||
|
||||
|
||||
|
||||
#### `ignore_default_args`
|
||||
|
||||
```python
|
||||
ignore_default_args: list[str] | bool = ['--enable-automation', '--disable-extensions']
|
||||
```
|
||||
|
||||
List of default CLI args to stop playwright from including when launching chrome. Set it to `True` to disable *all* default options (not recommended).
|
||||
|
||||
#### `env`
|
||||
|
||||
```python
|
||||
env: dict[str, str] = {}
|
||||
```
|
||||
|
||||
Extra environment variables to set when launching browser. e.g. `{'DISPLAY': '1'}` to use a specific X11 display.
|
||||
|
||||
#### `chromium_sandbox`
|
||||
|
||||
```python
|
||||
chromium_sandbox: bool = not IN_DOCKER
|
||||
```
|
||||
|
||||
Whether to enable Chromium sandboxing (recommended for security). Should always be `False` when running inside Docker
|
||||
because Docker provides its own sandboxing that can conflict with Chrome's.
|
||||
|
||||
#### `devtools`
|
||||
|
||||
```python
|
||||
devtools: bool = False
|
||||
```
|
||||
|
||||
Whether to open DevTools panel automatically (only works when `headless=False`).
|
||||
|
||||
#### `slow_mo`
|
||||
|
||||
```python
|
||||
slow_mo: float = 0
|
||||
```
|
||||
|
||||
Slow down actions by this many milliseconds.
|
||||
|
||||
#### `timeout`
|
||||
|
||||
```python
|
||||
timeout: float = 30000
|
||||
```
|
||||
|
||||
Default timeout in milliseconds for connecting to a remote browser.
|
||||
|
||||
#### `accept_downloads`
|
||||
|
||||
```python
|
||||
accept_downloads: bool = True
|
||||
```
|
||||
|
||||
Whether to automatically accept all downloads.
|
||||
|
||||
#### `proxy`
|
||||
|
||||
```python
|
||||
proxy: dict | None = None
|
||||
```
|
||||
|
||||
Proxy settings. Example: `{"server": "http://proxy.com:8080", "username": "user", "password": "pass"}`.
|
||||
|
||||
#### `permissions`
|
||||
|
||||
```python
|
||||
permissions: list[str] = ['clipboard-read', 'clipboard-write', 'notifications']
|
||||
```
|
||||
|
||||
Browser permissions to grant. See here for the [full list of available permissions](https://playwright.dev/python/docs/api/class-browsercontext#browser-context-grant-permissions).
|
||||
|
||||
#### `storage_state`
|
||||
|
||||
```python
|
||||
storage_state: str | Path | dict | None = None
|
||||
```
|
||||
|
||||
Browser storage state (cookies, localStorage). Can be file path or dict. See here for the [Playwright `storage_state` documentation](https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state) on how to use it.
|
||||
This option is only applied when launching a new browser using the default builtin playwright chromium and `user_data_dir=None` is set.
|
||||
|
||||
```bash
|
||||
# to create a storage state file, run the following and log into the sites you need once the browser opens:
|
||||
playwright open https://example.com/ --save-storage=./storage_state.json
|
||||
# then setup a BrowserSession with storage_state='./storage_state.json' and user_data_dir=None to use it
|
||||
```
|
||||
|
||||
### Playwright Timing Settings
|
||||
|
||||
These control how the browser waits for CDP API calls to complete and pages to load.
|
||||
|
||||
#### `default_timeout`
|
||||
|
||||
```python
|
||||
default_timeout: float | None = None
|
||||
```
|
||||
|
||||
Default timeout for Playwright operations in milliseconds.
|
||||
|
||||
#### `default_navigation_timeout`
|
||||
|
||||
```python
|
||||
default_navigation_timeout: float | None = None
|
||||
```
|
||||
|
||||
Default timeout for page navigation in milliseconds.
|
||||
|
||||
|
||||
### Playwright Viewport Options
|
||||
|
||||
Configure browser window size, viewport, and display properties:
|
||||
|
||||
#### `user_agent`
|
||||
|
||||
```python
|
||||
user_agent: str | None = None
|
||||
```
|
||||
|
||||
Specific user agent to use in this context.
|
||||
|
||||
#### `is_mobile`
|
||||
|
||||
```python
|
||||
is_mobile: bool = False
|
||||
```
|
||||
|
||||
Whether the meta viewport tag is taken into account and touch events are enabled.
|
||||
|
||||
#### `has_touch`
|
||||
|
||||
```python
|
||||
has_touch: bool = False
|
||||
```
|
||||
|
||||
Specifies if viewport supports touch events.
|
||||
|
||||
#### `geolocation`
|
||||
|
||||
```python
|
||||
geolocation: dict | None = None
|
||||
```
|
||||
|
||||
Geolocation coordinates. Example: `{"latitude": 59.95, "longitude": 30.31667}`
|
||||
|
||||
#### `locale`
|
||||
|
||||
```python
|
||||
locale: str | None = None
|
||||
```
|
||||
|
||||
Specify user locale, for example en-GB, de-DE, etc. Locale will affect the navigator.language value, Accept-Language request header value as well as number and date formatting rules.
|
||||
|
||||
#### `timezone_id`
|
||||
|
||||
```python
|
||||
timezone_id: str | None = None
|
||||
```
|
||||
|
||||
Timezone identifier (e.g., 'America/New_York').
|
||||
|
||||
#### `window_size`
|
||||
|
||||
```python
|
||||
window_size: dict | None = None
|
||||
```
|
||||
|
||||
Browser window size for headful mode. Example: `{"width": 1920, "height": 1080}`
|
||||
|
||||
#### `viewport`
|
||||
|
||||
```python
|
||||
viewport: dict | None = None
|
||||
```
|
||||
|
||||
Viewport size with `width` and `height`. Example: `{"width": 1280, "height": 720}`
|
||||
|
||||
#### `no_viewport`
|
||||
|
||||
```python
|
||||
no_viewport: bool | None = not headless
|
||||
```
|
||||
|
||||
Disable fixed viewport. Content will resize with window.
|
||||
|
||||
*Tip:* don't use this parameter, it's a playwright standard parameter but it's redundant and only serves to override the `viewport` setting above.
|
||||
A viewport is *always* used in headless mode regardless of this setting, and is *never* used in headful mode unless you pass `viewport={width, height}` explicitly.
|
||||
|
||||
#### `device_scale_factor`
|
||||
|
||||
```python
|
||||
device_scale_factor: float | None = None
|
||||
```
|
||||
|
||||
Device scale factor (DPI). Useful for high-resolution screenshots (set it to 2).
|
||||
|
||||
#### `screen`
|
||||
|
||||
```python
|
||||
screen: dict | None = None
|
||||
```
|
||||
|
||||
Screen size available to browser. Auto-detected if not specified.
|
||||
|
||||
#### `color_scheme`
|
||||
|
||||
```python
|
||||
color_scheme: ColorScheme = 'light'
|
||||
```
|
||||
|
||||
Preferred color scheme: `'light'`, `'dark'`, `'no-preference'`
|
||||
|
||||
#### `contrast`
|
||||
|
||||
```python
|
||||
contrast: Contrast = 'no-preference'
|
||||
```
|
||||
|
||||
Contrast preference: `'no-preference'`, `'more'`, `'null'`
|
||||
|
||||
#### `reduced_motion`
|
||||
|
||||
```python
|
||||
reduced_motion: ReducedMotion = 'no-preference'
|
||||
```
|
||||
|
||||
Reduced motion preference: `'reduce'`, `'no-preference'`, `'null'`
|
||||
|
||||
#### `forced_colors`
|
||||
|
||||
```python
|
||||
forced_colors: ForcedColors = 'none'
|
||||
```
|
||||
|
||||
Forced colors mode: `'active'`, `'none'`, `'null'`
|
||||
|
||||
#### `**playwright.devices[...]`
|
||||
|
||||
Playwright provides launch & context arg presets to [emulate common device fingerprints](https://playwright.dev/python/docs/emulation).
|
||||
|
||||
```python
|
||||
BrowserProfile(
|
||||
...
|
||||
**playwright.devices['iPhone 13'], # playwright = await async_playwright().start()
|
||||
)
|
||||
```
|
||||
|
||||
Because `BrowserSession` and `BrowserProfile` take all the standard playwright args, we are able to support these device presets as well.
|
||||
|
||||
### Playwright Security Options
|
||||
|
||||
> See `allowed_domains` above too!
|
||||
|
||||
#### `offline`
|
||||
|
||||
```python
|
||||
offline: bool = False
|
||||
```
|
||||
|
||||
Emulate network being offline.
|
||||
|
||||
#### `http_credentials`
|
||||
|
||||
```python
|
||||
http_credentials: dict | None = None
|
||||
```
|
||||
|
||||
Credentials for HTTP authentication.
|
||||
|
||||
#### `extra_http_headers`
|
||||
|
||||
```python
|
||||
extra_http_headers: dict[str, str] = {}
|
||||
```
|
||||
|
||||
Additional HTTP headers to be sent with every request.
|
||||
|
||||
#### `ignore_https_errors`
|
||||
|
||||
```python
|
||||
ignore_https_errors: bool = False
|
||||
```
|
||||
|
||||
Whether to ignore HTTPS errors when sending network requests.
|
||||
|
||||
#### `bypass_csp`
|
||||
|
||||
```python
|
||||
bypass_csp: bool = False
|
||||
```
|
||||
|
||||
Toggles bypassing Content-Security-Policy.
|
||||
|
||||
#### `java_script_enabled`
|
||||
|
||||
```python
|
||||
java_script_enabled: bool = True
|
||||
```
|
||||
|
||||
Whether or not to enable JavaScript in the context.
|
||||
|
||||
#### `service_workers`
|
||||
|
||||
```python
|
||||
service_workers: ServiceWorkers = 'allow'
|
||||
```
|
||||
|
||||
Whether to allow sites to register Service workers: `'allow'`, `'block'`
|
||||
|
||||
#### `base_url`
|
||||
|
||||
```python
|
||||
base_url: str | None = None
|
||||
```
|
||||
|
||||
Base URL to be used in `page.goto()` and similar operations.
|
||||
|
||||
#### `strict_selectors`
|
||||
|
||||
```python
|
||||
strict_selectors: bool = False
|
||||
```
|
||||
|
||||
If true, selector passed to Playwright methods will throw if more than one element matches.
|
||||
|
||||
#### `client_certificates`
|
||||
|
||||
```python
|
||||
client_certificates: list[ClientCertificate] = []
|
||||
```
|
||||
|
||||
Client certificates to be used with requests.
|
||||
|
||||
|
||||
### Playwright Recording Options
|
||||
|
||||
Note: Browser Use also provides some of our own recording-related options not listed below (see above).
|
||||
|
||||
#### `record_video_dir`
|
||||
|
||||
<a name="record-video-dir"></a>
|
||||
<a name="save-recording-path"></a>
|
||||
|
||||
```python
|
||||
record_video_dir: str | Path | None = None
|
||||
```
|
||||
|
||||
Directory to save `.webm` video recordings. [Playwright Docs: `record_video_dir`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-video-dir)
|
||||
|
||||
<Note>
|
||||
This parameter also has an alias `save_recording_path` for backwards compatibility with past versions, but we recommend using the standard Playwright name `record_video_dir` going forward.
|
||||
</Note>
|
||||
|
||||
#### `record_video_size`
|
||||
|
||||
```python
|
||||
record_video_size: dict | None = None
|
||||
|
||||
```
|
||||
|
||||
Video size. Example: `{"width": 1280, "height": 720}`. [Playwright Docs: `record_video_size`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-video-size)
|
||||
|
||||
#### `record_har_path`
|
||||
|
||||
<a name="record-har-path"></a>
|
||||
<a name="save-har-path"></a>
|
||||
|
||||
```python
|
||||
record_har_path: str | Path | None = None
|
||||
```
|
||||
|
||||
Path to save `.har` network trace files. [Playwright Docs: `record_har_path`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-record-har-path)
|
||||
|
||||
<Note>
|
||||
This parameter also has an alias `save_har_path` for backwards compatibility with past versions, but we recommend using the standard Playwright name `record_har_path` going forward.
|
||||
</Note>
|
||||
|
||||
#### `record_har_content`
|
||||
|
||||
```python
|
||||
record_har_content: RecordHarContent = 'embed'
|
||||
```
|
||||
|
||||
How to persist HAR content: `'omit'`, `'embed'`, `'attach'`
|
||||
|
||||
#### `record_har_mode`
|
||||
|
||||
```python
|
||||
record_har_mode: RecordHarMode = 'full'
|
||||
```
|
||||
|
||||
HAR recording mode: `'full'`, `'minimal'`
|
||||
|
||||
#### `record_har_omit_content`
|
||||
|
||||
```python
|
||||
record_har_omit_content: bool = False
|
||||
```
|
||||
|
||||
Whether to omit request content from the HAR.
|
||||
|
||||
#### `record_har_url_filter`
|
||||
|
||||
```python
|
||||
record_har_url_filter: str | Pattern | None = None
|
||||
```
|
||||
|
||||
URL filter for HAR recording.
|
||||
|
||||
#### `downloads_path`
|
||||
|
||||
```python
|
||||
downloads_path: str | Path | None = '~/.config/browseruse/downloads'
|
||||
```
|
||||
|
||||
(aliases: `downloads_dir`, `save_downloads_path`)
|
||||
|
||||
Local filesystem directory to save browser file downloads to.
|
||||
|
||||
#### `traces_dir`
|
||||
|
||||
<a name="traces-dir"></a>
|
||||
<a name="trace-path"></a>
|
||||
|
||||
```python
|
||||
traces_dir: str | Path | None = None
|
||||
```
|
||||
|
||||
Directory to save all-in-one trace files. Files are automatically named as `{traces_dir}/{context_id}.zip`. [Playwright Docs: `traces_dir`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context-option-traces-dir)
|
||||
|
||||
<Note>
|
||||
This parameter also has an alias `trace_path` for backwards compatibility with past versions, but we recommend using the standard Playwright name `traces_dir` going forward.
|
||||
</Note>
|
||||
|
||||
#### `handle_sighup`
|
||||
|
||||
```python
|
||||
handle_sighup: bool = True
|
||||
```
|
||||
|
||||
Whether playwright should swallow SIGHUP signals and kill the browser.
|
||||
|
||||
#### `handle_sigint`
|
||||
|
||||
```python
|
||||
handle_sigint: bool = False
|
||||
```
|
||||
|
||||
Whether playwright should swallow SIGINT signals and kill the browser.
|
||||
|
||||
#### `handle_sigterm`
|
||||
|
||||
```python
|
||||
handle_sigterm: bool = False
|
||||
```
|
||||
|
||||
Whether playwright should swallow SIGTERM signals and kill the browser.
|
||||
|
||||
---
|
||||
|
||||
## Full Example
|
||||
|
||||
```python
|
||||
from browser_use import BrowserSession, BrowserProfile, Agent
|
||||
|
||||
browser_profile = BrowserProfile(
|
||||
headless=False,
|
||||
storage_state="path/to/storage_state.json",
|
||||
wait_for_network_idle_page_load_time=3.0,
|
||||
viewport={"width": 1280, "height": 1100},
|
||||
locale='en-US',
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
|
||||
highlight_elements=True,
|
||||
viewport_expansion=500,
|
||||
allowed_domains=['*.google.com', 'http*://*.wikipedia.org'],
|
||||
user_data_dir=None,
|
||||
)
|
||||
|
||||
browser_session = BrowserSession(
|
||||
browser_profile=browser_profile,
|
||||
headless=True, # extra kwargs to the session override the defaults in the profile
|
||||
)
|
||||
|
||||
# you can drive a session without the agent / reuse it between agents
|
||||
await browser_session.start()
|
||||
page = await browser_session.get_current_page()
|
||||
await page.goto('https://example.com/first/page')
|
||||
|
||||
async def run_search():
|
||||
agent = Agent(
|
||||
task='Your task',
|
||||
llm=llm,
|
||||
page=page, # optional: pass a specific playwright page to start on
|
||||
browser_session=browser_session, # optional: pass an existing browser session to an agent
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
- **BrowserSession** (defined in `browser_use/browser/session.py`) handles the live browser connection and runtime state
|
||||
- **BrowserProfile** (defined in `browser_use/browser/profile.py`) is a template that can store default config parameters for a `BrowserSession(...)`
|
||||
|
||||
Configuration parameters defined in both scopes are consumed by these calls, depending on whether we're connecting/launching:
|
||||
|
||||
- `BrowserConnectArgs` - args for `playwright.BrowserType.connect_over_cdp(...)`
|
||||
- `BrowserLaunchArgs` - args for `playwright.BrowserType.launch(...)`
|
||||
- `BrowserNewContextArgs` - args for `playwright.Browser.new_context(...)`
|
||||
- `BrowserLaunchPersistentContextArgs` - args for `playwright.BrowserType.launch_persistent_context(...)`
|
||||
- Browser Use's own internal methods
|
||||
|
||||
For more details on Playwright's browser context options, see their [launch args documentation](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-persistent-context).
|
||||
|
||||
---
|
||||
82
.github/instructions/browser-use.instructions.md
vendored
82
.github/instructions/browser-use.instructions.md
vendored
|
|
@ -1,82 +0,0 @@
|
|||
---
|
||||
applyTo: '**'
|
||||
---
|
||||
## 🧠 General Guidelines for Contributing to `browser-use`
|
||||
|
||||
**Browser-Use** is an AI agent that autonomously interacts with the web. It takes a user-defined task, navigates web pages using Chromium via Playwright, processes HTML, and repeatedly queries a language model (like `gpt-4o`) to decide the next action—until the task is completed.
|
||||
|
||||
### 🗂️ File Documentation
|
||||
|
||||
When you create a **new file**:
|
||||
|
||||
* **For humans**: At the top of the file, include a docstring in natural language explaining:
|
||||
|
||||
* What this file does.
|
||||
* How it fits into the browser-use system.
|
||||
* If it introduces a new abstraction or replaces an old one.
|
||||
* **For LLMs/AI**: Include structured metadata using standardized comments such as:
|
||||
|
||||
```python
|
||||
# @file purpose: Defines <purpose>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 🧰 Development Rules
|
||||
|
||||
* ✅ **Always use [`uv`](https://github.com/astral-sh/uv) instead of `pip`**
|
||||
For deterministic and fast dependency installs.
|
||||
|
||||
```bash
|
||||
uv venv --python 3.11
|
||||
source .venv/bin/activate
|
||||
uv sync
|
||||
```
|
||||
|
||||
* ✅ **Use real model names**
|
||||
Do **not** replace `gpt-4o` with `gpt-4`. The model `gpt-4o` is a distinct release and supported.
|
||||
|
||||
* ✅ **Type-safe coding**
|
||||
Use **Pydantic v2 models** for all internal action schemas, task inputs/outputs, and controller I/O. This ensures robust validation and LLM-call integrity.
|
||||
|
||||
---
|
||||
|
||||
## ⚙️ Adding New Actions
|
||||
|
||||
To add a new action that your browser agent can execute:
|
||||
|
||||
```python
|
||||
from playwright.async_api import Page
|
||||
from browser_use import Controller, ActionResult
|
||||
|
||||
controller = Controller()
|
||||
|
||||
@controller.registry.action("Search the web for a specific query")
|
||||
async def search_web(query: str, page: Page):
|
||||
# Implement your logic here, e.g., query a search engine and return results
|
||||
result = ...
|
||||
return ActionResult(extracted_content=result, include_in_memory=True)
|
||||
```
|
||||
|
||||
### Notes:
|
||||
|
||||
* Use descriptive names and docstrings for each action.
|
||||
* Prefer returning `ActionResult` with structured content to help the agent reason better.
|
||||
|
||||
---
|
||||
|
||||
## 🧠 Creating and Running an Agent
|
||||
|
||||
To define a task and run a browser-use agent:
|
||||
|
||||
```python
|
||||
from browser_use import Agent
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
task = "Find the CEO of OpenAI and return their name"
|
||||
model = ChatOpenAI(model="gpt-4o")
|
||||
|
||||
agent = Agent(task=task, llm=model, controller=controller)
|
||||
|
||||
history = await agent.run()
|
||||
```
|
||||
|
|
@ -1,249 +0,0 @@
|
|||
---
|
||||
description: "Extend default agent and write custom action functions to do certain tasks"
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
Custom actions are functions *you* provide, that are added to our [default actions](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py) the agent can use to accomplish tasks.
|
||||
Action functions can request [arbitrary parameters](#action-parameters-via-pydantic-model) that the LLM has to come up with + a fixed set of [framework-provided arguments](#framework-provided-parameters) for browser APIs / `Agent(context=...)` / etc.
|
||||
|
||||
<Note>
|
||||
Our default set of actions is already quite powerful, the built-in `Controller` provides basics like `open_tab`, `scroll_down`, `extract_content`, [and more](https://github.com/browser-use/browser-use/blob/main/browser_use/controller/service.py).
|
||||
</Note>
|
||||
|
||||
It's easy to add your own actions to implement additional custom behaviors, integrations with other apps, or performance optimizations.
|
||||
|
||||
For examples of custom actions (e.g. uploading files, asking a human-in-the-loop for help, drawing a polygon with the mouse, and more), see [examples/custom-functions](https://github.com/browser-use/browser-use/tree/main/examples/custom-functions).
|
||||
|
||||
|
||||
## Action Function Registration
|
||||
|
||||
To register your own custom functions (which can be `sync` or `async`), decorate them with the `@controller.action(...)` decorator. This saves them into the `controller.registry`.
|
||||
|
||||
```python
|
||||
from browser_use import Controller, ActionResult
|
||||
|
||||
controller = Controller()
|
||||
|
||||
@controller.action('Ask human for help with a question', domains=['example.com']) # pass allowed_domains= or page_filter= to limit actions to certain pages
|
||||
def ask_human(question: str) -> ActionResult:
|
||||
answer = input(f'{question} > ')
|
||||
return ActionResult(extracted_content=f'The human responded with: {answer}', include_in_memory=True)
|
||||
```
|
||||
|
||||
```python
|
||||
# Then pass your controller to the agent to use it
|
||||
agent = Agent(
|
||||
task='...',
|
||||
llm=llm,
|
||||
controller=controller,
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
Keep your action function names and descriptions short and concise:
|
||||
- The LLM chooses between actions to run solely based on the function name and description
|
||||
- The LLM decides how to fill action params based on their names, type hints, & defaults
|
||||
</Note>
|
||||
|
||||
---
|
||||
|
||||
## Action Parameters
|
||||
|
||||
Browser Use supports two patterns for defining action parameters: normal function arguments, or a Pydantic model.
|
||||
|
||||
### Function Arguments
|
||||
|
||||
For simple actions that don't need default values, you can define the action parameters directly as arguments to the function. This one takes a single string argument, `css_selector`.
|
||||
When the LLM calls an action, it sees its argument names & types, and will provide values that fit.
|
||||
|
||||
```python
|
||||
@controller.action('Click element')
|
||||
async def click_element(css_selector: str, page: Page) -> ActionResult:
|
||||
# css_selector is an action param the LLM must provide when calling
|
||||
# page is a special framework-provided param to access the browser APIs (see below)
|
||||
await page.locator(css_selector).click()
|
||||
return ActionResult(extracted_content=f"Clicked element {css_selector}")
|
||||
```
|
||||
|
||||
### Pydantic Model
|
||||
|
||||
You can define a pydantic model for the parameters your action expects by setting a `@controller.action(..., param_model=MyParams)`.
|
||||
This allows you to use optional parameters, default values, `Annotated[...]` types with custom validation, field descriptions, and other features offered by pydantic.
|
||||
|
||||
When the agent calls your action function, an instance of your model with the values filled in by the LLM is passed as the argument named `params`.
|
||||
|
||||
Using a pydantic model is helpful because it allows more flexibility and power to enforce the schema of the values the LLM should provide.
|
||||
The LLM gets the entire pydantic JSON schema for your `param_model`: it sees the function name & description, plus the individual field names, types, descriptions, and default values.
|
||||
|
||||
|
||||
```python
|
||||
from typing import Annotated
|
||||
from pydantic import AfterValidator, BaseModel, Field
|
||||
from browser_use import ActionResult
|
||||
|
||||
class MyParams(BaseModel):
|
||||
field1: int
|
||||
field2: str = 'default value'
|
||||
field3: Annotated[str, AfterValidator(lambda s: s.lower())] # example: enforce always lowercase
|
||||
field4: str = Field(default='abc', description='Detailed description for the LLM')
|
||||
|
||||
@controller.action('My action', param_model=MyParams)
|
||||
async def my_action(params: MyParams, page: Page) -> ActionResult:
|
||||
await page.keyboard.type(params.field2)
|
||||
return ActionResult(extracted_content=f"Inputted {params} on {page.url}")
|
||||
```
|
||||
|
||||
Any special framework-provided arguments (e.g. `page`) will be passed as separate positional arguments after `params`.
|
||||
|
||||
<Important>
|
||||
To use a `BaseModel` the arg *must* be called `params`. Action function args are matched and filled like named arguments; arg order doesn't matter but names and types do.
|
||||
</Important>
|
||||
|
||||
### Framework-Provided Parameters
|
||||
|
||||
These special action parameters are injected by the `Controller` and are passed as extra args to any actions that expect them.
|
||||
|
||||
For example, actions that need to run playwright code to interact with the browser should take the argument `page` or `browser_session`.
|
||||
|
||||
- `page: Page` - The current Playwright page (shortcut for `browser_session.get_current_page()`)
|
||||
- `browser_session: BrowserSession` - The current browser session (and playwright context via `browser_session.browser_context`)
|
||||
- `context: AgentContext` - Any optional top-level context object passed to the Agent, e.g. `Agent(context=user_provided_obj)`
|
||||
- `page_extraction_llm: BaseChatModel` - LLM instance used for page content extraction
|
||||
- `available_file_paths: list[str]` - List of available file paths for upload / processing
|
||||
- `has_sensitive_data: bool` - Whether the action content contains sensitive data markers (check this to avoid logging sensitive data to terminal by accident)
|
||||
|
||||
#### Example: Action uses the current `page`
|
||||
|
||||
```python
|
||||
from playwright.async_api import Page
|
||||
from browser_use import Controller, ActionResult
|
||||
|
||||
controller = Controller()
|
||||
|
||||
@controller.action('Type keyboard input into a page')
|
||||
async def input_text_into_page(text: str, page: Page) -> ActionResult:
|
||||
await page.keyboard.type(text)
|
||||
return ActionResult(extracted_content='Website opened')
|
||||
```
|
||||
|
||||
#### Example: Action uses the `browser_context`
|
||||
|
||||
```python
|
||||
from browser_use import BrowserSession, Controller, ActionResult
|
||||
|
||||
controller = Controller()
|
||||
|
||||
@controller.action('Open website')
|
||||
async def open_website(url: str, browser_session: BrowserSession) -> ActionResult:
|
||||
# find matching existing tab by looking through all pages in playwright browser_context
|
||||
all_tabs = await browser_session.browser_context.pages
|
||||
for tab in all_tabs:
|
||||
if tab.url == url:
|
||||
await tab.bring_to_foreground()
|
||||
return ActionResult(extracted_content=f'Switched to tab with url {url}')
|
||||
# otherwise, create a new tab
|
||||
new_tab = await browser_session.browser_context.new_page()
|
||||
await new_tab.goto(url)
|
||||
return ActionResult(extracted_content=f'Opened new tab with url {url}')
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Important Rules
|
||||
|
||||
1. **Return an [`ActionResult`](https://github.com/search?q=repo%3Abrowser-use%2Fbrowser-use+%22class+ActionResult%28BaseModel%29%22&type=code)**: All actions should return an `ActionResult | str | None`. The stringified version of the result is passed back to the LLM, and optionally persisted in the long-term memory when `ActionResult(..., include_in_memory=True)`.
|
||||
2. **Type hints on arguments are required**: They are used to verify that action params don't conflict with special arguments injected by the controller (e.g. `page`)
|
||||
3. **Action functions called directly must be passed kwargs**: When calling actions from other actions or python code, you must **pass all parameters as kwargs only**, even though the actions are usually defined using positional args (for the same reasons as [pluggy](https://pluggy.readthedocs.io/en/stable/index.html#calling-hooks)).
|
||||
Action arguments are always matched by name and type, **not** positional order, so this helps prevent ambiguity / reordering issues while keeping action signatures short.
|
||||
```python
|
||||
@controller.action('Fill in the country form field')
|
||||
async def input_country_field(country: str, page: Page) -> ActionResult:
|
||||
await some_action(123, page=page) # ❌ not allowed: positional args, use kwarg syntax when calling
|
||||
await some_action(abc=123, page=page) # ✅ allowed: action params & special kwargs
|
||||
await some_other_action(params=OtherAction(abc=123), page=page) # ✅ allowed: params=model & special kwargs
|
||||
```
|
||||
|
||||
```python
|
||||
# Using Pydantic Model to define action params (recommended)
|
||||
class PinCodeParams(BaseModel):
|
||||
code: int
|
||||
retries: int = 3 # ✅ supports optional/defaults
|
||||
|
||||
@controller.action('...', param_model=PinCodeParams)
|
||||
async def input_pin_code(params: PinCodeParams, page: Page): ... # ✅ special params at the end
|
||||
|
||||
# Using function arguments to define action params
|
||||
async def input_pin_code(code: int, retries: int, page: Page): ... # ✅ params first, special params second, no defaults
|
||||
async def input_pin_code(code: int, retries: int=3): ... # ✅ defaults ok only if no special params needed
|
||||
async def input_pin_code(code: int, retries: int=3, page: Page): ... # ❌ Python SyntaxError! not allowed
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Reusing Custom Actions Across Agents
|
||||
|
||||
You can use the same controller for multiple agents.
|
||||
|
||||
```python
|
||||
controller = Controller()
|
||||
|
||||
# ... register actions to the controller
|
||||
|
||||
agent = Agent(
|
||||
task="Go to website X and find the latest news",
|
||||
llm=llm,
|
||||
controller=controller
|
||||
)
|
||||
|
||||
# Run the agent
|
||||
await agent.run()
|
||||
|
||||
agent2 = Agent(
|
||||
task="Go to website Y and find the latest news",
|
||||
llm=llm,
|
||||
controller=controller
|
||||
)
|
||||
|
||||
await agent2.run()
|
||||
```
|
||||
|
||||
<Note>
|
||||
The controller is stateless and can be used to register multiple actions and
|
||||
multiple agents.
|
||||
</Note>
|
||||
|
||||
|
||||
|
||||
## Exclude functions
|
||||
|
||||
If you want to exclude some registered actions and make them unavailable to the agent, you can do:
|
||||
```python
|
||||
controller = Controller(exclude_actions=['open_tab', 'search_google'])
|
||||
agent = Agent(controller=controller, ...)
|
||||
```
|
||||
|
||||
|
||||
If you want actions to only be available on certain pages, and to not tell the LLM about them on other pages,
|
||||
you can use the `allowed_domains` and `page_filter` parameters:
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from browser_use import Controller, ActionResult
|
||||
|
||||
controller = Controller()
|
||||
|
||||
async def is_ai_allowed(page: Page):
|
||||
if api.some_service.check_url(page.url):
|
||||
logger.warning('Allowing AI agent to visit url:', page.url)
|
||||
return True
|
||||
return False
|
||||
|
||||
@controller.action('Fill out secret_form', allowed_domains=['https://*.example.com'], page_filter=is_ai_allowed)
|
||||
def fill_out_form(...) -> ActionResult:
|
||||
... will only be runnable by LLM on pages that match https://*.example.com *AND* where is_ai_allowed(page) returns True
|
||||
|
||||
```
|
||||
381
.github/instructions/hooks.instructions.md
vendored
381
.github/instructions/hooks.instructions.md
vendored
|
|
@ -1,381 +0,0 @@
|
|||
---
|
||||
description: "Customize agent behavior with lifecycle hooks"
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
Browser-Use provides lifecycle hooks that allow you to execute custom code at specific points during the agent's execution.
|
||||
Hook functions can be used to read and modify agent state while running, implement custom logic, change configuration, and integrate the Agent with external applications.
|
||||
|
||||
|
||||
## Available Hooks
|
||||
|
||||
Currently, Browser-Use provides the following hooks:
|
||||
|
||||
| Hook | Description | When it's called |
|
||||
| ---- | ----------- | ---------------- |
|
||||
| `on_step_start` | Executed at the beginning of each agent step | Before the agent processes the current state and decides on the next action |
|
||||
| `on_step_end` | Executed at the end of each agent step | After the agent has executed all the actions for the current step, before it starts the next step |
|
||||
|
||||
```python
|
||||
await agent.run(on_step_start=..., on_step_end=...)
|
||||
```
|
||||
|
||||
Each hook should be an `async` callable function that accepts the `agent` instance as its only parameter.
|
||||
|
||||
|
||||
### Basic Example
|
||||
|
||||
```python
|
||||
from browser_use import Agent
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
|
||||
async def my_step_hook(agent: Agent):
|
||||
# inside a hook you can access all the state and methods under the Agent object:
|
||||
# agent.settings, agent.state, agent.task
|
||||
# agent.controller, agent.llm, agent.browser_session
|
||||
# agent.pause(), agent.resume(), agent.add_new_task(...), etc.
|
||||
|
||||
# You also have direct access to the playwright Page and Browser Context
|
||||
page = await agent.browser_session.get_current_page()
|
||||
# https://playwright.dev/python/docs/api/class-page
|
||||
|
||||
current_url = page.url
|
||||
visit_log = agent.state.history.urls()
|
||||
previous_url = visit_log[-2] if len(visit_log) >= 2 else None
|
||||
print(f"Agent was last on URL: {previous_url} and is now on {current_url}")
|
||||
|
||||
# Example: listen for events on the page, interact with the DOM, run JS directly, etc.
|
||||
await page.on('domcontentloaded', lambda: print('page navigated to a new url...'))
|
||||
await page.locator("css=form > input[type=submit]").click()
|
||||
await page.evaluate('() => alert(1)')
|
||||
await page.browser.new_tab
|
||||
await agent.browser_session.session.context.add_init_script('/* some JS to run on every page */')
|
||||
|
||||
# Example: monitor or intercept all network requests
|
||||
async def handle_request(route):
|
||||
# Print, modify, block, etc. do anything to the requests here
|
||||
# https://playwright.dev/python/docs/network#handle-requests
|
||||
print(route.request, route.request.headers)
|
||||
await route.continue_(headers=route.request.headers)
|
||||
await page.route("**/*", handle_request)
|
||||
|
||||
# Example: pause agent execution and resume it based on some custom code
|
||||
if '/completed' in current_url:
|
||||
agent.pause()
|
||||
Path('result.txt').write_text(await page.content())
|
||||
input('Saved "completed" page content to result.txt, press [Enter] to resume...')
|
||||
agent.resume()
|
||||
|
||||
agent = Agent(
|
||||
task="Search for the latest news about AI",
|
||||
llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
|
||||
await agent.run(
|
||||
on_step_start=my_step_hook,
|
||||
# on_step_end=...
|
||||
max_steps=10
|
||||
)
|
||||
```
|
||||
|
||||
## Data Available in Hooks
|
||||
|
||||
When working with agent hooks, you have access to the entire `Agent` instance. Here are some useful data points you can access:
|
||||
|
||||
- `agent.task` lets you see what the main task is, `agent.add_new_task(...)` lets you queue up a new one
|
||||
- `agent.controller` gives access to the `Controller()` object and `Registry()` containing the available actions
|
||||
- `agent.controller.registry.execute_action('click_element_by_index', {'index': 123}, browser_session=agent.browser_session)`
|
||||
- `agent.context` lets you access any user-provided context object passed in to `Agent(context=...)`
|
||||
- `agent.sensitive_data` contains the sensitive data dict, which can be updated in-place to add/remove/modify items
|
||||
- `agent.settings` contains all the configuration options passed to the `Agent(...)` at init time
|
||||
- `agent.llm` gives direct access to the main LLM object (e.g. `ChatOpenAI`)
|
||||
- `agent.state` gives access to lots of internal state, including agent thoughts, outputs, actions, etc.
|
||||
- `agent.state.history.model_thoughts()`: Reasoning from Browser Use's model.
|
||||
- `agent.state.history.model_outputs()`: Raw outputs from Browser Use's model.
|
||||
- `agent.state.history.model_actions()`: Actions taken by the agent
|
||||
- `agent.state.history.extracted_content()`: Content extracted from web pages
|
||||
- `agent.state.history.urls()`: URLs visited by the agent
|
||||
- `agent.browser_session` gives direct access to the `BrowserSession()` and playwright objects
|
||||
- `agent.browser_session.get_current_page()`: Get the current playwright `Page` object the agent is focused on
|
||||
- `agent.browser_session.browser_context`: Get the current playwright `BrowserContext` object
|
||||
- `agent.browser_session.browser_context.pages`: Get all the tabs currently open in the context
|
||||
- `agent.browser_session.get_page_html()`: Current page HTML
|
||||
- `agent.browser_session.take_screenshot()`: Screenshot of the current page
|
||||
|
||||
|
||||
## Tips for Using Hooks
|
||||
|
||||
- **Avoid blocking operations**: Since hooks run in the same execution thread as the agent, try to keep them efficient or use asynchronous patterns.
|
||||
- **Handle exceptions**: Make sure your hook functions handle exceptions gracefully to prevent interrupting the agent's main flow.
|
||||
- **Use custom actions instead**: hooks are fairly advanced; most things can be implemented with [custom action functions](/customize/custom-functions) instead
|
||||
|
||||
---
|
||||
|
||||
## Complex Example: Agent Activity Recording System
|
||||
|
||||
This comprehensive example demonstrates a complete implementation for recording and saving Browser-Use agent activity, consisting of both server and client components.
|
||||
|
||||
### Setup Instructions
|
||||
|
||||
To use this example, you'll need to:
|
||||
|
||||
1. Set up the required dependencies:
|
||||
```bash
|
||||
pip install fastapi uvicorn prettyprinter pyobjtojson dotenv browser-use langchain-openai
|
||||
```
|
||||
|
||||
2. Create two separate Python files:
|
||||
- `api.py` - The FastAPI server component
|
||||
- `client.py` - The Browser-Use agent with recording hook
|
||||
|
||||
3. Run both components:
|
||||
- Start the API server first: `python api.py`
|
||||
- Then run the client: `python client.py`
|
||||
|
||||
### Server Component (api.py)
|
||||
|
||||
The server component handles receiving and storing the agent's activity data:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#
|
||||
# FastAPI API to record and save Browser-Use activity data.
|
||||
# Save this code to api.py and run with `python api.py`
|
||||
#
|
||||
|
||||
import json
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
import prettyprinter
|
||||
import uvicorn
|
||||
|
||||
prettyprinter.install_extras()
|
||||
|
||||
# Utility function to save screenshots
|
||||
def b64_to_png(b64_string: str, output_file):
|
||||
"""
|
||||
Convert a Base64-encoded string to a PNG file.
|
||||
|
||||
:param b64_string: A string containing Base64-encoded data
|
||||
:param output_file: The path to the output PNG file
|
||||
"""
|
||||
with open(output_file, "wb") as f:
|
||||
f.write(base64.b64decode(b64_string))
|
||||
|
||||
# Initialize FastAPI app
|
||||
app = FastAPI()
|
||||
|
||||
|
||||
@app.post("/post_agent_history_step")
|
||||
async def post_agent_history_step(request: Request):
|
||||
data = await request.json()
|
||||
prettyprinter.cpprint(data)
|
||||
|
||||
# Ensure the "recordings" folder exists using pathlib
|
||||
recordings_folder = Path("recordings")
|
||||
recordings_folder.mkdir(exist_ok=True)
|
||||
|
||||
# Determine the next file number by examining existing .json files
|
||||
existing_numbers = []
|
||||
for item in recordings_folder.iterdir():
|
||||
if item.is_file() and item.suffix == ".json":
|
||||
try:
|
||||
file_num = int(item.stem)
|
||||
existing_numbers.append(file_num)
|
||||
except ValueError:
|
||||
# In case the file name isn't just a number
|
||||
pass
|
||||
|
||||
if existing_numbers:
|
||||
next_number = max(existing_numbers) + 1
|
||||
else:
|
||||
next_number = 1
|
||||
|
||||
# Construct the file path
|
||||
file_path = recordings_folder / f"{next_number}.json"
|
||||
|
||||
# Save the JSON data to the file
|
||||
with file_path.open("w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
# Optionally save screenshot if needed
|
||||
# if "website_screenshot" in data and data["website_screenshot"]:
|
||||
# screenshot_folder = Path("screenshots")
|
||||
# screenshot_folder.mkdir(exist_ok=True)
|
||||
# b64_to_png(data["website_screenshot"], screenshot_folder / f"{next_number}.png")
|
||||
|
||||
return {"status": "ok", "message": f"Saved to {file_path}"}
|
||||
|
||||
if __name__ == "__main__":
|
||||
print("Starting Browser-Use recording API on http://0.0.0.0:9000")
|
||||
uvicorn.run(app, host="0.0.0.0", port=9000)
|
||||
```
|
||||
|
||||
### Client Component (client.py)
|
||||
|
||||
The client component runs the Browser-Use agent with a recording hook:
|
||||
|
||||
```python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
#
|
||||
# Client to record and save Browser-Use activity.
|
||||
# Save this code to client.py and run with `python client.py`
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import requests
|
||||
from dotenv import load_dotenv
|
||||
from pyobjtojson import obj_to_json
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent
|
||||
|
||||
# Load environment variables (for API keys)
|
||||
load_dotenv()
|
||||
|
||||
|
||||
def send_agent_history_step(data):
|
||||
"""Send the agent step data to the recording API"""
|
||||
url = "http://127.0.0.1:9000/post_agent_history_step"
|
||||
response = requests.post(url, json=data)
|
||||
return response.json()
|
||||
|
||||
|
||||
async def record_activity(agent_obj):
|
||||
"""Hook function that captures and records agent activity at each step"""
|
||||
website_html = None
|
||||
website_screenshot = None
|
||||
urls_json_last_elem = None
|
||||
model_thoughts_last_elem = None
|
||||
model_outputs_json_last_elem = None
|
||||
model_actions_json_last_elem = None
|
||||
extracted_content_json_last_elem = None
|
||||
|
||||
print('--- ON_STEP_START HOOK ---')
|
||||
|
||||
# Capture current page state
|
||||
website_html = await agent_obj.browser_session.get_page_html()
|
||||
website_screenshot = await agent_obj.browser_session.take_screenshot()
|
||||
|
||||
# Make sure we have state history
|
||||
if hasattr(agent_obj, "state"):
|
||||
history = agent_obj.state.history
|
||||
else:
|
||||
history = None
|
||||
print("Warning: Agent has no state history")
|
||||
return
|
||||
|
||||
# Process model thoughts
|
||||
model_thoughts = obj_to_json(
|
||||
obj=history.model_thoughts(),
|
||||
check_circular=False
|
||||
)
|
||||
if len(model_thoughts) > 0:
|
||||
model_thoughts_last_elem = model_thoughts[-1]
|
||||
|
||||
# Process model outputs
|
||||
model_outputs = agent_obj.state.history.model_outputs()
|
||||
model_outputs_json = obj_to_json(
|
||||
obj=model_outputs,
|
||||
check_circular=False
|
||||
)
|
||||
if len(model_outputs_json) > 0:
|
||||
model_outputs_json_last_elem = model_outputs_json[-1]
|
||||
|
||||
# Process model actions
|
||||
model_actions = agent_obj.state.history.model_actions()
|
||||
model_actions_json = obj_to_json(
|
||||
obj=model_actions,
|
||||
check_circular=False
|
||||
)
|
||||
if len(model_actions_json) > 0:
|
||||
model_actions_json_last_elem = model_actions_json[-1]
|
||||
|
||||
# Process extracted content
|
||||
extracted_content = agent_obj.state.history.extracted_content()
|
||||
extracted_content_json = obj_to_json(
|
||||
obj=extracted_content,
|
||||
check_circular=False
|
||||
)
|
||||
if len(extracted_content_json) > 0:
|
||||
extracted_content_json_last_elem = extracted_content_json[-1]
|
||||
|
||||
# Process URLs
|
||||
urls = agent_obj.state.history.urls()
|
||||
urls_json = obj_to_json(
|
||||
obj=urls,
|
||||
check_circular=False
|
||||
)
|
||||
if len(urls_json) > 0:
|
||||
urls_json_last_elem = urls_json[-1]
|
||||
|
||||
# Create a summary of all data for this step
|
||||
model_step_summary = {
|
||||
"website_html": website_html,
|
||||
"website_screenshot": website_screenshot,
|
||||
"url": urls_json_last_elem,
|
||||
"model_thoughts": model_thoughts_last_elem,
|
||||
"model_outputs": model_outputs_json_last_elem,
|
||||
"model_actions": model_actions_json_last_elem,
|
||||
"extracted_content": extracted_content_json_last_elem
|
||||
}
|
||||
|
||||
print("--- MODEL STEP SUMMARY ---")
|
||||
print(f"URL: {urls_json_last_elem}")
|
||||
|
||||
# Send data to the API
|
||||
result = send_agent_history_step(data=model_step_summary)
|
||||
print(f"Recording API response: {result}")
|
||||
|
||||
|
||||
async def run_agent():
|
||||
"""Run the Browser-Use agent with the recording hook"""
|
||||
agent = Agent(
|
||||
task="Compare the price of gpt-4o and DeepSeek-V3",
|
||||
llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
|
||||
try:
|
||||
print("Starting Browser-Use agent with recording hook")
|
||||
await agent.run(
|
||||
on_step_start=record_activity,
|
||||
max_steps=30
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Error running agent: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check if API is running
|
||||
try:
|
||||
requests.get("http://127.0.0.1:9000")
|
||||
print("Recording API is available")
|
||||
except:
|
||||
print("Warning: Recording API may not be running. Start api.py first.")
|
||||
|
||||
# Run the agent
|
||||
asyncio.run(run_agent())
|
||||
```
|
||||
|
||||
Contribution by Carlos A. Planchón.
|
||||
|
||||
### Working with the Recorded Data
|
||||
|
||||
After running the agent, you'll find the recorded data in the `recordings` directory. Here's how you can use this data:
|
||||
|
||||
1. **View recorded sessions**: Each JSON file contains a snapshot of agent activity for one step
|
||||
2. **Extract screenshots**: You can modify the API to save screenshots separately
|
||||
3. **Analyze agent behavior**: Use the recorded data to study how the agent navigates websites
|
||||
|
||||
### Extending the Example
|
||||
|
||||
You can extend this recording system in several ways:
|
||||
|
||||
1. **Save screenshots separately**: Uncomment the screenshot saving code in the API
|
||||
2. **Add a web dashboard**: Create a simple web interface to view recorded sessions
|
||||
3. **Add session IDs**: Modify the API to group steps by agent session
|
||||
4. **Add filtering**: Implement filters to record only specific types of actions
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
---
|
||||
description: "The default is text. But you can define a structured output format to make post-processing easier."
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Custom output format
|
||||
With [this example](https://github.com/browser-use/browser-use/blob/main/examples/features/custom_output.py) you can define what output format the agent should return to you.
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
# Define the output format as a Pydantic model
|
||||
class Post(BaseModel):
|
||||
post_title: str
|
||||
post_url: str
|
||||
num_comments: int
|
||||
hours_since_post: int
|
||||
|
||||
|
||||
class Posts(BaseModel):
|
||||
posts: List[Post]
|
||||
|
||||
|
||||
controller = Controller(output_model=Posts)
|
||||
|
||||
|
||||
async def main():
|
||||
task = 'Go to hackernews show hn and give me the first 5 posts'
|
||||
model = ChatOpenAI(model='gpt-4o')
|
||||
agent = Agent(task=task, llm=model, controller=controller)
|
||||
|
||||
history = await agent.run()
|
||||
|
||||
result = history.final_result()
|
||||
if result:
|
||||
parsed: Posts = Posts.model_validate_json(result)
|
||||
|
||||
for post in parsed.posts:
|
||||
print('\n--------------------------------')
|
||||
print(f'Title: {post.post_title}')
|
||||
print(f'URL: {post.post_url}')
|
||||
print(f'Comments: {post.num_comments}')
|
||||
print(f'Hours since post: {post.hours_since_post}')
|
||||
else:
|
||||
print('No result')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
```
|
||||
414
.github/instructions/real-browser.instructions.md
vendored
414
.github/instructions/real-browser.instructions.md
vendored
|
|
@ -1,414 +0,0 @@
|
|||
---
|
||||
description: "Connect to a remote browser or launch a new local browser."
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Browser Use supports a wide variety of ways to launch or connect to a browser:
|
||||
|
||||
- Launch a new local browser using playwright/patchright chromium (the default)
|
||||
- Connect to a remote browser using CDP or WSS
|
||||
- Use an existing playwright `Page`, `Browser`, or `BrowserContext` object
|
||||
- Connect to a local browser already running using `browser_pid`
|
||||
|
||||
<Tip>
|
||||
Don't want to manage your own browser infrastructure? Try [☁️ Browser Use Cloud](https://browser-use.com) ➡️
|
||||
|
||||
We provide automatic CAPTCHA solving, proxies, human-in-the-loop automation, and more!
|
||||
</Tip>
|
||||
|
||||
## Connection Methods
|
||||
|
||||
### Method A: Launch a New Local Browser (Default)
|
||||
|
||||
Launch a local browser using built-in default (playwright `chromium`) or a provided `executable_path`:
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
# If no executable_path provided, uses Playwright/Patchright's built-in Chromium
|
||||
browser_session = BrowserSession(
|
||||
# Path to a specific Chromium-based executable (optional)
|
||||
executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', # macOS
|
||||
# For Windows: 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'
|
||||
# For Linux: '/usr/bin/google-chrome'
|
||||
|
||||
# Use a specific data directory on disk (optional, set to None for incognito)
|
||||
user_data_dir='~/.config/browseruse/profiles/default', # this is the default
|
||||
# ... any other BrowserProfile or playwright launch_persistent_context config...
|
||||
# headless=False,
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
browser_session=browser_session,
|
||||
)
|
||||
```
|
||||
|
||||
We support most `chromium`-based browsers in `executable_path`, including [Brave](https://github.com/browser-use/browser-use/tree/main/examples/browser/stealth.py), [patchright chromium](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright), [rebrowser](https://rebrowser.net/), Edge, and more. See [`examples/browser/stealth.py`](https://github.com/browser-use/browser-use/tree/main/examples/browser) for more. We do not support Firefox or Safari at the moment.
|
||||
|
||||
<Warning>
|
||||
[As of Chrome v136](https://github.com/browser-use/browser-use/issues/1520), driving browsers with the default profile is [no longer supported](https://developer.chrome.com/blog/remote-debugging-port) for security reasons. Browser-Use has transitioned to creating a new dedicated profile for agents in: `~/.config/browseruse/profiles/default`. You can [open this profile](https://superuser.com/questions/377186/how-do-i-start-chrome-using-a-specified-user-profile) and log into everything you need your agent to have access to, and it will persist over time.
|
||||
</Warning>
|
||||
|
||||
### Method B: Connect Using Existing Playwright Objects
|
||||
|
||||
Pass existing Playwright `Page`, `BrowserContext`, `Browser`, and/or `playwright` API object to `BrowserSession(...)`:
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
from playwright.async_api import async_playwright
|
||||
# from patchright.async_api import async_playwright # stealth alternative
|
||||
|
||||
async with async_playwright() as playwright:
|
||||
browser = await playwright.chromium.launch()
|
||||
context = await browser.new_context()
|
||||
page = await context.new_page()
|
||||
|
||||
browser_session = BrowserSession(
|
||||
page=page,
|
||||
# browser_context=context, # all these are supported
|
||||
# browser=browser,
|
||||
# playwright=playwright,
|
||||
)
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
browser_session=browser_session,
|
||||
)
|
||||
```
|
||||
|
||||
You can also pass `page` directly to `Agent(...)` as a shortcut.
|
||||
|
||||
```python
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
page=page,
|
||||
)
|
||||
```
|
||||
|
||||
### Method C: Connect to Local Browser Using Browser PID
|
||||
|
||||
Connect to a browser with open `--remote-debugging-port`:
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
# First, start Chrome with remote debugging:
|
||||
# /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --remote-debugging-port=9242
|
||||
|
||||
# Then connect using the process ID
|
||||
browser_session = BrowserSession(browser_pid=12345) # Replace with actual Chrome PID
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
browser_session=browser_session,
|
||||
)
|
||||
```
|
||||
|
||||
### Method D: Connect to remote Playwright Node.js Browser Server via WSS URL
|
||||
|
||||
Connect to Playwright Node.js server providers:
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
# Connect to a playwright server
|
||||
browser_session = BrowserSession(wss_url="wss://your-playwright-server.com/ws")
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
browser_session=browser_session,
|
||||
)
|
||||
```
|
||||
|
||||
### Method E: Connect to Remote Browser via CDP URL
|
||||
|
||||
Connect to any remote Chromium-based browser:
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
# Connect to Chrome via CDP
|
||||
browser_session = BrowserSession(cdp_url="http://localhost:9222")
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
browser_session=browser_session,
|
||||
)
|
||||
```
|
||||
|
||||
|
||||
|
||||
## Security Considerations
|
||||
|
||||
<Warning>
|
||||
When using any browser profile, the agent will have access to:
|
||||
- All its logged-in sessions and cookies
|
||||
- Saved passwords (if autofill is enabled)
|
||||
- Browser history and bookmarks
|
||||
- Extensions and their data
|
||||
|
||||
Always review the task you're giving to the agent and ensure it aligns with your security requirements!
|
||||
Use `Agent(sensitive_data={'https://auth.example.com': {x_key: value}})` for any secrets, and restrict the browser with `BrowserSession(allowed_domains=['https://*.example.com'])`.
|
||||
</Warning>
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Use isolated profiles**: Create separate Chrome profiles for different agents to limit scope of risk:
|
||||
```python
|
||||
browser_session = BrowserSession(
|
||||
user_data_dir='~/.config/browseruse/profiles/banking',
|
||||
# profile_directory='Default'
|
||||
)
|
||||
```
|
||||
|
||||
2. **Limit domain access**: Restrict which sites the agent can visit:
|
||||
```python
|
||||
browser_session = BrowserSession(
|
||||
allowed_domains=['example.com', 'http*://*.github.com'],
|
||||
)
|
||||
```
|
||||
|
||||
3. **Enable `keep_alive=True`**: Use this if you want to share a single `BrowserSession` between more than one agent:
|
||||
```python
|
||||
browser_session = BrowserSession(
|
||||
keep_alive=True,
|
||||
...
|
||||
)
|
||||
await browser_session.start() # start the session yourself before passing to Agent
|
||||
...
|
||||
agent = Agent(..., browser_session=browser_session)
|
||||
await agent.run()
|
||||
...
|
||||
await browser_session.kill() # end the session yourself, shortcut for keep_alive=False + .stop()
|
||||
```
|
||||
|
||||
## Re-Using a Browser
|
||||
|
||||
A `BrowserSession` starts when the browser is launched/connected, and ends when the browser process exits/disconnects. A session internally manages a single live playwright browser context, and is normally auto-closed by the agent when its task is complete (*if* the agent started the session itself). If you pass an existing `BrowserSession` into an Agent, or if you set `BrowserSession(keep_alive=True)`, the session will not be closed and can be re-used between agents.
|
||||
|
||||
Browser Use provides a number of ways to re-use profiles, sessions, and other configuration across multiple agents.
|
||||
|
||||
- ✅ sequential agents can re-use a single `user_data_dir` in new `BrowserSession`s
|
||||
- ✅ sequential agents can re-use a single `BrowserSession` without closing it
|
||||
- ❌ parallel agents cannot run separate `BrowserSession`s using the same `user_data_dir`
|
||||
- ✅ parallel agents can run separate `BrowserSession`s using the same `storage_state`
|
||||
- ✅ parallel agents can share a single `BrowserSession`, working in different tabs
|
||||
- ⚠️ parallel agents can share a single `BrowserSession`, working in the same tab
|
||||
|
||||
<Important>
|
||||
Multiple `BrowserSession`s (aka chrome processes) cannot share the same `user_data_dir` at the same time, but they can share a `storage_state` file or `BrowserProfile` config.
|
||||
</Important>
|
||||
|
||||
### Sequential Agents, Same Profile, Different Browser
|
||||
|
||||
If you are only running one agent & browser at a time, they can re-use the same `user_data_dir` sequentially.
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserProfile, BrowserSession
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
reused_profile = BrowserProfile(user_data_dir='~/.config/browseruse/profiles/default')
|
||||
|
||||
agent1 = Agent(
|
||||
task="The first task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_profile=reused_profile, # pass the profile in, it will auto-create a session
|
||||
)
|
||||
await agent1.run()
|
||||
|
||||
agent2 = Agent(
|
||||
task="The second task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_profile=reused_profile, # agent will auto-create its own new session
|
||||
)
|
||||
await agent2.run()
|
||||
```
|
||||
|
||||
> Make sure to never mix different browser versions or `executable_path`s with the same `user_data_dir`. Once run with a newer browser version, some migrations are applied to the dir and older browsers won't be able to read it.
|
||||
|
||||
### Sequential Agents, Same Profile, Same Browser
|
||||
|
||||
If you are only running one agent at a time, they can re-use the same active `BrowserSession` and avoid having to relaunch chrome.
|
||||
Each agent will start off looking at the same tab the last agent ended off on.
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
reused_session = BrowserSession(
|
||||
user_data_dir='~/.config/browseruse/profiles/default',
|
||||
keep_alive=True, # dont close browser after 1st agent.run() ends
|
||||
)
|
||||
await reused_session.start() # when keep_alive=True, session must be started manually
|
||||
|
||||
agent1 = Agent(
|
||||
task="The first task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=reused_session,
|
||||
)
|
||||
await agent1.run()
|
||||
|
||||
agent2 = Agent(
|
||||
task="The second task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=reused_session, # re-use the same session
|
||||
)
|
||||
await agent2.run()
|
||||
|
||||
await reused_session.close()
|
||||
```
|
||||
|
||||
### Parallel Agents, Same Browser, Multiple Tabs
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
shared_browser = BrowserSession(
|
||||
storage_state='/tmp/cookies.json',
|
||||
user_data_dir=None,
|
||||
keep_alive=True,
|
||||
headless=True,
|
||||
)
|
||||
await shared_browser.start() # when keep_alive=True, you must start the session yourself
|
||||
|
||||
agent1 = Agent(
|
||||
task="The first task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=shared_browser, # pass the session in
|
||||
)
|
||||
agent2 = Agent(
|
||||
task="The second task...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=shared_browser, # re-use the same session
|
||||
)
|
||||
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
|
||||
|
||||
await shared_browser.close()
|
||||
```
|
||||
|
||||
### Parallel Agents, Same Browser, Same Tab
|
||||
|
||||
<Warning>
|
||||
⚠️ This mode is not recommended. Agents are not yet optimized to share the same tab in the same browser; they may interfere with each other or cause errors.
|
||||
</Warning>
|
||||
|
||||
|
||||
```python
|
||||
from browser_use import Agent, BrowserSession
|
||||
from langchain_openai import ChatOpenAI
|
||||
from playwright.async_api import async_playwright
|
||||
|
||||
playwright = await async_playwright().start()
|
||||
browser = await playwright.chromium.launch(headless=True)
|
||||
context = await browser.new_context()
|
||||
shared_page = await context.new_page()
|
||||
await shared_page.goto('https://example.com', wait_until='domcontentloaded')
|
||||
|
||||
shared_session = BrowserSession(page=shared_page, keep_alive=True)
|
||||
await shared_session.start()
|
||||
|
||||
agent1 = Agent(
|
||||
task="Fill out the form in section A...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=shared_session
|
||||
)
|
||||
agent2 = Agent(
|
||||
task="Fill out the form in section B...",
|
||||
llm=ChatOpenAI(model="gpt-4o-mini"),
|
||||
browser_session=shared_session,
|
||||
)
|
||||
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
|
||||
|
||||
await shared_session.kill()
|
||||
```
|
||||
|
||||
### Parallel Agents, Same Profile, Different Browsers
|
||||
|
||||
<Tip>
|
||||
This mode is the recommended default.
|
||||
</Tip>
|
||||
|
||||
To share a single set of configuration or cookies, but still have agents working in their own browser sessions (potentially in parallel), use our provided `BrowserProfile` object.
|
||||
|
||||
The recommended way to re-use cookies and localStorage state between separate parallel sessions is to use the [`storage_state`](https://docs.browser-use.com/customize/browser-settings#storage-state) option.
|
||||
|
||||
```bash
|
||||
# open a browser to log into sites you want the Agent to have access to
|
||||
playwright open https://example.com/ --save-storage=/tmp/auth.json
|
||||
playwright open https://example.com/ --load-storage=/tmp/auth.json
|
||||
```
|
||||
|
||||
```python
|
||||
from browser_use.browser import BrowserProfile, BrowserSession
|
||||
|
||||
shared_profile = BrowserProfile(
|
||||
headless=True,
|
||||
user_data_dir=None, # use dedicated tmp user_data_dir per session
|
||||
storage_state='/tmp/auth.json', # load/save cookies to/from json file
|
||||
keep_alive=True, # don't close the browser after the agent finishes
|
||||
)
|
||||
|
||||
window1 = BrowserSession(browser_profile=shared_profile)
|
||||
await window1.start()
|
||||
agent1 = Agent(browser_session=window1)
|
||||
|
||||
window2 = BrowserSession(browser_profile=shared_profile)
|
||||
await window2.start()
|
||||
agent2 = Agent(browser_session=window2)
|
||||
|
||||
await asyncio.gather(agent1.run(), agent2.run()) # run in parallel
|
||||
await window1.save_storage_state() # write storage state (cookies, localStorage, etc.) to auth.json
|
||||
await window2.save_storage_state() # you must decide when to save manually
|
||||
|
||||
# can also reload the cookies from the file into the active session if they change
|
||||
await window1.load_storage_state()
|
||||
await window1.close()
|
||||
await window2.close()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Chrome Won't Connect
|
||||
|
||||
If you're having trouble connecting:
|
||||
|
||||
1. **Close all Chrome instances** before trying to launch with a custom profile
|
||||
2. **Check if Chrome is running with debugging port**:
|
||||
```bash
|
||||
ps aux | grep chrome | grep remote-debugging-port
|
||||
```
|
||||
3. **Verify the executable path** is correct for your system
|
||||
4. **Check profile permissions** - ensure your user has read/write access
|
||||
|
||||
### Profile Lock Issues
|
||||
|
||||
If you get a "profile is already in use" error:
|
||||
|
||||
1. Close all Chrome instances
|
||||
2. The profile will automatically be unlocked when BrowserSession starts
|
||||
3. Alternatively, manually delete the `SingletonLock` file in the profile directory
|
||||
|
||||
<Note>
|
||||
For more configuration options, see the [Browser Settings](/customize/browser-settings) documentation.
|
||||
</Note>
|
||||
|
||||
### Profile Version Issues
|
||||
|
||||
The browser version you run must always be equal to or greater than the version used to create the `user_data_dir`.
|
||||
If you see errors like `Failed to parse Extensions` when launching, you're likely attempting to run an older browser with an incompatible `user_data_dir` that's already been migrated to a newer Chrome version.
|
||||
|
||||
Playwright ships a version of chromium that's newer than the default stable Google Chrome release channel, so this can happen if you try to use
|
||||
a profile created by the default playwright chromium (e.g. `user_data_dir='~/.config/browseruse/profiles/default'`) with an older
|
||||
local browser like `executable_path='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'`.
|
||||
198
.github/instructions/sensitive-data.instructions.md
vendored
198
.github/instructions/sensitive-data.instructions.md
vendored
|
|
@ -1,198 +0,0 @@
|
|||
---
|
||||
description: "Handle sensitive information securely and avoid sending PII & passwords to the LLM."
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Handling Sensitive Data
|
||||
|
||||
When working with sensitive information like passwords or PII, you can use the `Agent(sensitive_data=...)` parameter to provide sensitive strings that the model can use in actions without ever seeing them directly.
|
||||
|
||||
```python
|
||||
agent = Agent(
|
||||
task='Log into example.com as user x_username with password x_password',
|
||||
sensitive_data={
|
||||
'https://example.com': {
|
||||
'x_username': 'abc@example.com',
|
||||
'x_password': 'abc123456', # 'x_placeholder': '<actual secret value>',
|
||||
},
|
||||
},
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
|
||||
You should also configure [`BrowserSession(allowed_domains=...)`](https://docs.browser-use.com/customize/browser-settings#allowed-domains) to prevent the Agent from visiting URLs not needed for the task.
|
||||
|
||||
</Note>
|
||||
|
||||
### Basic Usage
|
||||
|
||||
Here's a basic example of how to use sensitive data:
|
||||
|
||||
```python
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
|
||||
|
||||
# Define sensitive data
|
||||
# The LLM will only see placeholder names (x_member_number, x_passphrase), never the actual values
|
||||
sensitive_data = {
|
||||
'https://*.example.com': {
|
||||
'x_member_number': '123235325',
|
||||
'x_passphrase': 'abcwe234',
|
||||
},
|
||||
}
|
||||
|
||||
# Use the placeholder names in your task description
|
||||
task = """
|
||||
1. go to https://travel.example.com
|
||||
2. sign in with your member number x_member_number and private access code x_passphrase
|
||||
3. extract today's list of travel deals as JSON
|
||||
"""
|
||||
|
||||
# Recommended: Limit the domains available for the entire browser so the Agent can't be tricked into visiting untrusted URLs
|
||||
browser_session = BrowserSession(allowed_domains=['https://*.example.com'])
|
||||
|
||||
agent = Agent(
|
||||
task=task,
|
||||
llm=llm,
|
||||
sensitive_data=sensitive_data, # Pass the sensitive data to the agent
|
||||
browser_session=browser_session, # Pass the restricted browser_session to limit URLs Agent can visit
|
||||
use_vision=False, # Disable vision or else the LLM might see entered values in screenshots
|
||||
)
|
||||
|
||||
async def main():
|
||||
await agent.run()
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
In this example:
|
||||
|
||||
1. The LLM only ever sees the `x_member_number` and `x_passphrase` placeholders in prompts
|
||||
2. When the model wants to use your password it outputs x_passphrase - and we replace it with the actual value in the DOM
|
||||
3. When sensitive data appears in the content of the current page, we replace it in the page summary fed to the LLM, so that the model never has it in its state.
|
||||
4. The browser will be entirely prevented from going to any site not under `https://*.example.com`
|
||||
|
||||
This approach ensures that sensitive information remains secure while still allowing the agent to perform tasks that require authentication.
|
||||
|
||||
---
|
||||
|
||||
### Best Practices
|
||||
|
||||
- Always restrict your sensitive data to only the exact domains that need it; `https://travel.example.com` is better than `*.example.com`
|
||||
- Always restrict [`BrowserSession(allowed_domains=[...])`](https://docs.browser-use.com/customize/browser-settings#allowed-domains) to only the domains the agent needs to visit to accomplish its task. This helps guard against prompt injection attacks, jailbreaks, and LLM mistakes.
|
||||
- Only use `sensitive_data` for strings that can be inputted verbatim as text. The LLM never sees the actual values, so it can't "understand" them, adapt them, or split them up for multiple input fields. For example, you can't ask the Agent to click through a datepicker UI to input the sensitive value `1990-12-31`. For these situations you can implement a [custom function](/customize/custom-functions) the LLM can call that updates the DOM using Python / JS.
|
||||
- Don't use `sensitive_data` for login credentials; it's better to use [`storage_state`](https://docs.browser-use.com/customize/browser-settings#storage-state) or a [`user_data_dir`](/customize/browser-settings#user-data-dir) to log into the sites the agent needs in advance & reuse the cookies:
|
||||
|
||||
```bash
|
||||
# open a browser to log into the sites you need & save the cookies
|
||||
$ playwright open https://accounts.google.com --save-storage auth.json
|
||||
```
|
||||
|
||||
Then use those cookies when the agent runs:
|
||||
|
||||
```python
|
||||
agent = Agent(..., browser_session=BrowserSession(storage_state='./auth.json'))
|
||||
```
|
||||
|
||||
<Warning>
|
||||
|
||||
Warning: Vision models still see the screenshot of the page by default - where the sensitive data might be visible.
|
||||
|
||||
It's recommended to set `Agent(use_vision=False)` when working with `sensitive_data`.
|
||||
|
||||
</Warning>
|
||||
|
||||
|
||||
<a name="allowed_domains"></a>
|
||||
<a name="domain-pattern-format"></a>
|
||||
|
||||
|
||||
### Allowed Domains
|
||||
|
||||
Domain patterns in `sensitive_data` follow the same format as [`allowed_domains`](https://docs.browser-use.com/customize/browser-settings#allowed-domains):
|
||||
|
||||
- `example.com` - Matches only `https://example.com/*`
|
||||
- `*.example.com` - Matches `https://example.com/*` and any subdomain `https://*.example.com/*`
|
||||
- `http*://example.com` - Matches both `http://` and `https://` protocols for `example.com/*`
|
||||
- `chrome-extension://*` - Matches any Chrome extension URL e.g. `chrome-extension://anyextensionid/options.html`
|
||||
|
||||
> **Security Warning**: For security reasons, certain patterns are explicitly rejected:
|
||||
>
|
||||
> - Wildcards in TLD part (e.g., `example.*`) are **not allowed** (`google.*` would match `google.ninja`, `google.pizza`, etc. which is a bad idea)
|
||||
> - Embedded wildcards (e.g., `g*e.com`) are rejected to prevent overly broad matches
|
||||
> - Multiple wildcards like `*.*.domain` are not supported currently, open an issue if you need this feature
|
||||
|
||||
The default protocol when no scheme is specified is now `https://` for enhanced security.
|
||||
|
||||
For convenience the system will validate that all domain patterns used in `Agent(sensitive_data)` are also included in `BrowserSession(allowed_domains)`.
|
||||
|
||||
### Missing or Empty Values
|
||||
|
||||
When working with sensitive data, keep these details in mind:
|
||||
|
||||
- If a key referenced by the model (`<secret>key_name</secret>`) is missing from your `sensitive_data` dictionary, a warning will be logged but the substitution tag will be preserved.
|
||||
- If you provide an empty value for a key in the `sensitive_data` dictionary, it will be treated the same as a missing key.
|
||||
- The system will always attempt to process all valid substitutions, even if some keys are missing or empty.
|
||||
|
||||
|
||||
---
|
||||
|
||||
### Full Example
|
||||
|
||||
Here's a more complex example demonstrating multiple domains and sensitive data values.
|
||||
|
||||
```python
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent, BrowserSession
|
||||
|
||||
|
||||
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
|
||||
|
||||
# Domain-specific sensitive data
|
||||
sensitive_data = {
|
||||
'https://*.google.com': {'x_email': '...', 'x_pass': '...'},
|
||||
'chrome-extension://abcd1243': {'x_api_key': '...'},
|
||||
'http*://example.com': {'x_authcode': '123123'}
|
||||
}
|
||||
|
||||
# Set browser session with allowed domains that match all domain patterns in sensitive_data
|
||||
browser_session = BrowserSession(
|
||||
allowed_domains=[
|
||||
'https://*.google.com',
|
||||
'chrome-extension://abcd1243',
|
||||
'http://example.com', # Explicitly include http:// if needed
|
||||
'https://example.com' # By default, only https:// is matched
|
||||
]
|
||||
)
|
||||
|
||||
# Pass the sensitive data to the agent
|
||||
agent = Agent(
|
||||
task="Log into Google, then check my account information",
|
||||
llm=llm,
|
||||
sensitive_data=sensitive_data,
|
||||
browser_session=browser_session,
|
||||
use_vision=False,
|
||||
)
|
||||
|
||||
async def main():
|
||||
await agent.run()
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
With this approach:
|
||||
|
||||
1. The Google credentials (`x_email` and `x_pass`) will only be used on Google domains (any subdomain, https only)
|
||||
2. The API key (`x_api_key`) will only be used on pages served by the specific Chrome extension `abcd1243`
|
||||
3. The auth code (`x_authcode`) will only be used on `http://example.com/*` or `https://example.com/*`
|
||||
|
|
@ -1,294 +0,0 @@
|
|||
---
|
||||
description: "Guide to using different LangChain chat models with Browser Use"
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Browser Use supports various LangChain chat models. Here's how to configure and use the most popular ones. The full list is available in the [LangChain documentation](https://python.langchain.com/docs/integrations/chat/).
|
||||
|
||||
## Model Recommendations
|
||||
|
||||
We have yet to test performance across all models. Currently, we achieve the best results using GPT-4o with an 89% accuracy on the [WebVoyager Dataset](https://browser-use.com/posts/sota-technical-report). DeepSeek-V3 is 30 times cheaper than GPT-4o. Gemini-2.0-exp is also gaining popularity in the community because it is currently free.
|
||||
We also support local models, like Qwen 2.5, but be aware that small models often return the wrong output structure, which leads to parsing errors. We believe that local models will improve significantly this year.
|
||||
|
||||
|
||||
<Note>
|
||||
All models require their respective API keys. Make sure to set them in your
|
||||
environment variables before running the agent.
|
||||
</Note>
|
||||
|
||||
## Supported Models
|
||||
|
||||
All LangChain chat models that support tool-calling are available. We will document the most popular ones here.
|
||||
|
||||
### OpenAI
|
||||
|
||||
OpenAI's GPT-4o models are recommended for best performance.
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatOpenAI(
|
||||
model="gpt-4o",
|
||||
temperature=0.0,
|
||||
)
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
OPENAI_API_KEY=
|
||||
```
|
||||
|
||||
### Anthropic
|
||||
|
||||
|
||||
```python
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from browser_use import Agent
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatAnthropic(
|
||||
model_name="claude-3-5-sonnet-20240620",
|
||||
temperature=0.0,
|
||||
timeout=100, # Increase for complex tasks
|
||||
)
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm
|
||||
)
|
||||
```
|
||||
|
||||
And add the variable:
|
||||
|
||||
```bash .env
|
||||
ANTHROPIC_API_KEY=
|
||||
```
|
||||
|
||||
### Azure OpenAI
|
||||
|
||||
```python
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
import os
|
||||
|
||||
# Initialize the model
|
||||
llm = AzureChatOpenAI(
|
||||
model="gpt-4o",
|
||||
api_version='2024-10-21',
|
||||
azure_endpoint=os.getenv('AZURE_OPENAI_ENDPOINT', ''),
|
||||
api_key=SecretStr(os.getenv('AZURE_OPENAI_KEY', '')),
|
||||
)
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
|
||||
AZURE_OPENAI_KEY=
|
||||
```
|
||||
|
||||
|
||||
### Gemini
|
||||
|
||||
> [!IMPORTANT]
|
||||
> `GEMINI_API_KEY` was the old environment variable name; it should be called `GOOGLE_API_KEY` as of 2025-05.
|
||||
|
||||
```python
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from browser_use import Agent
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Read GOOGLE_API_KEY into env
|
||||
load_dotenv()
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp')
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
GOOGLE_API_KEY=
|
||||
```
|
||||
|
||||
|
||||
### DeepSeek-V3
|
||||
The community likes DeepSeek-V3 for its low price, no rate limits, open-source nature, and good performance.
|
||||
The example is available [here](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek.py).
|
||||
|
||||
```python
|
||||
from langchain_deepseek import ChatDeepSeek
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
api_key = os.getenv("DEEPSEEK_API_KEY")
|
||||
|
||||
# Initialize the model
|
||||
llm=ChatDeepSeek(base_url='https://api.deepseek.com/v1', model='deepseek-chat', api_key=SecretStr(api_key))
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
use_vision=False
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
DEEPSEEK_API_KEY=
|
||||
```
|
||||
|
||||
### DeepSeek-R1
|
||||
We support DeepSeek-R1. It's not fully tested yet, and more functionality will be added over time, e.g. the output of its reasoning content.
|
||||
The example is available [here](https://github.com/browser-use/browser-use/blob/main/examples/models/deepseek-r1.py).
|
||||
It does not support vision. The model is open-source so you could also use it with Ollama, but we have not tested it.
|
||||
```python
|
||||
from langchain_deepseek import ChatDeepSeek
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
api_key = os.getenv("DEEPSEEK_API_KEY")
|
||||
|
||||
# Initialize the model
|
||||
llm=ChatDeepSeek(base_url='https://api.deepseek.com/v1', model='deepseek-reasoner', api_key=SecretStr(api_key))
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
use_vision=False
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
DEEPSEEK_API_KEY=
|
||||
```
|
||||
|
||||
### Ollama
|
||||
Many users asked for local models. Here they are.
|
||||
|
||||
1. Download Ollama from [here](https://ollama.ai/download)
|
||||
2. Run `ollama pull model_name`. Pick a model which supports tool-calling from [here](https://ollama.com/search?c=tools)
|
||||
3. Run `ollama start`
|
||||
|
||||
```python
|
||||
from langchain_ollama import ChatOllama
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
|
||||
|
||||
# Initialize the model
|
||||
llm=ChatOllama(model="qwen2.5", num_ctx=32000)
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables: None!
|
||||
|
||||
### Novita AI
|
||||
[Novita AI](https://novita.ai) is an LLM API provider that offers a wide range of models. Note: choose a model that supports function calling.
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
api_key = os.getenv("NOVITA_API_KEY")
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatOpenAI(base_url='https://api.novita.ai/v3/openai', model='deepseek/deepseek-v3-0324', api_key=SecretStr(api_key))
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
use_vision=False
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
NOVITA_API_KEY=
|
||||
```
|
||||
### X AI
|
||||
[X AI](https://x.ai) is an LLM API provider that offers a wide range of models. Note: choose a model that supports function calling.
|
||||
|
||||
```python
|
||||
from langchain_openai import ChatOpenAI
|
||||
from browser_use import Agent
|
||||
from pydantic import SecretStr
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
api_key = os.getenv("GROK_API_KEY")
|
||||
|
||||
# Initialize the model
|
||||
llm = ChatOpenAI(
|
||||
base_url='https://api.x.ai/v1',
|
||||
model='grok-3-beta',
|
||||
api_key=SecretStr(api_key)
|
||||
)
|
||||
|
||||
# Create agent with the model
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
use_vision=False
|
||||
)
|
||||
```
|
||||
|
||||
Required environment variables:
|
||||
|
||||
```bash .env
|
||||
GROK_API_KEY=
|
||||
```
|
||||
|
||||
## Coming soon
|
||||
(We are working on it)
|
||||
- Groq
|
||||
- Github
|
||||
- Fine-tuned models
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
---
|
||||
description: "Customize the system prompt to control agent behavior and capabilities"
|
||||
applyTo: '**'
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
You can customize the system prompt in two ways:
|
||||
|
||||
1. Extend the default system prompt with additional instructions
|
||||
2. Override the default system prompt entirely
|
||||
|
||||
<Note>
|
||||
Custom system prompts allow you to modify the agent's behavior at a
|
||||
fundamental level. Use this feature carefully as it can significantly impact
|
||||
the agent's performance and reliability.
|
||||
</Note>
|
||||
|
||||
### Extend System Prompt (recommended)
|
||||
|
||||
To add additional instructions to the default system prompt:
|
||||
|
||||
```python
|
||||
extend_system_message = """
|
||||
REMEMBER the most important RULE:
|
||||
ALWAYS open first a new tab and go first to url wikipedia.com no matter the task!!!
|
||||
"""
|
||||
```
|
||||
|
||||
### Override System Prompt
|
||||
|
||||
<Warning>
|
||||
Not recommended! If you must override the [default system
|
||||
prompt](https://github.com/browser-use/browser-use/blob/main/browser_use/agent/system_prompt.md),
|
||||
make sure to test the agent yourself.
|
||||
</Warning>
|
||||
|
||||
Anyway, to override the default system prompt:
|
||||
|
||||
```python
|
||||
# Define your complete custom prompt
|
||||
override_system_message = """
|
||||
You are an AI agent that helps users with web browsing tasks.
|
||||
|
||||
[Your complete custom instructions here...]
|
||||
"""
|
||||
|
||||
# Create agent with custom system prompt
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=ChatOpenAI(model='gpt-4'),
|
||||
override_system_message=override_system_message
|
||||
)
|
||||
```
|
||||
|
||||
### Extend Planner System Prompt
|
||||
|
||||
You can customize the behavior of the planning agent by extending its system prompt:
|
||||
|
||||
```python
|
||||
extend_planner_system_message = """
|
||||
PRIORITIZE gathering information before taking any action.
|
||||
Always suggest exploring multiple options before making a decision.
|
||||
"""
|
||||
|
||||
# Create agent with extended planner system prompt
|
||||
llm = ChatOpenAI(model='gpt-4o')
|
||||
planner_llm = ChatOpenAI(model='gpt-4o-mini')
|
||||
|
||||
agent = Agent(
|
||||
task="Your task here",
|
||||
llm=llm,
|
||||
planner_llm=planner_llm,
|
||||
extend_planner_system_message=extend_planner_system_message
|
||||
)
|
||||
```
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -12,7 +12,7 @@ oauth_providers.csv
|
|||
.venv
|
||||
|
||||
.env
|
||||
#.sensitive.json
|
||||
.sensitive.json
|
||||
log_*.log
|
||||
|
||||
domains.txt
|
||||
|
|
@ -83,5 +83,6 @@ my.sh
|
|||
|
||||
log.txt
|
||||
data/
|
||||
!src/lib/utils/data
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/macos,windows
|
||||
# End of https://www.toptal.com/developers/gitignore/api/macos,windows
|
||||
42
.sensitive.example.json
Normal file
42
.sensitive.example.json
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"google.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "Vb1Mz9pgjY8JVs"
|
||||
},
|
||||
"accounts.google.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "Vb1Mz9pgjY8JVs"
|
||||
},
|
||||
"naver.com": {
|
||||
"x_username": "oauth-test-test",
|
||||
"x_password": "gx^AKz-289d3/7B"
|
||||
},
|
||||
"nid.naver.com": {
|
||||
"x_username": "oauth-test-test",
|
||||
"x_password": "gx^AKz-289d3/7B"
|
||||
},
|
||||
"github.com": {
|
||||
"x_username": "imnyang-bot",
|
||||
"x_password": "6PuVXCH9tpQLNm"
|
||||
},
|
||||
"apple.com": {
|
||||
"x_username": "",
|
||||
"x_password": ""
|
||||
},
|
||||
"appleid.apple.com": {
|
||||
"x_username": "",
|
||||
"x_password": ""
|
||||
},
|
||||
"microsoft.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "WHS123987"
|
||||
},
|
||||
"login.microsoftonline.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "WHS123987"
|
||||
},
|
||||
"facebook.com": {
|
||||
"x_username": "01047183675",
|
||||
"x_password": "whs3oauth@"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"google.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "Vb1Mz9pgjY8JVs"
|
||||
},
|
||||
"accounts.google.com": {
|
||||
"x_username": "whs.imnya.ng@gmail.com",
|
||||
"x_password": "Vb1Mz9pgjY8JVs"
|
||||
},
|
||||
"naver.com": {
|
||||
"x_username": "oauth-j93es",
|
||||
"x_password": "whs31234"
|
||||
},
|
||||
"nid.naver.com": {
|
||||
"x_username": "oauth-j93es",
|
||||
"x_password": "whs31234"
|
||||
},
|
||||
"github.com": {
|
||||
"x_username": "imnyang-bot",
|
||||
"x_password": "6PuVXCH9tpQLNm"
|
||||
}
|
||||
}
|
||||
93
README.md
93
README.md
|
|
@ -12,7 +12,7 @@
|
|||
> 그렇지 않으면 실행되지 않습니다.
|
||||
>
|
||||
> 윈도우 환경에서는 `sudo certutil -addstore root mitmproxy-ca-cert.cer`로 인증합니다.
|
||||
>
|
||||
>
|
||||
> Sudo가 활성화되어있지 않은 환경에서는 관리자로 상향된 쉘에서 실행합니다.
|
||||
>
|
||||
> MacOS 환경에서는 `sudo security add-trusted-cert -d -p ssl -p basic -k /Library/Keychains/System.keychain ~/.mitmproxy/mitmproxy-ca-cert.pem`으로 인증합니다.
|
||||
|
|
@ -20,7 +20,19 @@
|
|||
> 다른 플랫폼은 수동으로 설정되어야만 합니다.
|
||||
> https://docs.mitmproxy.org/stable/concepts/certificates/
|
||||
|
||||
현재 아래와 같은 환경에서 개발되며 테스트되고 있습니다.
|
||||
- ✅ MacOS 26 Tahoe Developer Beta 2 (25A5295e) en-US aarch64
|
||||
- ✅ Windows 11 Pro for Workstations 24H2 (26100.4351) en-US x86_64
|
||||
- ✅ NixOS 25.05.804570.c7ab75210cb8 KDE 6 / Linux 6.15 x86_64
|
||||
|
||||
---
|
||||
다음과 같은 명령어로 환경을 설정합니다.
|
||||
|
||||
설명하는 가이드를 잘 따라가면 설정할 수 있습니다.
|
||||
|
||||
```sh
|
||||
uv run setup.py
|
||||
```
|
||||
|
||||
uv 설치 후 다음과 같은 명령어를 입력합니다.
|
||||
|
||||
|
|
@ -32,51 +44,19 @@ venv와 패키지가 설치가 됩니다.
|
|||
|
||||
---
|
||||
|
||||
~~browser_use가 Playwright에 대한 의존성이 있어 브라우저 설치가 필요합니다~~
|
||||
|
||||
스텔스 기능 때문에 Google Chrome이 필요합니다.
|
||||
|
||||
만약 설치가 되어 있지 않다면
|
||||
```
|
||||
playwright install chrome
|
||||
```
|
||||
`uv run setup.py`로 환경을 설정합니다.
|
||||
|
||||
---
|
||||
다음과 같은 명령어로 실행합니다.
|
||||
|
||||
```sh
|
||||
uv run main.py
|
||||
```
|
||||
# 윈도우 인코딩 이슈 해결
|
||||
이거 해결 방법
|
||||

|
||||
|
||||
Environment는 .env.example에 따라 설정되어야합니다.
|
||||

|
||||
|
||||
.env.example을 .env로 복사하여서 사용해주세요.
|
||||
|
||||
# 로그인 방안
|
||||
|
||||
## 쿠키와 로컬 스토리지 설정 방법 (추천)
|
||||
|
||||

|
||||
|
||||
```sh
|
||||
uv run playwright open https://google.com/ --save-storage=./data/storage_state.json
|
||||
```
|
||||
|
||||
위 명령어를 실행하면 playwright Browser가 하나 열리는데 여기서 원하는 프로바이더를 모두 로그인 한 후에 브라우저를 정상적으로 닫으면 ./data/storage_state.json 경로에 쿠키, 로컬스토리지를 저장한 파일이 생성됩니다.
|
||||
|
||||
## Browser Use에게 직접 로그인 요청 (선택)
|
||||
<details>
|
||||
위에 쿠키와 로컬스토리지 설정 방법과 혼용해서 사용가능합니다.
|
||||
|
||||
`.sensitive.example.json`을 `.sensitive.json`으로 복사해서
|
||||
|
||||
안에 있는 예시 내용을 참고해서 작성해주시면 됩니다.
|
||||
더 자세한 내용은
|
||||
[Sensitive Data - Browser Use](https://docs.browser-use.com/customize/sensitive-data)를 참고하시면 좋을 것 같습니다.
|
||||
|
||||
[Sensitive Data - Browser Use](https://docs.browser-use.com/customize/sensitive-data)에서도 권장하지 않는 방법인만큼 애매하긴 하지만 쿠키와 로컬 스토리지를 저장하기 어려운 경우나 일부 flow에서 접근이 어려운 경우 사용해주세요.
|
||||
</details>
|
||||
이것도 setup.py 사용하면 반자동으로 할 수 있습니다.
|
||||
|
||||
못찾겠으면 intl.cpl 열어주세요.
|
||||
|
||||
# 실행
|
||||
|
||||
|
|
@ -87,14 +67,37 @@ curl "https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt" -o domains.txt
|
|||
```
|
||||
|
||||
```sh
|
||||
# ./run.sh {domains.txt 시작 줄} {domains.txt 끝 줄} {HTML 검사 Skip}
|
||||
./run.sh 12540 13000 False
|
||||
# uv run run.py {domains.txt 시작 줄} {domains.txt 끝 줄} {--skh} {--no-download}
|
||||
uv run run.py 1 100 --skh
|
||||
```
|
||||
|
||||
# Prompt 확장 가이드
|
||||
|
||||
```pwsh
|
||||
# ./run.ps1 {domains.txt 시작 줄} {domains.txt 끝 줄} {HTML 검사 Skip}
|
||||
./run.ps1 12540 13000 False
|
||||
## 1. 파일 생성
|
||||
|
||||
`lib/llm/prompt` 폴더에서 fallback 폴더를 복사하여
|
||||
|
||||
원하는 프로바이더를 추가해줍니다. `ex) lib/llm/prompt/Google/`
|
||||
|
||||
## 2. prompt.py 수정
|
||||
|
||||
Prompt에서 추가한 파일을 prompt.py에서 수정합니다.
|
||||
|
||||
만약 로그인 정보를 넣고 싶다면 Sensitive Data 플레이스홀더(`x_username`, `x_password`)를 아래와 같이 프롬프트에 사용합니다.
|
||||
`Log into example.com as user x_username with password x_password`
|
||||
|
||||
## 3. model.py
|
||||
|
||||
응답할 때 원하는 리턴 값을 `dict`로 받습니다.
|
||||
|
||||
## 4. \_\_init\_\_.py 수정
|
||||

|
||||
|
||||
추가한 prompt에 따라 import합니다.
|
||||
|
||||
## 5. 사용 방법
|
||||
```py
|
||||
from lib.llm.prompt.fallback import prompt, model
|
||||
```
|
||||
|
||||
# 참고하면 좋을만한 것
|
||||
|
|
|
|||
BIN
docs/image.png
BIN
docs/image.png
Binary file not shown.
|
Before Width: | Height: | Size: 1.3 MiB |
|
|
@ -1,25 +0,0 @@
|
|||
from langchain.callbacks.base import BaseCallbackHandler
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
|
||||
class QuotaExhaustedHandler(BaseCallbackHandler):
    """Callback handler that announces quota-exhaustion errors raised by the LLM."""

    def on_llm_error(self, error, **kwargs):
        """Print a notice when *error* mentions "ResourceExhausted" or "429".

        Nothing is retried or re-raised here; every other error is ignored.
        """
        error_text = str(error)
        quota_markers = ("ResourceExhausted", "429")
        if not any(marker in error_text for marker in quota_markers):
            return
        print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...")
        # backoff handled in scan_one_url
|
||||
|
||||
def CreateChatGoogleGenerativeAI(model: str):
    """Create a ``ChatGoogleGenerativeAI`` client configured with retries.

    Args:
        model: Model name handed to the client. The sentinel ``"fallback"``
            is replaced by ``"gemini-2.0-flash-lite"`` after two warnings
            are printed.

    Returns:
        The constructed ``ChatGoogleGenerativeAI`` instance.
    """
    if model == "fallback":
        for notice in (
            "⚠️ Fallback 모델을 사용합니다. Envorinment 변수를 확인하세요.",
            "⚠️ Model Gemini-2.0-flash-lite를 사용합니다.",
        ):
            print(notice)
        model = "gemini-2.0-flash-lite"

    client_options = {
        "model": model,
        "max_retries": 10,  # raised retry ceiling
        "model_kwargs": {
            "request_timeout": 120,  # longer timeout (2 minutes)
        },
        "callbacks": [QuotaExhaustedHandler()],
        "temperature": 0.1,
    }
    return ChatGoogleGenerativeAI(**client_options)
|
||||
|
|
@ -1,142 +0,0 @@
|
|||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
# Extended planner prompt
|
||||
extend_planner_system_message = f"""
|
||||
🎯 목적: 웹 자동화를 위한 **SSO 로그인 리디렉션 URL 수집**
|
||||
|
||||
📌 주의사항 (전제 조건)
|
||||
- ❌ **검색 엔진(Google, Bing 등) 사용 금지**
|
||||
- ✅ **초기 제공된 URL 내에서만 탐색**
|
||||
- ❌ 직접 이동하거나 추측한 링크 클릭 금지
|
||||
- ⛔ 추측한 URL은 대답하거나 클릭하지 마세요
|
||||
- OAuth가 아닌 일반 로그인은 무시
|
||||
- OAuth가 없다면 **즉시 중단**하고 빈 배열 반환
|
||||
|
||||
---
|
||||
|
||||
## 🧩 Step 0: 페이지 차단(Block) 여부 확인
|
||||
|
||||
초기 URL의 로그인 페이지에 접근하여 다음 사항을 점검합니다:
|
||||
|
||||
- 🚫 페이지 차단됨 (Firewall, Access Denied 등) → 즉시 중단
|
||||
- 🔒 CAPTCHA는 통과 가능 (해결하고 계속 진행)
|
||||
- ❗ 로그인 UI가 정상적으로 로드되지 않으면 중단
|
||||
|
||||
📤 차단 시 즉시 반환:
|
||||
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"provider": "Blocked",
|
||||
"oauth_uri": "-"
|
||||
}}
|
||||
]
|
||||
````
|
||||
|
||||
---
|
||||
|
||||
## 🔍 Step 1: 로그인 페이지 탐색
|
||||
|
||||
* 초기 URL에 접속하여 **클라이언트용 로그인 페이지**로 진입합니다.
|
||||
* 쿠키 동의, 개인정보 안내 등 팝업은 무시하거나 닫고 계속 진행하세요.
|
||||
* 페이지가 정상 로드되었다고 가정합니다.
|
||||
|
||||
---
|
||||
|
||||
## 👀 Step 2: SSO 로그인 버튼 식별
|
||||
|
||||
아래 **OAuth SSO 버튼들만** 유효합니다:
|
||||
|
||||
* ✅ Google, GitHub, Facebook, LinkedIn, Microsoft, Naver
|
||||
|
||||
**유효한 버튼 기준**:
|
||||
|
||||
* OAuth 인증 흐름을 실제로 트리거
|
||||
* `window.location` 또는 `<a href=...>` 또는 JS로 redirect가 발생
|
||||
|
||||
**제외 버튼들 (클릭 금지)**:
|
||||
|
||||
* ❌ 일반 로그인, 패스키, 이메일/전화번호, 인증서 기반, 비밀번호 입력
|
||||
|
||||
---
|
||||
|
||||
## ✅ Step 3: 모든 SSO 버튼 클릭 및 로그인 시도
|
||||
|
||||
> 각 SSO 로그인 버튼을 클릭한 뒤 반드시 아래 절차를 **완전히 수행**해야 합니다.
|
||||
|
||||
각 SSO 버튼에 대해 다음을 수행:
|
||||
|
||||
1. 버튼 클릭
|
||||
2. 🌐 페이지가 이동되면, **현재 주소창(URL)을 확인하여 리디렉션된 OAuth URL**을 `oauth_uri`로 저장
|
||||
→ 예: `https://accounts.google.com/o/oauth2/auth?...`
|
||||
3. ✅ 로그인 진행:
|
||||
- 로그인 페이지에서 OAuth 인증을 완료합니다.
|
||||
- sign in with your username(email) x_username and password is x_password
|
||||
- 버튼같은게 안눌리면 새로고침을 해봐
|
||||
- **로그인 완료 후 authorize 등 버튼이 있으면 클릭**
|
||||
- GitHub같은 경우 Authorize 버튼이 뜨는데 오래걸릴 수 있음, 기다려야 할 수도 있음
|
||||
- 만약 버튼을 눌러도 반응이 없을 경우 새로고침을 한번 해주세요.
|
||||
- 로그인 실패 시에는 다음 SSO 버튼을 클릭합니다.
|
||||
4. 로그인이 성공하면 모두 쿠키를 삭제하고 다음 SSO 버튼을 클릭합니다.
|
||||
5. 다음 SSO 버튼으로 반복 진행
|
||||
|
||||
쿠키 삭제 방법:
|
||||
chrome://settings/clearBrowserData에 들어가서 삭제해주세요.
|
||||
|
||||
🛑 절대 아래와 같이 해석하지 말 것:
|
||||
- ❌ 버튼 클릭 후 페이지 로딩만 기다리고 돌아가기
|
||||
- ❌ URL 저장 없이 go_back() 호출
|
||||
|
||||
📤 각 로그인 후 다음 형식으로 결과 저장:
|
||||
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"provider": "Google",
|
||||
"oauth_uri": "https://example.com/auth/google?client_id=..."
|
||||
}}
|
||||
]
|
||||
````
|
||||
|
||||
````
|
||||
|
||||
---
|
||||
|
||||
### ✨ 추가 안전 장치: "뒤로가기(go_back) 호출 조건" 제한
|
||||
|
||||
```text
|
||||
🛑 뒤로가기(go_back)은 다음 조건이 모두 충족될 때만 사용 => 다만 로그인 실패 시, 뒤로가기 수행:
|
||||
- ✅ 로그인 흐름이 완료됨 (예: redirect back to app, or callback URL)
|
||||
- ✅ 현재 리디렉션 URL이 수집됨
|
||||
- ✅ 결과에 저장 후 다음 버튼 탐색을 위해 복귀 필요할 때
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🚫 Step 4: 버튼 없음 또는 예외 발생 시
|
||||
|
||||
* 유효한 SSO 버튼이 **전혀 없을 경우**
|
||||
* 예외, 오류 등 발생 시
|
||||
|
||||
📤 즉시 중단 후 다음 형식으로 반환:
|
||||
|
||||
```json
|
||||
[]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 📎 중요 규칙 요약
|
||||
|
||||
* ✅ **모든 SSO 로그인은 반드시 실행** (가능한 버튼은 모두 클릭)
|
||||
* 🔁 단계는 반드시 순서대로 진행
|
||||
* 🔐 로그인은 쿠키/세션으로 유지된 상태에서 수행
|
||||
* 🚫 직접 ID/PW 입력하지 않음
|
||||
* ⛔ 추측 URL 클릭 금지
|
||||
* ❗ 예외 발생 시 반드시 규정된 JSON 포맷만 반환
|
||||
|
||||
---
|
||||
"""
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
from lib.utils.browser_use.func import *
|
||||
|
||||
# Initialize configuration
|
||||
proxy_url = setup_proxy()
|
||||
|
||||
# Create browser profile
|
||||
async def GetProfile():
    """Build the ``BrowserProfile`` used for a scan session.

    Awaits ``setup_storage_state()`` for the storage-state path and reads the
    module-level ``proxy_url``; a falsy ``proxy_url`` means no proxy is set.

    Returns:
        The configured ``BrowserProfile``.
    """
    state_file = await setup_storage_state()
    proxy_settings = {"server": proxy_url} if proxy_url else None

    return BrowserProfile(
        # Security
        disable_security=True,
        stealth=True,
        # Display
        headless=False,
        device_scale_factor=1,
        window_size={"width": 1600, "height": 900},
        viewport={"width": 1600, "height": 900},
        # Persistence
        user_data_dir=None,
        storage_state=state_file,
        # Network
        proxy=proxy_settings,
        # Extra browser arguments
        args=get_browser_args(),
    )
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
async def clean_resources(agent=None, session=None):
    """Release per-scan resources on a best-effort basis.

    Deletes ``./data/storage_state_temp.json`` when it exists, then awaits
    ``close()`` on *agent* and on *session* when they are truthy. Each failure
    is printed and swallowed so one failing step does not stop the others.
    """
    temp_state = Path("./data/storage_state_temp.json").resolve()
    if temp_state.exists():
        try:
            print(f"🗑️ 임시 스토리지 상태 파일 삭제 중: {temp_state}")
            temp_state.unlink()
            print("🗑️ 임시 스토리지 상태 파일 삭제 완료.")
        except Exception as exc:
            print(f"⚠️ 임시 스토리지 상태 파일 삭제 실패: {exc}")

    # Close the agent first, then the session, reporting failures separately.
    closables = (
        (agent, "⚠️ 에이전트 리소스 정리 실패: "),
        (session, "⚠️ 세션 리소스 정리 실패: "),
    )
    for resource, failure_prefix in closables:
        if not resource:
            continue
        try:
            await resource.close()
        except Exception as exc:
            print(f"{failure_prefix}{exc}")
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
from typing import List
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Output models
class OAuth(BaseModel):
    """A single OAuth provider entry reported by a scan."""

    # Provider label; the planner prompt in this repo uses values such as
    # "Google" or "Blocked".
    provider: str
    # URL recorded for the provider; the planner prompt uses "-" for blocked pages.
    oauth_uri: str
|
||||
|
||||
|
||||
class OAuthList(BaseModel):
    """Container model holding every ``OAuth`` entry found for one site."""

    # main.py reads this key ("oauth_providers") from the agent's JSON result.
    oauth_providers: List[OAuth]
|
||||
293
main.py
293
main.py
|
|
@ -1,293 +0,0 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import csv
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
import signal
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from browser_use import (
|
||||
Agent,
|
||||
BrowserSession,
|
||||
Controller,
|
||||
ActionResult,
|
||||
)
|
||||
from patchright.async_api import async_playwright as async_patchright, Page
|
||||
from pydantic import BaseModel
|
||||
|
||||
from lib.utils import env_cheker
|
||||
from lib.utils.backend_client import notify_backend
|
||||
from lib.utils.browser_use import model
|
||||
from lib.utils.browser_use.clean_resources import clean_resources
|
||||
from lib.utils.browser_use.func import setup_storage_state
|
||||
from lib.utils.browser_use.sensitive_data import GetSensitiveData
|
||||
from lib.utils.config import BACKEND_URL, GOOGLE_MODEL, GOOGLE_PLANNER_MODEL
|
||||
from lib.utils.is_html import is_html_url
|
||||
from lib.utils.read_txt import read_lines_between
|
||||
from lib.llm.prompt import extend_planner_system_message
|
||||
from lib.utils.logger import logger
|
||||
import lib.utils.browser_use as browser_use
|
||||
from lib.llm import CreateChatGoogleGenerativeAI
|
||||
|
||||
load_dotenv(verbose=True, override=True)
|
||||
|
||||
# Exponential backoff settings
|
||||
INITIAL_BACKOFF = int(os.getenv("INITIAL_BACKOFF", "60")) # seconds
|
||||
MAX_BACKOFF = int(os.getenv("MAX_BACKOFF", "600")) # seconds
|
||||
|
||||
# 진행 상황 추적을 위한 전역 변수
|
||||
current_progress = {"current_index": 0, "total": 0, "current_url": "", "start_line": 0}
|
||||
progress_file = Path("data/scan_progress.json")
|
||||
|
||||
env_cheker()
|
||||
if os.getenv("LMNR_PROJECT_API_KEY"):
|
||||
from lmnr import Laminar
|
||||
|
||||
Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY"))
|
||||
|
||||
|
||||
def save_progress():
    """Persist the module-level ``current_progress`` dict to ``progress_file``.

    The parent directory is created first so that saving also works on a
    fresh checkout where ``data/`` does not exist yet (this function is called
    from the Ctrl+C handler, where a failure would lose the progress).
    """
    Path(progress_file).parent.mkdir(parents=True, exist_ok=True)
    with open(progress_file, 'w', encoding='utf-8') as f:
        json.dump(current_progress, f, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def load_progress():
    """Load the progress snapshot written by a previous run.

    Returns:
        The decoded JSON content of ``progress_file``, or ``None`` when the
        file is missing, unreadable, or does not contain valid JSON.
    """
    try:
        with open(progress_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: covers
        # json.JSONDecodeError and UnicodeDecodeError for a corrupt file.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit pass through.
        return None
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """Handle Ctrl+C: report progress, save it, and exit with status 0.

    Args:
        signum: Signal number delivered by the interpreter (unused).
        frame: Current stack frame (unused).
    """
    total = current_progress['total']
    done = current_progress['current_index']
    # total is still 0 when the interrupt arrives before the domain list has
    # been loaded (or when the list is empty); avoid dividing by zero then.
    percent = done / total * 100 if total else 0.0

    print("\n" + "="*60)
    print("🛑 스캔이 중단되었습니다!")
    print(f"📊 진행 상황:")
    print(f" - 전체: {total}개 URL")
    print(f" - 완료: {done}개 URL")
    print(f" - 현재 처리 중: {current_progress['current_url']}")
    print(f" - domains.txt의 {current_progress['start_line'] + done}번째 줄")
    print(f" - 진행률: {done}/{total} ({percent:.1f}%)")
    print("="*60)
    save_progress()
    print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
    # Same effect as exit(0), without relying on the ``site``-provided helper.
    raise SystemExit(0)
|
||||
|
||||
|
||||
# 시그널 핸들러 등록
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
|
||||
|
||||
# ── URL별로 Browser를 새로 띄우는 함수 ──
|
||||
def _print_oauth_entries(url, oauth_entries):
    """Print the providers found for *url*, flagging URLs that look masked."""
    print("-" * 50)
    print(f"🔗 Scanned URL: {url}\n")
    print("🔐 Detected OAuth Providers and URLs:")
    for entry in oauth_entries:
        if "<" in entry.oauth_uri or "..." in entry.oauth_uri:
            print(
                f"⚠️ WARNING: {entry.provider} URL may be masked or incomplete:\n{entry.oauth_uri}\n"
            )
        else:
            print(f"- {entry.provider}: {entry.oauth_uri}")
    print("-" * 50)


def _append_oauth_csv(url, oauth_entries, csv_file="./data/oauth_providers.csv"):
    """Append one CSV row per entry, writing the header when the file is new."""
    file_exists = os.path.isfile(csv_file)
    with open(csv_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if not file_exists:
            writer.writerow(["issuer", "provider", "oauth_uri"])
        writer.writerows(
            [url, entry.provider, entry.oauth_uri] for entry in oauth_entries
        )


# ── Launch a fresh browser for every URL ──
async def scan_one_url(url: str, skip_html_check: bool = False):
    """Scan one site for OAuth/SSO providers and append the findings to the CSV.

    A new browser session is created for every attempt. A failed attempt is
    retried until three attempts have failed in total: quota errors
    ("ResourceExhausted"/"429") back off exponentially, other errors wait
    30 seconds. After the third failure the URL is logged and skipped.

    Args:
        url: Domain or URL to scan; ``https://`` is prepended when the value
            does not start with "http".
        skip_html_check: When true, ``is_html_url`` is not consulted at all.

    Raises:
        json.JSONDecodeError: If the agent's final result is not valid JSON.
        ValueError: If the decoded result lacks the expected structure.
    """
    await setup_storage_state()
    target_url = url if url.startswith("http") else f"https://{url}"
    print(f"🚀 Starting scan for: {target_url}")

    # 1) Make sure the URL serves HTML. The flag is tested first so that
    #    skipping really avoids the probe (and any error it could raise).
    if not skip_html_check and not is_html_url(target_url):
        print(f"❌ {target_url} 은(는) HTML이 아닙니다. 스킵합니다.")
        return

    # Tell the backend that the scan is starting
    notify_backend(target_url)

    try_cnt = 0
    while True:
        # Reset per attempt so a failed retry never re-closes the previous agent.
        agent = None

        # NOTE(review): the Playwright driver started here is never stopped
        # explicitly in this function — confirm that closing the session
        # disposes of it.
        session = BrowserSession(
            playwright=(await async_patchright().start()),
            browser_profile=await browser_use.GetProfile(),
        )

        initial_actions = [{"open_tab": {"url": target_url}}]
        # NOTE(review): output_model is pydantic's BaseModel re-exported by the
        # model module, while the result below is read as an OAuthList-shaped
        # payload — confirm whether model.OAuthList was intended.
        controller = Controller(output_model=model.BaseModel, exclude_actions=['search_google'])

        print("🤖 LLM 모델 초기화 및 스캔 시작...")
        print("Available actions:", list(controller.registry.registry.actions.keys()))
        try:
            agent = Agent(
                browser_session=session,
                initial_actions=initial_actions,
                sensitive_data=GetSensitiveData(),
                task=(
                    "Navigate to the login page, identify all OAuth provider buttons (excluding Passkey), "
                    "and for each one: click the button, follow the full OAuth login flow as far as possible "
                    "with a real user account (without using a fake or non-existent account), and capture the "
                    "final redirect URL after login. Do not stop at just collecting the initial authorization URL—"
                    "actually perform the login step like a real user would. "
                    "If the OAuth buttons do not appear immediately, wait briefly to allow the page to load completely before proceeding. "
                    "Always log out before starting the login process, and make sure to attempt the login again from a clean state."
                ),
                llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL),
                planner_llm=CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL),
                controller=controller,
                extend_planner_system_message=extend_planner_system_message,
            )
            response = await agent.run()
            final_result = response.final_result()

            if final_result is None:
                raise ValueError("final_result()가 None을 반환했습니다.")
        except Exception as e:
            await clean_resources(agent, session)

            quota_error = "ResourceExhausted" in str(e) or "429" in str(e)
            # Same delays as before: exponential (based on the number of
            # earlier failures) for quota errors, a flat 30 s otherwise.
            wait = min(INITIAL_BACKOFF * (2**try_cnt), MAX_BACKOFF) if quota_error else 30

            try_cnt += 1
            if try_cnt >= 3:
                # Give up right away instead of sleeping before a retry that
                # will never happen.
                if quota_error:
                    print(f"❌ {url} 스캔 실패: API 쿼터 문제가 지속됩니다.")
                    logger(f"❌ {url} 스캔 실패: API 쿼터 문제: {e}")
                else:
                    print(f"❌ {url} 스캔 실패: 에러: {e}")
                    logger(f"❌ {url} 스캔 실패: 에러: {e}")
                return

            if quota_error:
                print(f"⚠️ API 쿼터 에러: {e}. {wait}초 대기 후 재시도합니다...")
            else:
                print(f"⚠️ 에러 발생: {e}. {try_cnt}번째 재시도 중...")
            await asyncio.sleep(wait)
            continue

        # Process the scan result; the browser is released even when parsing
        # or the CSV write fails.
        try:
            data = json.loads(final_result)
            try:
                oauth_entries = [model.OAuth(**entry) for entry in data["oauth_providers"]]
            except Exception as e:
                raise ValueError(f"결과 파싱 실패: {e}\n원본 결과: {final_result}") from e

            _print_oauth_entries(url, oauth_entries)
            # Save to CSV (append)
            _append_oauth_csv(url, oauth_entries)
        finally:
            await clean_resources(agent, session)
        break
|
||||
|
||||
|
||||
async def loop(
    filepath: str, start_line: int, end_line: int, skip_html_check: bool = False
):
    """Scan every domain in the given line range, tracking resumable progress.

    Args:
        filepath: Path of the text file holding one domain per line.
        start_line: First line to read (passed to ``read_lines_between``).
        end_line: Last line to read (passed to ``read_lines_between``).
        skip_html_check: Forwarded to ``scan_one_url``.
    """
    # Build the domain list from the file path and line range
    target_list = read_lines_between(
        filepath=filepath, start_line=start_line, end_line=end_line
    )

    # Initialise the shared progress state
    current_progress["total"] = len(target_list)
    current_progress["start_line"] = start_line
    current_progress["current_index"] = 0

    # Offer to resume when a snapshot for the same start line exists
    prev_progress = load_progress()
    if prev_progress and prev_progress.get("start_line") == start_line:
        print(f"📋 이전 진행 상황을 발견했습니다:")
        print(f" - 이전 완료: {prev_progress['current_index']}/{prev_progress['total']}")
        print(f" - 마지막 처리: {prev_progress.get('current_url', 'N/A')}")

        resume = input("이어서 진행하시겠습니까? (y/n): ").lower().strip()
        if resume == 'y':
            current_progress["current_index"] = prev_progress["current_index"]
            target_list = target_list[current_progress["current_index"]:]
            print(f"✅ {current_progress['current_index']}번째부터 재개합니다.")

    # Freeze the resume offset once. Deriving the index from
    # current_progress["current_index"] inside the loop double-counts, because
    # that value is itself advanced on every iteration.
    resume_offset = current_progress["current_index"]

    for i, url in enumerate(target_list):
        actual_index = resume_offset + i
        current_progress["current_url"] = url
        current_progress["current_index"] = actual_index

        print(f"\n🔄 Processing {actual_index + 1}/{current_progress['total']}: {url}")
        print(f"📍 domains.txt의 {start_line + actual_index}번째 줄")

        # Pause between URLs so the API quota can recover; nothing has been
        # scanned yet before the first URL of this run, so no pause there.
        if i > 0:
            print("⏳ API 쿼터 보호를 위해 30초 대기 중...")
            await asyncio.sleep(30)

        await scan_one_url(url, skip_html_check=skip_html_check)

        # Record the URL as done and persist the snapshot
        current_progress["current_index"] = actual_index + 1
        save_progress()

    print(f"\n🎉 모든 스캔이 완료되었습니다! ({current_progress['total']}개 URL)")
    # The snapshot is no longer needed once everything has finished
    if os.path.exists(progress_file):
        os.remove(progress_file)
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the scan loop.

    ``-skh/--skip-html-check`` accepts an optional textual boolean so that
    both ``-skh`` and the form used by the launcher scripts
    (``-skh True`` / ``-skh False``) work as expected.
    """

    def _parse_bool(value: str) -> bool:
        """Convert a textual boolean; ``type=bool`` would treat "False" as True."""
        normalized = value.strip().lower()
        if normalized in ("1", "true", "t", "yes", "y", "on"):
            return True
        if normalized in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(f"invalid boolean value: {value!r}")

    parser = argparse.ArgumentParser(
        prog="domain_scanner",
        description="도메인 목록 파일에서 지정한 줄 범위를 읽어 SSO 스캔을 수행합니다.",
    )

    # Command-line options
    parser.add_argument(
        "-f",
        "--file",
        type=str,
        required=True,
        help="도메인 목록이 들어 있는 텍스트 파일 경로 (예: ./domains.txt)",
    )
    parser.add_argument(
        "-s", "--start", type=int, required=True, help="읽기 시작 줄 번호 (1-based)"
    )
    parser.add_argument(
        "-e", "--end", type=int, required=True, help="읽기 종료 줄 번호 (1-based)"
    )
    parser.add_argument(
        "-skh",
        "--skip-html-check",
        type=_parse_bool,
        nargs="?",  # the value is optional: a bare flag means True
        const=True,
        default=False,
        help="HTML 페이지 체크를 건너뛰고 모든 URL을 스캔합니다. (기본값: False)",
    )

    args = parser.parse_args()

    # Hand the parsed values to the async entry point
    asyncio.run(
        loop(
            filepath=args.file,
            start_line=args.start,
            end_line=args.end,
            skip_html_check=args.skip_html_check,
        )
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -5,7 +5,10 @@ description = "Add your description here"
|
|||
readme = "README.md"
|
||||
requires-python = ">=3.13"
|
||||
dependencies = [
|
||||
"browser-use[memory]>=0.2.7",
|
||||
"black>=25.1.0",
|
||||
"browser-use[memory]==0.5.3",
|
||||
"chardet>=5.2.0",
|
||||
"isort>=6.0.1",
|
||||
"lmnr[all]>=0.6.10",
|
||||
"patchright>=1.52.5",
|
||||
]
|
||||
|
|
|
|||
36
run.ps1
36
run.ps1
|
|
@ -1,36 +0,0 @@
|
|||
# ── Configuration ──
# Python script to run (including the file extension)
$PYTHON_SCRIPT = "main.py"

# Domain list path (passed to the Python script via -f)
$DOMAIN_FILE = "./data/domains.txt"

# Location the domain list is downloaded from
$DOMAIN_URL = "https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt"
# ─────────────

# Validate the argument count (2 or 3) before doing any network work
if ($args.Count -lt 2 -or $args.Count -gt 3) {
    Write-Host "Usage: $($MyInvocation.MyCommand.Name) <start_line> <end_line> [skip_header]"
    Write-Host "예시) $($MyInvocation.MyCommand.Name) 10000 11000"
    Write-Host "예시) $($MyInvocation.MyCommand.Name) 10000 11000 True"
    exit 1
}

$START_LINE = [int]$args[0]
$END_LINE = [int]$args[1]
$SKIP_HEADER = if ($args.Count -eq 3) { $args[2] } else { "False" }

# Make sure the download directory exists
New-Item -ItemType Directory -Force -Path (Split-Path -Parent $DOMAIN_FILE) | Out-Null

# Download domains.txt. Invoke-WebRequest is called explicitly: in Windows
# PowerShell 5.1 `curl` is an alias for it, and the short "-o" switch is
# ambiguous there.
Invoke-WebRequest -Uri $DOMAIN_URL -OutFile $DOMAIN_FILE -UseBasicParsing

$timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
Write-Host "[$timestamp] Processing lines $START_LINE to $END_LINE..."

# Run the Python script
#   -f   DOMAIN_FILE : domain list path
#   -s   START_LINE  : first line to read
#   -e   END_LINE    : last line to read
#   -skh SKIP_HEADER : value for the script's --skip-html-check option
uv run $PYTHON_SCRIPT -f $DOMAIN_FILE -s $START_LINE -e $END_LINE -skh $SKIP_HEADER

Write-Host "처리 완료."
|
||||
165
run.py
Normal file
165
run.py
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# ── 설정 부분 ──
|
||||
PYTHON_SCRIPT = "./src/main.py"
|
||||
DOMAIN_FILE = "./data/domains.txt"
|
||||
# ─────────────
|
||||
|
||||
|
||||
def download_domains():
    """Download the latest domain list into ``DOMAIN_FILE``.

    The directory that holds ``DOMAIN_FILE`` is created when missing. The
    process exits with status 1 when the HTTP request fails or times out.
    """
    try:
        print("도메인 파일 다운로드 중...")
        response = requests.get(
            "https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt",
            timeout=30,  # never hang forever on an unresponsive server
        )
        response.raise_for_status()

        # Create the directory if needed. dirname("./data") is ".", which
        # never created the data directory, so derive it from the target file.
        target_dir = os.path.dirname(DOMAIN_FILE)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(DOMAIN_FILE, "w", encoding="utf-8") as f:
            f.write(response.text)
        print("도메인 파일 다운로드 완료")
    except requests.RequestException as e:
        print(f"도메인 파일 다운로드 실패: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def run_script(start_line, end_line, skh_option):
    """Run ``PYTHON_SCRIPT`` through ``uv run`` for the given line range.

    SIGINT/SIGTERM handlers are installed while the child runs; they stop the
    child (terminate, then kill after a timeout) and exit with status 0. The
    previous handlers are restored afterwards. A non-zero child exit status
    is propagated through ``sys.exit``.

    Args:
        start_line: First line number, passed as ``-s``.
        end_line: Last line number, passed as ``-e``.
        skh_option: When truthy, ``--skip-html-check`` is appended.
    """
    started_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{started_at}] Processing lines {start_line} to {end_line}...")

    child = None
    shutdown = {"started": False}

    def _on_signal(sig, frame):
        # Run the shutdown sequence only once, even if signals repeat.
        if shutdown["started"]:
            return
        shutdown["started"] = True

        print("\n🛑 종료 신호를 받았습니다. 정리 작업을 진행합니다...")

        if child is not None:
            try:
                # Ask the child to stop and give it five seconds to comply.
                print("📤 서브프로세스에 종료 신호를 전달합니다...")
                child.terminate()
                child.wait(timeout=5)
                print("✅ 서브프로세스가 정상적으로 종료되었습니다.")
            except subprocess.TimeoutExpired:
                print("⚠️ 서브프로세스가 응답하지 않아 강제 종료합니다...")
                child.kill()
                try:
                    child.wait(timeout=3)
                    print("✅ 서브프로세스가 강제 종료되었습니다.")
                except subprocess.TimeoutExpired:
                    print("❌ 서브프로세스 강제 종료 실패")
            except Exception as exc:
                print(f"❌ 프로세스 종료 중 오류: {exc}")

        print("✅ 런처 종료 완료.")
        sys.exit(0)

    # Remember the handlers that were active before this call.
    previous_sigint = signal.signal(signal.SIGINT, _on_signal)
    previous_sigterm = signal.signal(signal.SIGTERM, _on_signal)

    try:
        command = [
            "uv",
            "run",
            PYTHON_SCRIPT,
            "-f",
            DOMAIN_FILE,
            "-s",
            str(start_line),
            "-e",
            str(end_line),
        ]
        if skh_option:
            command += ["--skip-html-check"]

        child = subprocess.Popen(command)
        exit_status = child.wait()

        if exit_status != 0:
            print(f"❌ Python 스크립트가 오류 코드 {exit_status}로 종료되었습니다.")
            sys.exit(exit_status)

    except KeyboardInterrupt:
        _on_signal(signal.SIGINT, None)
    except Exception as exc:
        print(f"❌ 스크립트 실행 중 오류: {exc}")
        if child is not None:
            try:
                child.terminate()
                child.wait(timeout=3)
            except subprocess.TimeoutExpired:
                child.kill()
        sys.exit(1)
    finally:
        # Put the original handlers back.
        signal.signal(signal.SIGINT, previous_sigint)
        signal.signal(signal.SIGTERM, previous_sigterm)
|
||||
|
||||
|
||||
def main():
    """Parse the launcher arguments, fetch the domain list, and run the scan.

    Exits with status 1 when a line number is negative, when the start line
    is greater than the end line, or when ``--no-download`` is given but
    ``DOMAIN_FILE`` does not exist.
    """
    parser = argparse.ArgumentParser(
        description="도메인 처리 스크립트 실행기",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
사용 예시:
  uv run run.py 10000 11000 # 10000~11000 라인 처리
  uv run run.py 10000 11000 --skh # SKH 옵션 활성화
  uv run run.py 10000 11000 --no-download # 다운로드 생략
""",
    )

    parser.add_argument("start_line", type=int, help="시작 라인 번호")
    parser.add_argument("end_line", type=int, help="종료 라인 번호")
    parser.add_argument("--skh", action="store_true", help="SKH 옵션 활성화")
    parser.add_argument(
        "--no-download", action="store_true", help="도메인 파일 다운로드 생략"
    )

    args = parser.parse_args()

    # Validate the line range
    if args.start_line < 0 or args.end_line < 0:
        print("라인 번호는 0 이상이어야 합니다.")
        sys.exit(1)

    if args.start_line > args.end_line:
        # The old message said the start line had to be *greater* than or
        # equal to the end line, the opposite of the condition checked here.
        print("시작 라인은 종료 라인보다 작거나 같아야 합니다.")
        sys.exit(1)

    # Download the domain file (or make sure a local copy exists)
    if not args.no_download:
        download_domains()
    elif not os.path.exists(DOMAIN_FILE):
        print(
            f"도메인 파일({DOMAIN_FILE})이 존재하지 않습니다. --no-download 옵션을 제거하거나 파일을 준비해주세요."
        )
        sys.exit(1)

    # Run the scan script
    run_script(args.start_line, args.end_line, args.skh)

    print("처리 완료.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
28
run.sh
28
run.sh
|
|
@ -1,28 +0,0 @@
|
|||
#!/bin/bash

# ── Configuration ──
PYTHON_SCRIPT="main.py"
DOMAIN_FILE="./data/domains.txt"
DOMAIN_URL="https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt"
# ─────────────

# Validate the argument count before doing any network work
if [ $# -lt 2 ]; then
    echo "Usage: $0 <start_line> <end_line> [skh_option]"
    echo "예시) $0 10000 11000 True"
    exit 1
fi

START_LINE=$1
END_LINE=$2
# The third argument is optional and defaults to "False"
SKH_OPTION=${3:-False}

# Make sure the download directory exists
mkdir -p "$(dirname "$DOMAIN_FILE")"

# -f makes curl fail on HTTP errors instead of saving the error page as the
# domain list; -L follows redirects.
if ! curl -fL "$DOMAIN_URL" -o "$DOMAIN_FILE"; then
    echo "도메인 파일 다운로드 실패" >&2
    exit 1
fi

echo "[$(date '+%Y-%m-%d %H:%M:%S')] Processing lines ${START_LINE} to ${END_LINE}..."
uv run "$PYTHON_SCRIPT" -f "$DOMAIN_FILE" -s "$START_LINE" -e "$END_LINE" -skh "$SKH_OPTION"

echo "처리 완료."
|
||||
263
setup.py
Normal file
263
setup.py
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
import os
|
||||
import subprocess
|
||||
import webbrowser
|
||||
import asyncio
|
||||
from browser_use import BrowserProfile, Agent
|
||||
from browser_use.llm import ChatGoogle
|
||||
from dotenv import load_dotenv
|
||||
import threading
|
||||
|
||||
load_dotenv(verbose=True, override=True)
|
||||
|
||||
# Create the data directory itself: os.path.dirname("./data") evaluates to
# ".", so the previous call never created "data".
os.makedirs("./data", exist_ok=True)
|
||||
|
||||
|
||||
def create_file_from_example(target: str, example: str) -> bool:
    """Copy *example* to *target* unless *target* already exists.

    Args:
        target: Path of the file to create.
        example: Path of the template whose text is copied.

    Returns:
        True only when *target* was created; False when it already existed or
        when *example* is missing.
    """
    if os.path.exists(target):
        print(f"ℹ️ {target} 파일이 이미 존재합니다.")
        return False

    if not os.path.exists(example):
        print(
            f"⚠️ {example} 파일이 존재하지 않습니다. {target} 생성에 실패했습니다."
        )
        return False

    with (
        open(example, "r", encoding="utf-8") as template,
        open(target, "w", encoding="utf-8") as destination,
    ):
        destination.write(template.read())
    print(f"✅ {target} 파일이 {example}에서 생성되었습니다.")
    return True
|
||||
|
||||
|
||||
def install_playwright_chrome():
    """Install Playwright's Chromium via ``uv run playwright install chromium``.

    Failures are reported on stdout instead of being raised, so the rest of
    the setup can continue.
    """
    print("\n🛠️ Playwright의 Chromium을 설치 중입니다...")
    print("👉 이 작업은 시간이 걸릴 수 있습니다. 잠시 기다려주세요.")
    try:
        subprocess.run(["uv", "run", "playwright", "install", "chromium"], check=True)
        print("✅ Playwright Chrome 설치 완료.")
    except subprocess.CalledProcessError as e:
        # The child's output streams straight to the terminal and is not
        # captured, so e.stdout is None here; calling .decode() on it
        # unconditionally raised AttributeError inside this handler.
        captured = e.stdout
        if isinstance(captured, bytes):
            captured = captured.decode(errors="replace")
        if captured and "already" in captured:
            print("ℹ️ Chrome이 이미 설치되어 있습니다.")
        else:
            print(f"❌ Playwright 설치 실패: {e}")
    except OSError as e:
        # Raised when the command cannot be started, e.g. `uv` is not on PATH.
        print(f"❌ Playwright 설치 실패: {e}")
    print("\n")
|
||||
|
||||
|
||||
def prompt_yes_no(message: str) -> bool:
    """Show *message* without a newline and return True for a y/yes answer.

    The answer is stripped and compared case-insensitively.
    """
    print(message, end="")
    answer = input().strip().lower()
    return answer in ["y", "yes"]
|
||||
|
||||
|
||||
def i_dont_like_windows():
    """Guide a Windows user through switching the system code page to UTF-8.

    Does nothing on non-Windows systems, or when the active code page read
    from the registry is already 65001. Otherwise it prints instructions,
    opens intl.cpl, and waits for the user to press Enter.
    """
    # Only relevant on Windows.
    if os.name != "nt":
        return

    # Query the Active Code Page (ACP) from the registry through PowerShell.
    acp_query = [
        "powershell",
        "-Command",
        '(Get-ItemProperty "HKLM:\\SYSTEM\\CurrentControlSet\\Control\\Nls\\CodePage").ACP',
    ]
    try:
        completed = subprocess.run(
            acp_query,
            capture_output=True,
            text=True,
            check=True,
        )
        if completed.stdout.strip() == "65001":
            print("현재 Active Code Page가 UTF-8로 설정되어 있습니다.")
            return
        print("현재 Active Code Page가 UTF-8로 설정되어 있지 않습니다.")
    except subprocess.CalledProcessError as e:
        print(f"코드 페이지 확인 실패: {e}")

    separator = "======================================================="
    print(separator)
    print("\n⚠️ Windows에서는 인코딩 문제가 발생합니다.")
    print("👉 엔터를 누르면 자동으로 intl.cpl이 열립니다.")
    print('👉 자세한 내용은 README.md에서 "윈도우 인코딩 해결"을 참조해주세요.\n')
    print(
        "⚠️ 경고 : 이 작업은 윈도우에서 킹갓 대한민국의 프로그램들의 한글이 정상적으로 표시되지 않을 수 있습니다."
    )
    # Pause until the user confirms.
    input("계속하려면 Enter 키를 누르세요...")

    webbrowser.open("intl.cpl")

    for line in (
        "👉 intl.cpl가 열렸습니다.\n",
        "👉 관리자 옵션 -> 시스템 로켈 변경",
        "👀 Beta: 세계 언어 지원을 위해 Unicode UTF-8 사용",
        "👉 이 설정을 변경한 후, 시스템을 재시작하세요.\n",
        "⚠️ 이 작업은 시스템 언어 설정을 변경하므로 주의가 필요합니다.\n",
        separator,
    ):
        print(line)
    input("계속하려면 Enter 키를 누르세요...")
|
||||
|
||||
|
||||
async def setup_user_data():
    """Interactively populate the browser profile stored in ./data/user_data.

    After the user confirms and GOOGLE_API_KEY is present, an Agent whose
    initial actions open google.com and then wait is started in a daemon
    thread with a non-headless browser, so the user can log in by hand. The
    function then blocks on stdin until an empty line is entered and stops
    the agent.

    NOTE(review): although declared ``async``, this function never awaits;
    every wait is a blocking ``input()`` call.
    """
    print("\n📂 사용자 데이터 디렉토리를 설정하시겠습니까?")
    print("⚠️ 사용자 데이터 디렉토리는 브라우저의 프로필 데이터를 저장하는 곳입니다.")
    print("✅ 이 작업은 Google API Key를 설정하고 나서 진행해야만합니다.")
    if prompt_yes_no("\033[1m\033[33m선택하시려면 y를 입력하세요 (y/n):\033[0m "):
        # The agent needs an LLM, so refuse to continue without an API key.
        if os.getenv("GOOGLE_API_KEY") is None:
            print(
                "⚠️ Google API Key가 설정되어 있지 않습니다. 먼저 Google API Key를 설정해주세요."
            )
            return
        print("======================================================")
        llm = ChatGoogle(
            model="gemini-2.0-flash",
        )
        # Open Google, then wait for the maximum 32-bit signed number of
        # seconds so the browser stays open until the agent is stopped.
        initial_actions = [
            {"go_to_url": {"url": "https://www.google.com", "new_tab": False}},
            {"wait": {"seconds": 2147483647}},
        ]

        agent = Agent(
            task="Just Wait",
            llm=llm,
            use_vision=False,
            initial_actions=initial_actions,
            browser_profile=BrowserProfile(
                disable_security=True,
                # stealth=True,
                headless=False,
                device_scale_factor=1,
                window_size={"width": 1600, "height": 900},
                viewport={"width": 1600, "height": 900},
                user_data_dir="./data/user_data",
                args=[
                    # "--disable-features=Translate,PasswordManagerDefaultEnabled",
                ],
                # presumably the default launch flags that should NOT be
                # passed to the browser — TODO confirm against BrowserProfile.
                ignore_default_args=[
                    "--disable-datasaver-prompt",
                    "--disable-component-extensions-with-background-pages",
                    "--disable-prompt-on-repost",
                    "--safeBrowse-disable-auto-update",
                    "--install-autogenerated-theme=0,0,0",
                    "--disable-speech-synthesis-api",
                    "--ash-no-nudges",
                    "--test-type=gpu",
                    "--noerrdialogs",
                    "--disable-external-intent-requests",
                    "--disable-breakpad",
                    "--disable-backgrounding-occluded-windows",
                    "--export-tagged-pdf",
                    "--disable-focus-on-load",
                    "--suppress-message-center-popups",
                    "--disable-renderer-backgrounding",
                    "--hide-crash-restore-bubble",
                    "--disable-back-forward-cache",
                    "--allow-legacy-extension-manifests",
                    # "--disable-field-trial-config",  # unclear why, but turning this off makes sites render in a compressed layout
                    "--disable-popup-blocking",
                    "--disable-background-networking",
                    "--no-first-run",
                    "--disable-blink-features=AutomationControlled",
                    "--password-store=basic",
                    "--enable-network-information-downlink-max",
                    "--allow-pre-commit-input",
                    "--enable-features=NetworkService,NetworkServiceInProcess",
                    "--metrics-recording-only",
                    "--silent-debugger-extension-api",
                    "--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate,AutomationControlled,BackForwardCache,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,HeavyAdPrivacyMitigations,PrivacySandboxSettings4,AutofillServerCommunication,CrashReporting,OverscrollHistoryNavigation,InfiniteSessionRestore,ExtensionDisableUnsupportedDeveloper",
                    "--disable-ipc-flooding-protection",
                    "--disable-hang-monitor",
                    "--disable-dev-shm-usage",
                    "--disable-client-side-phishing-detection",
                    "--log-level=2",
                    "--generate-pdf-document-outline",
                    "--disable-speech-api",
                    "--disable-search-engine-choice-screen",
                    "--no-service-autorun",
                    "--no-pings",
                    "--disable-component-update",
                    '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
                    "--disable-background-timer-throttling",
                    "--use-mock-keychain",
                    "--disable-features=IsolateOrigins,site-per-process",
                    # The arguments below came from the original example; keep or remove as needed.
                    "--enable-automation",
                    "--disable-extensions",
                    "--hide-scrollbars",
                ],
            ),
        )

        print("======================================================\n")
        print(
            "👉 브라우저가 열립니다. 필요한 로그인을 완료한 후 엔터키를 눌러 다음 단계로 진행하세요."
        )
        # This first Enter only acknowledges the message; the browser is
        # started afterwards.
        input("계속하려면 Enter 키를 누르세요...\n")
        print("======================================================")

        # Run the agent (and therefore the browser) in a background thread
        # with its own event loop so this thread can keep reading stdin.
        def run_agent():
            asyncio.run(agent.run())

        agent_thread = threading.Thread(target=run_agent)
        agent_thread.daemon = True
        agent_thread.start()

        # Wait until the user submits an empty line (plain Enter), then stop
        # the agent. Any non-empty input is ignored.
        while True:
            user_input = input("").strip().lower()
            if user_input == "":
                agent.stop()
                break

        print("======================================================")
        print("✅ 설정이 완료되었습니다.")
    else:
        print("🚫 설정이 취소되었습니다.")
        print("======================================================")
        print(
            "⚠️ 이후에 USER_DATA_DIR을 설정하려면, .env 파일을 참고하여 USER_DATA_DIR을 설정하세요.\n"
        )
|
||||
|
||||
|
||||
def setup_sensitive():
    """Optionally create .sensitive.json from .sensitive.example.json.

    Asks the user for confirmation first. The success message is printed
    only when the file was actually created; when the target already exists
    or the example is missing, create_file_from_example has already printed
    the reason.
    """
    print("\n🔐 Sensitive Data을 설정하시겠습니까?")
    print("👉 이미 세션을 설정했다면, 이 작업은 **선택사항**입니다.")
    print(
        "⚠️ 민감 정보 파일은 오류를 유발하거나 문제가 될 수 있으므로 가급적 세션 사용을 권장합니다."
    )
    if prompt_yes_no("\033[1m\033[33m선택하시려면 y를 입력하세요 (y/n):\033[0m "):
        print("======================================================")
        print("👀 .sensitive.json 파일을 생성합니다.")
        print("💾 Browser Use의 문서를 참조하여 수정을 수정해주세요.")
        print("https://docs.browser-use.com/customize/sensitive-data")
        created = create_file_from_example(".sensitive.json", ".sensitive.example.json")
        print("======================================================")
        # Do not claim success when nothing was written.
        if created:
            print("✅ .sensitive.json 파일이 생성되었습니다.")
    else:
        print("🚫 .sensitive.json 생성이 취소되었습니다.")
        print("======================================================")
        print(
            "⚠️ 이후에 민감 정보 파일을 설정하려면, .sensitive.example.json 파일을 참고하여 .sensitive.json 파일을 생성하세요.\n"
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Interactive first-time setup; the steps run in the order listed below.

    # 1. Create .env from .env.example
    create_file_from_example(".env", ".env.example")
    print("=====================================================")
    # 2. Install Chromium for Playwright
    install_playwright_chrome()
    print("=====================================================")

    # 3. Work around the Windows encoding problem (currently disabled)
    # i_dont_like_windows()
    # print("=====================================================")

    # 4. Set up the browser user-data directory
    asyncio.run(setup_user_data())
    print("=====================================================")

    # 5. Create .sensitive.json (currently disabled)
    # setup_sensitive()
    print("=====================================================")
    print("🎉 초기 설정이 완료되었습니다! 이제 스크립트를 실행할 준비가 되었습니다.")
|
||||
6
src/lib/browser_use/__init__.py
Normal file
6
src/lib/browser_use/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
from lib.browser_use.agents import *
|
||||
from lib.browser_use.func import *
|
||||
from lib.browser_use.init_profile import *
|
||||
from lib.browser_use.model import *
|
||||
from lib.browser_use.scanner import *
|
||||
from lib.browser_use.sensitive_data import *
|
||||
417
src/lib/browser_use/agents.py
Normal file
417
src/lib/browser_use/agents.py
Normal file
|
|
@ -0,0 +1,417 @@
|
|||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from browser_use import Agent, BrowserSession, Controller
|
||||
from patchright.async_api import async_playwright as async_patchright
|
||||
|
||||
from lib.browser_use.init_profile import GetProfile
|
||||
from lib.browser_use.sensitive_data import GetSensitiveData
|
||||
from lib.llm import CreateChatGoogle, get_prompt
|
||||
from lib.utils import config, logger
|
||||
|
||||
# Exponential backoff settings, overridable through environment variables.
# int() raises ValueError at import time if a variable is set to a
# non-integer value.
INITIAL_BACKOFF = int(os.getenv("INITIAL_BACKOFF", "60"))  # seconds; base delay
MAX_BACKOFF = int(os.getenv("MAX_BACKOFF", "600"))  # seconds; upper bound on the delay
|
||||
|
||||
|
||||
@dataclass
class RetryTask:
    """A unit of work waiting in the retry queue."""

    task_type: str  # "oauth_list" or "oauth_login"
    url: str  # site the task targets
    oauth_provider: Optional[str] = None  # set for "oauth_login" tasks
    retry_count: int = 0  # retries scheduled so far
    next_retry_time: Optional[datetime] = None  # earliest time the task may run again
    max_retries: int = 5  # retry_count limit before the task is given up
|
||||
|
||||
|
||||
# Global retry queue shared by every coroutine in this module.
retry_queue: list[RetryTask] = []
# Guards retry_queue. NOTE: asyncio.Lock is not reentrant — a coroutine that
# already holds it must not acquire it again.
retry_queue_lock = asyncio.Lock()
|
||||
|
||||
|
||||
async def add_to_retry_queue(task: RetryTask):
    """Queue *task*, or refresh the already-queued entry for the same job.

    Two tasks describe the same job when task_type, url and oauth_provider
    all match; in that case only retry_count and next_retry_time of the
    queued entry are overwritten.
    """
    job_key = (task.task_type, task.url, task.oauth_provider)

    async with retry_queue_lock:
        # Look for a queued entry describing the same job.
        queued = next(
            (
                entry
                for entry in retry_queue
                if (entry.task_type, entry.url, entry.oauth_provider) == job_key
            ),
            None,
        )

        if queued is None:
            # Nothing queued yet for this job: add it.
            retry_queue.append(task)
            print(
                f"➕ 재시도 큐에 작업 추가: {task.task_type} - {task.url} (재시도: {task.retry_count})"
            )
            return

        # Same job already queued: refresh its retry bookkeeping.
        queued.retry_count = task.retry_count
        queued.next_retry_time = task.next_retry_time
        print(
            f"📝 기존 작업 업데이트: {task.task_type} - {task.url} (재시도: {task.retry_count})"
        )
|
||||
|
||||
|
||||
async def process_retry_queue():
    """Run every queued task whose next_retry_time has passed.

    The queue lock is held only while the due tasks are moved out of the
    queue. The tasks themselves run without the lock, because a failing task
    is re-queued through add_to_retry_queue, which acquires the same
    non-reentrant lock; holding it across that call would deadlock.
    """
    async with retry_queue_lock:
        now = datetime.now()
        ready_tasks = []
        pending = []
        for task in retry_queue:
            if task.next_retry_time and task.next_retry_time <= now:
                ready_tasks.append(task)
            else:
                pending.append(task)
        # Mutate in place so the module-level list object stays the same.
        retry_queue[:] = pending

    if not ready_tasks:
        return

    print(f"🔄 {len(ready_tasks)}개의 재시도 작업 처리 중...")

    for task in ready_tasks:
        try:
            if task.task_type == "oauth_list":
                result = await _extract_oauth_list_internal(task.url)
                if result:
                    print(f"✅ 재시도 성공: OAuth 리스트 추출 - {task.url}")
                else:
                    await _handle_retry_failure(task)
            elif task.task_type == "oauth_login":
                result = await _test_oauth_login_internal(
                    task.url, task.oauth_provider
                )
                if result:
                    print(
                        f"✅ 재시도 성공: {task.oauth_provider} 로그인 - {task.url}"
                    )
                else:
                    await _handle_retry_failure(task)
            else:
                # Unknown types cannot be dispatched; report instead of
                # dropping them without a trace.
                print(f"⚠️ 알 수 없는 작업 유형, 건너뜀: {task.task_type} - {task.url}")
        except Exception as e:
            print(f"❌ 재시도 중 에러: {e}")
            await _handle_retry_failure(task)
|
||||
|
||||
|
||||
async def _handle_retry_failure(task: RetryTask):
    """Re-queue a failed task with exponential backoff, or give it up.

    The delay is INITIAL_BACKOFF * 2**retry_count seconds, capped at
    MAX_BACKOFF. Once retry_count has reached max_retries the task is
    dropped and the fact is printed and logged.
    """
    # Retry budget exhausted: report and stop.
    if not task.retry_count < task.max_retries:
        print(f"❌ 최대 재시도 횟수 초과: {task.task_type} - {task.url}")
        logger(f"❌ 최대 재시도 횟수 초과: {task.task_type} - {task.url}")
        return

    task.retry_count += 1
    wait_time = INITIAL_BACKOFF * (2**task.retry_count)
    if wait_time > MAX_BACKOFF:
        wait_time = MAX_BACKOFF
    task.next_retry_time = datetime.now() + timedelta(seconds=wait_time)
    await add_to_retry_queue(task)
    print(f"⏰ {wait_time}초 후 재시도 예정: {task.task_type} - {task.url}")
|
||||
|
||||
|
||||
async def get_retry_queue_status():
    """Return a snapshot of the retry queue as plain dicts.

    The result has ``queue_length`` and ``tasks``; each task entry carries
    task_type, url, oauth_provider, retry_count and next_retry_time (ISO
    string, or None when unset).
    """

    def describe(entry):
        when = entry.next_retry_time
        return {
            "task_type": entry.task_type,
            "url": entry.url,
            "oauth_provider": entry.oauth_provider,
            "retry_count": entry.retry_count,
            "next_retry_time": when.isoformat() if when else None,
        }

    async with retry_queue_lock:
        status = {"queue_length": len(retry_queue)}
        status["tasks"] = [describe(entry) for entry in retry_queue]
        return status
|
||||
|
||||
|
||||
async def _run_agent_with_retry(agent_config):
    """Run one browser Agent, retrying up to three times on exceptions.

    Args:
        agent_config: dict with ``url``, ``log_context`` and ``agent_params``
            (keyword arguments for Agent). Optional ``task_type`` and
            ``oauth_provider`` are copied into the RetryTask that is queued
            when the response looks like an API quota error.

    Returns:
        The agent's response, or None when the run was deferred to the retry
        queue or all attempts failed.
    """
    # NOTE(review): these markers are matched against the whole stringified
    # response, so page content containing e.g. "429" or "exceeded" is also
    # treated as a quota error.
    quota_keywords = (
        "429",
        "resource_exhausted",
        "resourceexhausted",
        "quota",
        "rate limit",
        "too many requests",
        "exceeded",
        "limit reached",
    )
    max_attempts = 3
    try_cnt = 0
    url = agent_config["url"]
    headless = os.getenv("HEADLESS", "False").lower() == "true"

    while try_cnt < max_attempts:
        tmp_profile_dir = None
        try:
            profile_info = await GetProfile(headless=headless)
            # GetProfile returns [profile] or [profile, tmp_dir]; indexing
            # [1] unconditionally raised IndexError after a successful run.
            if len(profile_info) > 1 and isinstance(profile_info[1], str):
                tmp_profile_dir = profile_info[1]

            # NOTE(review): the started playwright instance is never stopped
            # here — confirm whether BrowserSession takes ownership of it.
            session = BrowserSession(
                playwright=(await async_patchright().start()),
                browser_profile=profile_info[0],
            )

            agent = Agent(browser_session=session, **agent_config["agent_params"])

            response = await agent.run()

            # Lower-case first: the markers are lower-case, as in the other
            # quota checks of this module.
            response_text = str(response).lower()
            if any(keyword in response_text for keyword in quota_keywords):
                print(f"⚠️ API 쿼터 에러 발생, 재시도 큐에 추가: {url}")
                task = RetryTask(
                    task_type=agent_config.get("task_type", "unknown"),
                    url=url,
                    oauth_provider=agent_config.get("oauth_provider"),
                    retry_count=try_cnt + 1,
                    next_retry_time=datetime.now() + timedelta(seconds=INITIAL_BACKOFF),
                )
                await add_to_retry_queue(task)
                return None

            return response

        except Exception as e:
            # Generic failure: count it and retry after a pause.
            try_cnt += 1
            if try_cnt >= max_attempts:
                error_msg = "최대 재시도 횟수 초과."
                logger(
                    f"❌ {url} - {agent_config['log_context']} 실패: {error_msg}: {e}"
                )
                print(f"❌ {url} - {agent_config['log_context']} 실패: {error_msg}")
                return None

            print(f"⚠️ 에러 발생: {e}. {try_cnt}번째 재시도 중...")
            await asyncio.sleep(30)
            continue
        finally:
            # Remove the temporary profile copy on every exit path (success,
            # quota deferral and failure) so attempts do not leak directories.
            if tmp_profile_dir:
                shutil.rmtree(tmp_profile_dir, ignore_errors=True)
                print(f"🗑️ 임시 프로필 디렉토리 삭제 완료: {tmp_profile_dir}")
    return None
|
||||
|
||||
|
||||
async def _extract_oauth_list_internal(url: str):
    """Run the agent that lists the OAuth providers on *url*'s login page.

    Also used by the retry queue. ``https://`` is prepended when *url* has no
    http(s) prefix.

    Returns:
        The ``sso_list`` value of the agent's JSON result, or an empty list
        when the agent produced nothing, the result is not a JSON object, or
        no providers were found.
    """
    target_url = url if url.startswith("http") else f"https://{url}"
    print(f"🔎 OAuth 리스트 추출 시작: {target_url}")
    prompt, model = get_prompt("auth")

    agent_config = {
        "url": target_url,
        "log_context": "OAuth 리스트 추출",
        # Read by _run_agent_with_retry when it queues a quota retry; without
        # it the task is queued as "unknown" and never dispatched.
        "task_type": "oauth_list",
        "agent_params": {
            "initial_actions": [{"go_to_url": {"url": target_url, "new_tab": False}}],
            "sensitive_data": GetSensitiveData(),
            "task": (
                "Navigate to the login page and identify all OAuth provider buttons (excluding Passkey). "
                "DO NOT click any OAuth buttons or attempt to login. "
                "Just find and list all available OAuth providers with their button texts or provider names. "
                "Return a list of OAuth providers found on the login page."
            ),
            "llm": CreateChatGoogle(config.GOOGLE_MODEL),
            "planner_llm": (
                CreateChatGoogle(config.GOOGLE_PLANNER_MODEL)
                if config.GOOGLE_PLANNER_MODEL
                and os.getenv("ENABLE_PLANNER_MODEL_OAUTH_LIST")
                else None
            ),
            "controller": Controller(
                output_model=model if not isinstance(model, str) else None,
                exclude_actions=["search_google", "unknown_action", "unkown"],
            ),
            "extend_system_message": prompt,
            "extend_planner_system_message": prompt,
        },
    }

    response = await _run_agent_with_retry(agent_config)

    if not response:
        return []

    final_result = response.final_result()
    if not final_result:
        print("OAuth 리스트 추출 결과가 없습니다.")
        return []

    try:
        data = json.loads(final_result)
    except json.JSONDecodeError as e:
        print(f"❌ 결과 파싱 실패: {e}")
        logger(f"❌ {url} 결과 파싱 실패: {final_result}")
        return []

    print(final_result)
    # Valid JSON that is not an object (e.g. a bare list) has no .get().
    if not isinstance(data, dict):
        print("❌ 결과 파싱 실패: JSON 객체가 아닙니다.")
        logger(f"❌ {url} 결과 파싱 실패: {final_result}")
        return []

    oauth_providers = data.get("sso_list", [])
    if not oauth_providers:
        print("❌ OAuth 제공자가 없습니다.")
        logger(f"❌ {url} - OAuth 제공자 없음: {final_result}")
        return []
    print(f"✅ OAuth 제공자 추출 완료: {oauth_providers}")
    return oauth_providers
|
||||
|
||||
|
||||
async def extract_oauth_list(url: str):
    """First agent: find the login page and extract only the OAuth list.

    An exception whose message looks like an API quota / rate-limit error
    queues an "oauth_list" retry and yields an empty list; any other
    exception is re-raised.
    """
    try:
        return await _extract_oauth_list_internal(url)
    except Exception as e:
        message = str(e).lower()
        quota_markers = [
            "429",
            "resource_exhausted",
            "resourceexhausted",
            "quota",
            "rate limit",
            "too many requests",
            "exceeded",
            "limit reached",
        ]
        quota_hit = False
        for marker in quota_markers:
            if marker in message:
                quota_hit = True
                break

        # Not a quota problem: let the caller see the original error.
        if not quota_hit:
            raise e

        print(f"⚠️ API 쿼터 에러 발생, 재시도 큐에 추가: {url}")
        retry_at = datetime.now() + timedelta(seconds=INITIAL_BACKOFF)
        await add_to_retry_queue(
            RetryTask(
                task_type="oauth_list",
                url=url,
                retry_count=1,
                next_retry_time=retry_at,
            )
        )
        return []
|
||||
|
||||
|
||||
async def _test_oauth_login_internal(url: str, oauth_provider: str):
    """Run the agent that attempts a login on *url* through *oauth_provider*.

    Also used by the retry queue. ``https://`` is prepended when *url* has no
    http(s) prefix.

    Returns:
        True only when the agent's JSON result is an object whose ``status``
        is "success"; False in every other case.
    """
    target_url = url if url.startswith("http") else f"https://{url}"
    print(f"🔐 {oauth_provider} 로그인 시작: {target_url}")

    prompt, model = get_prompt(oauth_provider)

    agent_config = {
        "url": target_url,
        "log_context": f"{oauth_provider} 로그인",
        # Read by _run_agent_with_retry when it queues a quota retry; without
        # the type the task is queued as "unknown" and never dispatched.
        "task_type": "oauth_login",
        "oauth_provider": oauth_provider,
        "agent_params": {
            "initial_actions": [{"go_to_url": {"url": target_url, "new_tab": False}}],
            "sensitive_data": GetSensitiveData(),
            "task": (
                f"Navigate to the login page, find and click the {oauth_provider} OAuth button, "
                f"then follow the complete OAuth login flow as far as possible with a real user account. "
                f"Capture the final redirect URL after login completion. "
                f"If login fails or encounters errors, report the issue. "
                f"Focus only on {oauth_provider} - ignore other OAuth providers."
            ),
            "llm": CreateChatGoogle(config.GOOGLE_MODEL),
            "planner_llm": (
                CreateChatGoogle(config.GOOGLE_PLANNER_MODEL)
                if config.GOOGLE_PLANNER_MODEL
                and os.getenv("ENABLE_PLANNER_MODEL_OAUTH_LOGIN")
                else None
            ),
            "controller": Controller(
                output_model=model if not isinstance(model, str) else None,
                exclude_actions=["search_google", "unknown_action", "unkown"],
            ),
            "extend_system_message": prompt,
            "extend_planner_system_message": prompt,
        },
    }

    response = await _run_agent_with_retry(agent_config)

    # Fetch the result once instead of calling final_result() twice.
    final_result = response.final_result() if response else None
    if not final_result:
        print(f"❌ {oauth_provider} 로그인 실패")
        return False

    try:
        result_data = json.loads(final_result)
    except json.JSONDecodeError:
        print(f"❌ {oauth_provider} 결과 파싱 실패")
        return False

    # Valid JSON that is not an object (e.g. a bare list) has no .get().
    if not isinstance(result_data, dict):
        print(f"❌ {oauth_provider} 결과 파싱 실패")
        return False

    status = result_data.get("status", "")
    if status == "success":
        print(f"✅ {oauth_provider} 로그인 완료")
        logger(f"✅ {url} - {oauth_provider} 로그인 결과: {final_result}")
        return True

    print(f"❌ {oauth_provider} 로그인 실패: {status}")
    logger(f"❌ {url} - {oauth_provider} 로그인 실패: {final_result}")
    return False
|
||||
|
||||
|
||||
async def test_oauth_login(url: str, oauth_provider: str):
    """Second agent: attempt a login through one specific OAuth provider.

    An exception whose message looks like an API quota / rate-limit error
    queues an "oauth_login" retry and yields False; any other exception is
    re-raised.
    """
    try:
        return await _test_oauth_login_internal(url, oauth_provider)
    except Exception as e:
        message = str(e).lower()
        quota_markers = [
            "429",
            "resource_exhausted",
            "resourceexhausted",
            "quota",
            "rate limit",
            "too many requests",
            "exceeded",
            "limit reached",
        ]
        quota_hit = False
        for marker in quota_markers:
            if marker in message:
                quota_hit = True
                break

        # Not a quota problem: let the caller see the original error.
        if not quota_hit:
            raise e

        print(f"⚠️ API 쿼터 에러 발생, 재시도 큐에 추가: {oauth_provider} - {url}")
        retry_at = datetime.now() + timedelta(seconds=INITIAL_BACKOFF)
        await add_to_retry_queue(
            RetryTask(
                task_type="oauth_login",
                url=url,
                oauth_provider=oauth_provider,
                retry_count=1,
                next_retry_time=retry_at,
            )
        )
        return False
|
||||
|
||||
|
||||
async def start_retry_queue_processor():
    """Start the background loop that processes the retry queue.

    Must be awaited from inside a running event loop. The loop checks the
    queue every 30 seconds and backs off for 60 seconds after an error.

    Returns:
        The asyncio.Task running the loop, so callers can cancel or await it.
    """

    async def queue_processor():
        while True:
            try:
                await process_retry_queue()
                await asyncio.sleep(30)  # check the queue every 30 seconds
            except Exception as e:
                print(f"❌ 재시도 큐 처리 중 에러: {e}")
                await asyncio.sleep(60)  # wait one minute after an error

    # Run as a background task. The event loop keeps only weak references to
    # tasks, so hold a strong one here; otherwise the processor could be
    # garbage-collected while it is still running.
    processor_task = asyncio.create_task(queue_processor())
    start_retry_queue_processor._task = processor_task
    print("🔄 재시도 큐 처리기 시작됨")
    return processor_task
|
||||
|
||||
|
||||
# 모듈 로딩 시 자동으로 백그라운드 처리기 시작
|
||||
# (실제 애플리케이션에서는 main 함수에서 호출하는 것이 좋음)
|
||||
def init_retry_system():
    """Announce retry-system initialisation.

    Only prints a status line; it is meant to be called from the main
    application.
    """
    banner = "🔧 재시도 시스템 초기화 중..."
    print(banner)
|
||||
110
src/lib/browser_use/cleanup.py
Normal file
110
src/lib/browser_use/cleanup.py
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
"""
|
||||
브라우저 리소스 정리를 위한 모듈
|
||||
"""
|
||||
import os
|
||||
import shutil
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
async def cleanup_browser_resources(agent=None, session=None, user_data_dir=None):
    """Release browser-related resources; every step is best-effort.

    Args:
        agent: optional object with an awaitable ``close()``; given 10 s.
        session: optional object with an awaitable ``close()``; given 5 s.
        user_data_dir: optional temporary profile directory to delete.

    Besides the arguments, this removes ./data/storage_state_temp.json and
    the directory recorded in ./data/userdata.dump (then the dump file).
    Failures are printed and never raised.
    """
    print("🔄 브라우저 리소스 정리를 시작합니다...")

    # Agent
    if agent:
        try:
            print("🔄 에이전트 리소스 정리 중...")
            # Bound the time spent waiting for the browser to shut down.
            await asyncio.wait_for(agent.close(), timeout=10.0)
            print("✅ 에이전트 리소스 정리 완료.")
        except asyncio.TimeoutError:
            print("⚠️ 에이전트 종료 시간 초과. 강제 종료합니다.")
        except Exception as e:
            print(f"⚠️ 에이전트 리소스 정리 실패: {e}")

    # Session
    if session:
        try:
            print("🔄 세션 리소스 정리 중...")
            await asyncio.wait_for(session.close(), timeout=5.0)
            print("✅ 세션 리소스 정리 완료.")
        except asyncio.TimeoutError:
            print("⚠️ 세션 종료 시간 초과.")
        except Exception as e:
            print(f"⚠️ 세션 리소스 정리 실패: {e}")

    # Temporary storage-state file
    storage_state_temp_path = Path("./data/storage_state_temp.json").resolve()
    if storage_state_temp_path.exists():
        try:
            print(f"🗑️ 임시 스토리지 상태 파일 삭제 중: {storage_state_temp_path}")
            storage_state_temp_path.unlink()
            print("✅ 임시 스토리지 상태 파일 삭제 완료.")
        except Exception as e:
            print(f"⚠️ 임시 스토리지 상태 파일 삭제 실패: {e}")

    # Temporary user-data directory passed by the caller
    if user_data_dir and os.path.exists(user_data_dir):
        try:
            print(f"🗑️ 임시 사용자 데이터 디렉토리 삭제 중: {user_data_dir}")
            await asyncio.sleep(0.5)  # give the browser time to exit fully
            shutil.rmtree(user_data_dir)
            print("✅ 임시 사용자 데이터 디렉토리 삭제 완료.")
        except Exception as e:
            print(f"⚠️ 임시 사용자 데이터 디렉토리 삭제 실패: {e}")

    # Directory recorded in userdata.dump
    log_file = "./data/userdata.dump"
    if os.path.exists(log_file):
        try:
            with open(log_file, "r") as f:
                tmp_user_data_dir = f.read().strip()
            if tmp_user_data_dir and os.path.exists(tmp_user_data_dir):
                print(f"🗑️ 기록된 임시 사용자 데이터 디렉토리 삭제 중: {tmp_user_data_dir}")
                await asyncio.sleep(0.5)  # give the browser time to exit fully
                shutil.rmtree(tmp_user_data_dir)
                print("✅ 기록된 임시 사용자 데이터 디렉토리 삭제 완료.")
            os.remove(log_file)
            print("✅ userdata.dump 파일 삭제 완료.")
        except Exception as e:
            print(f"⚠️ userdata.dump 관련 정리 실패: {e}")

    print("✅ 브라우저 리소스 정리가 완료되었습니다.")
|
||||
|
||||
|
||||
def cleanup_all_running_tasks():
    """Cancel every other unfinished asyncio task on the running loop.

    The task that calls this function is left alone: cancelling it as well
    would abort the caller's own cleanup at its next ``await``. Does nothing
    when no event loop is running.

    NOTE: cancellation is only requested here; the cancelled tasks finish
    unwinding on later loop iterations.
    """
    try:
        loop = asyncio.get_running_loop()
        current = asyncio.current_task(loop)
        tasks = [
            task
            for task in asyncio.all_tasks(loop)
            if task is not current and not task.done()
        ]

        if tasks:
            print(f"🔄 {len(tasks)}개의 실행 중인 태스크를 정리합니다...")
            for task in tasks:
                task.cancel()

            # Collect the outcomes so the cancelled tasks' results and
            # exceptions are marked as retrieved. The gathering future is
            # kept alive by the callbacks it registers on the tasks.
            asyncio.gather(*tasks, return_exceptions=True)
            print("✅ 모든 태스크 정리 완료.")
    except RuntimeError:
        # No event loop is running.
        pass
    except Exception as e:
        print(f"⚠️ 태스크 정리 중 오류: {e}")
|
||||
|
||||
|
||||
async def emergency_cleanup():
    """Minimal cleanup for an emergency shutdown.

    Cancels running tasks, then runs the default browser resource cleanup
    with no agent, session or user-data directory.
    """
    start_msg = "🚨 긴급 리소스 정리 실행 중..."
    done_msg = "✅ 긴급 리소스 정리 완료."

    print(start_msg)
    cleanup_all_running_tasks()  # cancel all tasks
    await cleanup_browser_resources()  # default resource cleanup
    print(done_msg)
|
||||
|
|
@ -1,11 +1,14 @@
|
|||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from browser_use import BrowserProfile
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
def setup_proxy():
|
||||
"""Configure proxy settings from environment variables."""
|
||||
proxy_host = os.getenv("PROXY_HOST")
|
||||
|
|
@ -20,30 +23,6 @@ def setup_proxy():
|
|||
return None
|
||||
|
||||
|
||||
async def setup_storage_state():
    """Copy data/storage_state.json to a temp file and return its path.

    Paths are resolved four directory levels above this file. Returns the
    temp file path as a string, or None when no storage state exists.
    """
    # Resolve relative to this file so the working directory does not matter.
    data_dir = Path(__file__).parent.parent.parent.parent / "data"
    storage_state_path = data_dir / "storage_state.json"
    storage_state_temp_path = data_dir / "storage_state_temp.json"

    print(f"📂 Storage state path: {storage_state_path}")
    print(f"📂 Temp storage state path: {storage_state_temp_path}")

    if not storage_state_path.exists():
        print("⚠️ No existing storage state found")
        return None

    # Start from a clean temp file, then copy the saved state into it.
    if storage_state_temp_path.exists():
        storage_state_temp_path.unlink()

    saved_state = storage_state_path.read_text(encoding="utf-8")
    storage_state_temp_path.write_text(saved_state, encoding="utf-8")
    print(f"🔄 Using existing storage state: {storage_state_temp_path}")
    return str(storage_state_temp_path)
|
||||
|
||||
|
||||
def get_browser_args():
|
||||
"""Get browser arguments for enhanced compatibility and security."""
|
||||
return [
|
||||
134
src/lib/browser_use/init_profile.py
Normal file
134
src/lib/browser_use/init_profile.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
|
||||
from lib.browser_use.func import *
|
||||
from lib.utils.config import USER_DATA_DIR
|
||||
|
||||
# Initialize configuration
|
||||
proxy_url = setup_proxy()
|
||||
|
||||
|
||||
async def GetProfile(headless=False):
    """Build a BrowserProfile backed by a throw-away copy of USER_DATA_DIR.

    When USER_DATA_DIR is an existing directory it is copied into a fresh
    temporary directory, whose path is recorded in ./data/userdata.dump so a
    later run can remove it. If the copy fails the profile is created
    without a user-data directory.

    Args:
        headless: forwarded to BrowserProfile.

    Returns:
        ``[profile, tmp_user_data_dir]`` when a temporary copy was made,
        otherwise ``[profile]``.
    """
    user_data_dir = None
    tmp_user_data_dir = None

    if USER_DATA_DIR and os.path.isdir(USER_DATA_DIR):
        try:
            tmp_user_data_dir = tempfile.mkdtemp(prefix="browser_use_")
            print(f"🔧 기본 사용자 데이터 디렉토리: {USER_DATA_DIR}")
            print(f"🔧 임시 사용자 데이터 디렉토리: {tmp_user_data_dir}")

            log_file = os.path.join("./data", "userdata.dump")
            if not os.path.exists("./data"):
                os.makedirs("./data")

            # Clean up the directory recorded by a previous run, if any.
            if os.path.exists(log_file):
                try:
                    with open(log_file, "r") as f:
                        old_tmp_dir = f.read().strip()
                    if old_tmp_dir and os.path.exists(old_tmp_dir):
                        shutil.rmtree(old_tmp_dir)
                        print(f"🗑️ 이전 임시 디렉토리 정리: {old_tmp_dir}")
                except Exception as e:
                    print(f"⚠️ 이전 임시 디렉토리 정리 실패: {e}")
                os.remove(log_file)

            # Record the new temporary directory.
            with open(log_file, "w") as f:
                f.write(tmp_user_data_dir)

            # copytree(dirs_exist_ok=False) needs a non-existent destination,
            # so drop the empty directory mkdtemp just created.
            if os.path.exists(tmp_user_data_dir):
                shutil.rmtree(tmp_user_data_dir)
            shutil.copytree(
                USER_DATA_DIR,
                tmp_user_data_dir,
                dirs_exist_ok=False,
                ignore_dangling_symlinks=True,
            )
            user_data_dir = tmp_user_data_dir
            print(f"✅ 사용자 데이터 디렉토리 복사 완료: {user_data_dir}")
        except Exception as e:
            print(f"❌ 사용자 데이터 디렉토리 복사 실패: {e}")
            # Remove the partial copy on failure.
            if tmp_user_data_dir and os.path.exists(tmp_user_data_dir):
                try:
                    shutil.rmtree(tmp_user_data_dir)
                except Exception:
                    pass
            tmp_user_data_dir = None
            user_data_dir = None
    print(proxy_url)

    # Only pass --proxy-server when a proxy is configured; previously an
    # empty string was handed to the browser as an argument.
    extra_args = []
    if proxy_url:
        extra_args.append("--proxy-server=" + proxy_url)

    profile = BrowserProfile(
        # Security settings
        # disable_security=True,
        # Display settings
        headless=headless,
        # Data persistence
        user_data_dir=user_data_dir,
        # Network settings
        proxy={"server": proxy_url} if proxy_url else None,
        # Additional arguments
        args=extra_args,
        # "--disable-features=Translate,PasswordManagerDefaultEnabled",
        ignore_default_args=[
            # "--disable-datasaver-prompt",
            # "--disable-component-extensions-with-background-pages",
            # "--disable-prompt-on-repost",
            # "--safeBrowse-disable-auto-update",
            # "--install-autogenerated-theme=0,0,0",
            # "--disable-speech-synthesis-api",
            # "--ash-no-nudges",
            # "--test-type=gpu",
            # "--noerrdialogs",
            # "--disable-external-intent-requests",
            # "--disable-breakpad",
            # "--disable-backgrounding-occluded-windows",
            # "--export-tagged-pdf",
            # "--disable-focus-on-load",
            # "--suppress-message-center-popups",
            # "--disable-renderer-backgrounding",
            # "--hide-crash-restore-bubble",
            # "--disable-back-forward-cache",
            # "--allow-legacy-extension-manifests",
            # # "--disable-field-trial-config",  # unclear why, but turning this off makes sites render in a compressed layout
            # "--disable-popup-blocking",
            # "--disable-background-networking",
            # "--no-first-run",
            # "--disable-blink-features=AutomationControlled",
            # "--password-store=basic",
            # "--enable-network-information-downlink-max",
            # "--allow-pre-commit-input",
            # "--enable-features=NetworkService,NetworkServiceInProcess",
            # "--metrics-recording-only",
            # "--silent-debugger-extension-api",
            # "--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate,AutomationControlled,BackForwardCache,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,HeavyAdPrivacyMitigations,PrivacySandboxSettings4,AutofillServerCommunication,CrashReporting,OverscrollHistoryNavigation,InfiniteSessionRestore,ExtensionDisableUnsupportedDeveloper",
            # "--disable-ipc-flooding-protection",
            # "--disable-hang-monitor",
            # "--disable-dev-shm-usage",
            # "--disable-client-side-phishing-detection",
            # "--log-level=2",
            # "--generate-pdf-document-outline",
            # "--disable-speech-api",
            # "--disable-search-engine-choice-screen",
            # "--no-service-autorun",
            # "--no-pings",
            # "--disable-component-update",
            # '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
            # "--disable-background-timer-throttling",
            # "--use-mock-keychain",
            # "--disable-features=IsolateOrigins,site-per-process",
            # The arguments below came from the original example; keep or remove as needed.
            "--enable-automation",
            "--disable-extensions",
            "--hide-scrollbars",
        ],
    )

    return [profile, tmp_user_data_dir] if tmp_user_data_dir else [profile]
|
||||
17
src/lib/browser_use/model.py
Normal file
17
src/lib/browser_use/model.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
# Output models
|
||||
class OAuth(BaseModel):
    # One OAuth provider entry.
    # Documented with comments rather than a docstring so the class's
    # generated schema description is left untouched.

    # Provider name.
    provider: str
    # Defaults to "" because the URI may not be known yet at the
    # OAuth-list extraction stage.
    oauth_uri: str = ""  # may be empty during the OAuth-list extraction stage
|
||||
|
||||
|
||||
class OAuthList(BaseModel):
    # Container for the providers discovered on a site.

    # Provider names as plain strings (the original note says this field was
    # changed to a string array).
    oauth_providers: List[str]  # now a list of strings
|
||||
|
||||
|
||||
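# Keep the legacy name available (backward compatibility).
# NOTE(review): this rebinds `BaseModel` in this module to OAuthList, shadowing
# pydantic's BaseModel for anything defined or imported after this line — confirm intended.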
|
||||
BaseModel = OAuthList
|
||||
191
src/lib/browser_use/scanner.py
Normal file
191
src/lib/browser_use/scanner.py
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
import asyncio
|
||||
import csv
|
||||
import os
|
||||
|
||||
from lib.browser_use.agents import (
|
||||
extract_oauth_list,
|
||||
get_retry_queue_status,
|
||||
start_retry_queue_processor,
|
||||
test_oauth_login,
|
||||
)
|
||||
from lib.browser_use.cleanup import cleanup_browser_resources
|
||||
from lib.utils import is_html_url, notify_backend, read_lines_between
|
||||
from lib.utils.progress import (
|
||||
current_progress,
|
||||
is_shutdown_requested,
|
||||
load_progress,
|
||||
progress_file,
|
||||
save_progress,
|
||||
)
|
||||
|
||||
|
||||
async def scan_one_url(url: str, skip_html_check: bool = False):
    """Scan one URL: extract its OAuth provider list, then try each login.

    Args:
        url: Target site. "https://" is prepended for the scan when the value
            does not start with "http".
        skip_html_check: When True the HTML pre-check is skipped entirely.

    Returns:
        None. Progress is printed, and the discovered providers are appended
        to ./data/oauth_providers.csv with status "pending".
    """
    target_url = url if url.startswith("http") else f"https://{url}"
    print(f"🚀 스캔 시작: {target_url}")

    # Tell the backend that the scan of this URL has started.
    notify_backend(target_url)

    # 1) Make sure the URL serves HTML. The flag is tested first so that
    #    is_html_url() is not called at all when the check is disabled.
    if not skip_html_check and not is_html_url(target_url):
        print(f"❌ {target_url} 은(는) HTML이 아닙니다. 스킵합니다.")
        return

    # Step 1: extract the OAuth provider list.
    oauth_entries = await extract_oauth_list(target_url)

    if not oauth_entries:
        print(f"❌ {target_url}에서 OAuth 제공자를 찾을 수 없습니다.")
        return

    print("-" * 50)
    print(f"🔗 스캔 URL: {url}")
    print(f"🔐 발견된 OAuth 제공자들: {len(oauth_entries)}개")
    for entry in oauth_entries:
        print(f" - {entry}")
    print("-" * 50)

    # Persist the provider list to CSV. Create the target directory first so
    # the append does not fail with FileNotFoundError on a fresh checkout.
    csv_file = "./data/oauth_providers.csv"
    os.makedirs(os.path.dirname(csv_file), exist_ok=True)
    file_exists = os.path.isfile(csv_file)
    with open(csv_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if not file_exists:
            # Header is written only when the file is being created.
            writer.writerow(["issuer", "provider", "oauth_uri", "login_tested"])
        for entry in oauth_entries:
            writer.writerow([url, entry, "", "pending"])

    # Step 2: attempt a login for each provider, one at a time.
    for i, oauth_entry in enumerate(oauth_entries):
        print(f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry}")

        # Pause between providers (not before the first one).
        if i > 0:
            print("⏳ OAuth 테스트 간 대기 중 (30초)...")
            await asyncio.sleep(30)

        # NOTE(review): this passes the raw `url`, not the normalised
        # `target_url` used above — confirm test_oauth_login() expects that.
        success = await test_oauth_login(url, oauth_entry)

        # The outcome is only logged; the CSV row stays "pending".
        status = "success" if success else "failed"
        print(f"📝 {oauth_entry} 로그인 결과: {status}")
|
||||
|
||||
|
||||
async def main_loop(
    filepath: str, start_line: int, end_line: int, skip_html_check: bool = False
):
    """Scan every URL in a line range of a file, then wait for the retry queue.

    Args:
        filepath: File handed to read_lines_between() to obtain the URL list.
        start_line: Start of the range; also used to match saved progress.
        end_line: End of the range, passed through to read_lines_between().
        skip_html_check: Forwarded to scan_one_url().

    Returns:
        None. cleanup_browser_resources() is always awaited on the way out,
        including on early return or error.
    """
    try:
        # Start the retry-queue processor.
        await start_retry_queue_processor()

        target_list = read_lines_between(
            filepath=filepath, start_line=start_line, end_line=end_line
        )

        # Remember the full list length; it must survive a resume.
        total_count = len(target_list)
        current_progress["total"] = total_count
        current_progress["start_line"] = start_line
        current_progress["current_index"] = 0

        prev_progress = load_progress()
        if prev_progress and prev_progress.get("start_line") == start_line:
            print("📋 이전 진행 상황을 발견했습니다:")
            print(
                f" - 이전 완료: {prev_progress['current_index']}/{prev_progress['total']}"
            )
            print(f" - 마지막 처리: {prev_progress.get('current_url', 'N/A')}")

            resume = input("이어서 진행하시겠습니까? (y/n): ").lower().strip()
            if resume == "y":
                start_index = prev_progress.get("current_index", 0)
                current_progress["current_index"] = start_index
                # Total stays the length of the original list.
                current_progress["total"] = total_count
                target_list = target_list[start_index:]
                print(f"✅ {start_index}번째부터 재개합니다.")

        for i, url in enumerate(target_list):
            # Honour a shutdown request before starting the next URL.
            if is_shutdown_requested():
                print("🛑 종료 요청으로 인해 스캔을 중단합니다.")
                break

            # current_index is the position within the full (unsliced) list.
            current_url_index = current_progress["current_index"]
            current_progress["current_url"] = url

            print(
                f"\n🔄 Processing {current_url_index + 1}/{current_progress['total']}: {url}"
            )
            print(
                f"📍 {os.path.basename(filepath)}의 {start_line + current_url_index}번째 줄"
            )

            # Report the retry-queue backlog, if any.
            retry_status = await get_retry_queue_status()
            if retry_status["queue_length"] > 0:
                print(f"⏳ 재시도 큐에 {retry_status['queue_length']}개 작업 대기 중")

            if i > 0:
                print("⏳ API 쿼터 보호를 위해 30초 대기 중...")
                # Sleep in 1-second slices so a shutdown request is noticed quickly.
                for _ in range(30):
                    if is_shutdown_requested():
                        print("🛑 대기 중 종료 요청으로 스캔을 중단합니다.")
                        return
                    await asyncio.sleep(1)

            try:
                await scan_one_url(url, skip_html_check=skip_html_check)
            except Exception as e:
                # Best effort: one failing URL must not abort the whole run.
                print(f"❌ {url} 스캔 중 오류 발생: {e}")
            else:
                # After a successful scan, report the retry-queue backlog again.
                retry_status_after = await get_retry_queue_status()
                if retry_status_after["queue_length"] > 0:
                    print(
                        f"📊 스캔 완료 후 재시도 큐 상태: {retry_status_after['queue_length']}개 작업 대기 중"
                    )

            # Advance and persist progress for failed URLs as well. Skipping
            # this on failure left current_index one behind the list position,
            # so every later counter, line number and resume point was wrong.
            current_progress["current_index"] = current_url_index + 1
            save_progress()

        # All URLs handled: wait until the retry queue is empty.
        if not is_shutdown_requested():
            print("\n🔄 모든 URL 처리 완료. 재시도 큐 처리 대기 중...")
            while True:
                if is_shutdown_requested():
                    print("🛑 재시도 큐 대기 중 종료 요청으로 중단합니다.")
                    break

                retry_status = await get_retry_queue_status()
                if retry_status["queue_length"] == 0:
                    break
                print(
                    f"⏳ 재시도 큐에 {retry_status['queue_length']}개 작업 남음. 30초 후 다시 확인..."
                )
                # Sleep in 1-second slices so a shutdown request is noticed quickly.
                for _ in range(30):
                    if is_shutdown_requested():
                        print("🛑 재시도 큐 대기 중 종료 요청으로 중단합니다.")
                        break
                    await asyncio.sleep(1)

            if not is_shutdown_requested():
                print(f"\n🎉 모든 스캔이 완료되었습니다! ({total_count}개 URL)")
                print("🎉 재시도 큐도 모두 처리되었습니다!")
            else:
                print("\n🛑 종료 요청으로 인해 스캔이 중단되었습니다.")
        else:
            print("\n🛑 종료 요청으로 인해 스캔이 중단되었습니다.")

    finally:
        # Always release browser resources.
        print("🔄 브라우저 리소스를 정리합니다...")
        await cleanup_browser_resources()
|
||||
|
|
@ -3,19 +3,20 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
|
||||
def GetSensitiveData():
    """
    Reads sensitive data from a .sensitive.json file in the current directory.

    The file is decoded as UTF-8 explicitly so that non-ASCII values load the
    same way regardless of the platform's default locale encoding.

    Returns:
        dict | None: The parsed JSON content, or None when the file does not
        exist in the current working directory.
    """
    file_path = os.path.join(os.getcwd(), ".sensitive.json")

    if not os.path.exists(file_path):
        return None

    with open(file_path, "r", encoding="utf-8") as file:
        sensitive_data = json.load(file)

    return sensitive_data
|
||||
2
src/lib/llm/__init__.py
Normal file
2
src/lib/llm/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.create import *
|
||||
from lib.llm.prompt import *
|
||||
19
src/lib/llm/create.py
Normal file
19
src/lib/llm/create.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
from browser_use.llm import ChatGoogle
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables (GOOGLE_API_KEY is required)
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
def CreateChatGoogle(model: str):
    """Create the Google chat model used by Browser Use.

    The special name "fallback" is replaced by "gemini-2.0-flash-lite" after
    printing a warning; any other name is passed through unchanged.
    """
    resolved_model = model
    if resolved_model == "fallback":
        for warning in (
            "⚠️ Fallback 모델을 사용합니다. Environment 변수를 확인하세요.",
            "⚠️ Model gemini-2.0-flash-lite를 사용합니다.",
        ):
            print(warning)
        resolved_model = "gemini-2.0-flash-lite"

    # Per the original note, Browser Use handles retry logic internally,
    # so no retry options are configured here.
    return ChatGoogle(model=resolved_model, temperature=0.0)
|
||||
46
src/lib/llm/prompt/__init__.py
Normal file
46
src/lib/llm/prompt/__init__.py
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
from typing import Type, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
def get_prompt(type: str) -> tuple[str, Type[BaseModel]] | str:
    """
    Return the prompt and its output model for the requested prompt type.

    :param type: Case-insensitive selector. 'auth' selects the OAuth-list
        prompt; 'google'/'google account', 'microsoft'/'microsoftonline',
        'meta'/'facebook', 'apple' and 'github' select the matching provider
        prompt; any other value selects the fallback prompt.
    :return: A (prompt, model) tuple taken from the selected prompt package.
    """
    # Normalise once; the prompt packages are imported lazily so that only
    # the selected one is loaded.
    selector = type.lower()

    if selector == "auth":
        from lib.llm.prompt._get_oauth import model, prompt
    elif selector in ("google", "google account"):
        from lib.llm.prompt.google import model, prompt
    elif selector in ("microsoft", "microsoftonline"):
        from lib.llm.prompt.microsoft import model, prompt
    elif selector in ("meta", "facebook"):
        from lib.llm.prompt.facebook import model, prompt
    elif selector == "apple":
        from lib.llm.prompt.apple import model, prompt
    elif selector == "github":
        from lib.llm.prompt.github import model, prompt
    else:
        from lib.llm.prompt._fallback import model, prompt

    return prompt, model
|
||||
2
src/lib/llm/prompt/_fallback/__init__.py
Normal file
2
src/lib/llm/prompt/_fallback/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt._fallback.model import model
|
||||
from lib.llm.prompt._fallback.prompt import prompt
|
||||
9
src/lib/llm/prompt/_fallback/model.py
Normal file
9
src/lib/llm/prompt/_fallback/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # Result object for the fallback (generic SSO) login prompt.
    # All fields are optional and default to None.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code; a plain string, so the values below are not enforced.
    # NOTE(review): the fallback prompt's output format also lists
    # "already_logged_in", "captcha_triggered", "window_blocked" and
    # "idpw_required", and does not mention "blocked" or
    # "invalid_credentials" — confirm which list is current.
    status: str | None = (
        None  # "success", "mfa_required", "blocked", "sso_not_found", "login_page_not_found", "invalid_credentials"
    )
    # URL reached after the login redirect, if any.
    final_url: str | None = None
|
||||
66
src/lib/llm/prompt/_fallback/prompt.py
Normal file
66
src/lib/llm/prompt/_fallback/prompt.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
google_id = os.getenv("GOOGLE_ID")
|
||||
google_password = os.getenv("GOOGLE_PASSWORD")
|
||||
|
||||
naver_id = os.getenv("NAVER_ID")
|
||||
naver_password = os.getenv("NAVER_PASSWORD")
|
||||
|
||||
facebook_id = os.getenv("FACEBOOK_ID")
|
||||
facebook_password = os.getenv("FACEBOOK_PASSWORD")
|
||||
|
||||
github_id = os.getenv("GITHUB_ID")
|
||||
github_password = os.getenv("GITHUB_PASSWORD")
|
||||
|
||||
microsoft_id = os.getenv("MICROSOFT_ID")
|
||||
microsoft_password = os.getenv("MICROSOFT_PASSWORD")
|
||||
|
||||
# Extended planner prompt
|
||||
prompt = f"""
|
||||
You are a web automation agent.
|
||||
|
||||
Your task is to visit the given domain and perform a full login via the **SSO Login button**, following all steps strictly as described below.
|
||||
|
||||
Instructions:
|
||||
|
||||
1. If any cookie or privacy popups appear, dismiss or accept them.
|
||||
2. Navigate through the site's UI to find the **login or sign-in page** (e.g., via buttons like "Log In", "Sign In", "Get Started").
|
||||
3. Click the **SSO login button**.
|
||||
4. Check if the user is **already logged and immediately redirected back to the original site** without showing a login screen.
|
||||
- ✅ If so, treat the login as successful and return immediately.
|
||||
5. If login proceeds without interruptions, complete the login and **immediately close the browser window**. Do not perform any further actions.
|
||||
6. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Credentials to use for login:
|
||||
- Google → `{google_id}` / `{google_password}`
|
||||
- Naver → `{naver_id}` / `{naver_password}`
|
||||
- GitHub → `{github_id}` / `{github_password}`
|
||||
- facebook → `{facebook_id}` / `{facebook_password}`
|
||||
- Microsoft → `{microsoft_id}` / `{microsoft_password}`
|
||||
|
||||
Constraints:
|
||||
- Do NOT use search engines or guess URLs.
|
||||
- Do NOT proceed with login if:
|
||||
- CAPTCHA or MFA appears
|
||||
- If the user is already logged and redirected back automatically, stop there and report success.
|
||||
- If the login page cannot be found, return "login_page_not_found".
|
||||
- If the login button is not found, return "sso_not_found".
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Final Output:
|
||||
Return the result in the following format only:
|
||||
|
||||
```json
|
||||
{{
|
||||
"msg": "login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "window_blocked" | "idpw_required" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
2
src/lib/llm/prompt/_get_oauth/__init__.py
Normal file
2
src/lib/llm/prompt/_get_oauth/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt._get_oauth.model import model
|
||||
from lib.llm.prompt._get_oauth.prompt import prompt
|
||||
7
src/lib/llm/prompt/_get_oauth/model.py
Normal file
7
src/lib/llm/prompt/_get_oauth/model.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # Result object for the login-page / SSO discovery prompt.

    # Free-form message describing the outcome.
    msg: str | None = None
    # URL of the login page; optional, defaults to None.
    url: str | None = None
    # Defaults to an empty list when nothing is reported.
    sso_list: list[str] = []  # List of SSO providers found on the login page
|
||||
61
src/lib/llm/prompt/_get_oauth/prompt.py
Normal file
61
src/lib/llm/prompt/_get_oauth/prompt.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
prompt = """
|
||||
You are an expert in finding login pages.
|
||||
|
||||
Your task is to navigate to the login page of the given URL. Follow the steps below strictly and return results only in the specified format.
|
||||
|
||||
※ You are NOT allowed to navigate to URLs that are not directly discoverable within the initial domain. Do NOT use search engines or guess external login URLs.
|
||||
|
||||
0. INITIAL BLOCK CHECK
|
||||
- If the browser is blocked when trying to access the page — due to firewall, CAPTCHA, regional restrictions, or other access denials — immediately terminate the process and return the following JSON:
|
||||
```json
|
||||
{
|
||||
"msg": "Blocked",
|
||||
"url": "",
|
||||
"sso_list": []
|
||||
}
|
||||
```
|
||||
- Do NOT proceed to further steps in this case.
|
||||
|
||||
1. LOGIN PAGE NAVIGATION
|
||||
- Navigate only to a **client-side (non-enterprise)** login page within the provided domain.
|
||||
- Do NOT rely on external tools, search engines, or links not directly found on the site.
|
||||
- If a consent popup (e.g. for privacy/cookies) appears, you MUST dismiss or close it before proceeding.
|
||||
- Since step 0 confirmed access, assume the page now loads properly.
|
||||
|
||||
2. SSO BUTTON IDENTIFICATION
|
||||
- On the login page, look for the following social login (SSO) buttons:
|
||||
- Google, GitHub, Facebook, Microsoft, Naver, Etc.
|
||||
- ✅ Proceed only if it is clearly an **actual SSO button**.
|
||||
- ❌ Exclude the following:
|
||||
- Passkey-related buttons
|
||||
- Username/password fields
|
||||
- Email-based login
|
||||
- Non-OAuth methods such as certificate or phone verification
|
||||
|
||||
3. RETURN FORMAT
|
||||
- If the login page is successfully found, return:
|
||||
```json
|
||||
{
|
||||
"msg": "Login page found",
|
||||
"url": "https://example.com/login",
|
||||
"sso_list": ["Google", "GitHub"]
|
||||
}
|
||||
```
|
||||
- If the login page cannot be found, return:
|
||||
```json
|
||||
{
|
||||
"msg": "Login page not found",
|
||||
"url": "",
|
||||
"sso_list": []
|
||||
}
|
||||
```
|
||||
- If blocked (as in step 0), return:
|
||||
```json
|
||||
{
|
||||
"msg": "Blocked",
|
||||
"url": "",
|
||||
"sso_list": []
|
||||
}
|
||||
```
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
2
src/lib/llm/prompt/apple/__init__.py
Normal file
2
src/lib/llm/prompt/apple/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt.apple.model import model
|
||||
from lib.llm.prompt.apple.prompt import prompt
|
||||
9
src/lib/llm/prompt/apple/model.py
Normal file
9
src/lib/llm/prompt/apple/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # Result object for the Apple SSO login prompt.
    # All fields are optional and default to None.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code; a plain string, so the values below are not enforced.
    # NOTE(review): the Apple prompt's output format also lists
    # "already_logged_in", "captcha_triggered", "window_blocked" and
    # "idpw_required", and does not mention "invalid_credentials" —
    # confirm which list is current.
    status: str | None = (
        None  # "success", "mfa_required", "apple_blocked", "sso_not_found", "login_page_not_found", "invalid_credentials"
    )
    # URL reached after the login redirect, if any.
    final_url: str | None = None
|
||||
62
src/lib/llm/prompt/apple/prompt.py
Normal file
62
src/lib/llm/prompt/apple/prompt.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
import os
|
||||
|
||||
# Extended planner prompt
|
||||
prompt = f"""
|
||||
You are a web automation agent.
|
||||
|
||||
Your task is to visit the given domain and perform a full login via the **Apple SSO button**, following all steps strictly as described below.
|
||||
|
||||
▶ Target: Find a login page inside this domain that allows "Sign in with Apple", and use it to complete login via Apple.
|
||||
|
||||
Instructions:
|
||||
|
||||
1. If any cookie or privacy popups appear, dismiss or accept them.
|
||||
2. Navigate through the site's UI to find the **login or sign-in page** (e.g., via buttons like "Log In", "Sign In", "Get Started").
|
||||
- Only follow links within the same domain.
|
||||
3. On the login page, look for a clearly labeled **Apple SSO button** — typically labeled as:
|
||||
- "Continue with Apple"
|
||||
- "Sign in with Apple"
|
||||
- or a button with the Apple icon
|
||||
4. Click the **Apple login button**.
|
||||
- ⚠️ The Apple login flow MUST open in a **new browser tab** (not a new window or popup).
|
||||
- ❌ If the login opens in a new **window** or **popup**, do NOT continue. Immediately stop and return the appropriate status.
|
||||
5. Check if the user is **already logged in to Apple and immediately redirected back to the original site** without showing a Apple login screen.
|
||||
- ✅ If so, treat the login as successful and return immediately.
|
||||
6. If redirected to the Apple login page:
|
||||
a. If a **CAPTCHA**, complete it.
|
||||
b. If a **MFA prompt**, or a request for **ID/password entry** appears, do NOT proceed - Immediately stop and return the appropriate status.
|
||||
- If a **"Continue"**, **"Trust"**, **"Authorize"**, or **"Allow"** button is displayed, click it to grant consent.
|
||||
7. If login proceeds without interruptions, complete the login and **immediately close the browser window**. Do not perform any further actions.
|
||||
8. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Credentials to use for Apple login:
|
||||
- Email: {os.getenv("APPLE_EMAIL", "")}
|
||||
- Password: {os.getenv("APPLE_PASSWORD", "")}
|
||||
|
||||
Constraints:
|
||||
- Do NOT use search engines or guess URLs.
|
||||
- Do NOT use autofill, saved sessions, or cookies.
|
||||
- Do NOT proceed with login if:
|
||||
- The login opens in a new window (only tabs are allowed)
|
||||
- CAPTCHA or MFA appears
|
||||
- ID/password input is required
|
||||
- If the user is already logged in to Apple and redirected back automatically, stop there and report success.
|
||||
- If the login page cannot be found, return "login_page_not_found".
|
||||
- If the Apple login button is not found, return "sso_not_found".
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Final Output:
|
||||
Return the result in the following format only:
|
||||
|
||||
```json
|
||||
{{
|
||||
"msg": "Apple login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "window_blocked" | "idpw_required" | "apple_blocked" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
2
src/lib/llm/prompt/facebook/__init__.py
Normal file
2
src/lib/llm/prompt/facebook/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt.facebook.model import model
|
||||
from lib.llm.prompt.facebook.prompt import prompt
|
||||
9
src/lib/llm/prompt/facebook/model.py
Normal file
9
src/lib/llm/prompt/facebook/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # Result object for the Facebook SSO login prompt.
    # All fields are optional and default to None.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code; a plain string, so the values below are not enforced.
    # NOTE(review): the Facebook prompt's output format also lists
    # "already_logged_in", "captcha_triggered" and "idpw_required", and does
    # not mention "invalid_credentials" — confirm which list is current.
    status: str | None = (
        None  # "success", "mfa_required", "facebook_blocked", "sso_not_found", "login_page_not_found", "invalid_credentials"
    )
    # URL reached after the login redirect, if any.
    final_url: str | None = None
|
||||
71
src/lib/llm/prompt/facebook/prompt.py
Normal file
71
src/lib/llm/prompt/facebook/prompt.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import os
|
||||
|
||||
# Extended planner prompt
|
||||
prompt = f"""
|
||||
You are a web automation agent.
|
||||
Your task is to visit the given domain and perform a full login via the **Facebook SSO button**, following all steps strictly as described below.
|
||||
|
||||
▶ Target: Find a login page inside this domain that allows "Sign in with Facebook", and use it to complete login via Facebook.
|
||||
Instructions:
|
||||
1. **CRITICAL**: Wait 3 seconds for page to load and interactive elements to appear
|
||||
2. If any cookie or privacy popups appear, dismiss or accept them by looking for buttons like "Accept All", "Accept", "Allow All"
|
||||
3. If any promotional/discount popups appear, dismiss them by looking for "Close", "X", or "No Thanks" buttons
|
||||
4. Navigate through the site's UI to find the **login or sign-in page**:
|
||||
- Look for "My Account", "Log In", "Sign In", "Get Started" buttons
|
||||
- Only follow links within the same domain
|
||||
5. On the login page, look for a clearly labeled **Facebook SSO button** — typically labeled as:
|
||||
- "Continue with Facebook"
|
||||
- "Sign in with Facebook"
|
||||
- "Login with Facebook"
|
||||
- or a button with the Facebook 'f' icon
|
||||
6. Click the **Facebook login button**.
|
||||
- ✅ The Facebook login flow will open in a new browser tab or window (this is normal OAuth behavior)
|
||||
- Continue with the Facebook login process in the new tab/window
|
||||
7. **Facebook Login Flow Handling:**
|
||||
- **If already logged in**: You may see a consent screen with buttons like "Continue as [Name]", "Continue", "Allow" - CLICK these buttons to proceed
|
||||
- **If CAPTCHA appears on registration form**: Try clicking "Create account" button multiple times (up to 5 attempts) as some CAPTCHAs may not block form submission
|
||||
- **If form submission fails after 5 attempts**: Report "success" anyway if all fields were filled and Facebook OAuth was completed successfully
|
||||
- **Registration completion is optional**: The main goal is Facebook OAuth completion, not site registration
|
||||
8. **After Facebook OAuth consent/login**:
|
||||
- Wait for redirection back to the original site
|
||||
- **CRITICAL**: If redirected to a registration/signup form with fields like:
|
||||
- "Last name", "Email address", "Password"
|
||||
- "Create account" or "Sign up" button
|
||||
- ANY additional information form
|
||||
- **MUST complete the registration process:**
|
||||
- Fill in required fields with appropriate test data:
|
||||
- Last name: "TestUser"
|
||||
- Email: Use the Facebook email or "test@example.com"
|
||||
- Password: "TestPassword123!"
|
||||
- Click "Create account", "Sign up", or "Complete registration" button
|
||||
- Only after completing ALL steps, record the final URL as successful login
|
||||
9. If all steps are completed successfully, close your browser window.
|
||||
10. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Credentials to use for Facebook login (if needed):
|
||||
- Email/Phone: {os.getenv("FACEBOOK_EMAIL", "")}
|
||||
- Password: {os.getenv("FACEBOOK_PASSWORD", "")}
|
||||
Constraints:
|
||||
- Do NOT use search engines or guess URLs
|
||||
- Do NOT use autofill, saved sessions, or cookies
|
||||
- Do NOT proceed with login if CAPTCHA or MFA appears
|
||||
- **ALWAYS complete any additional registration forms** after Facebook OAuth
|
||||
- **Fill required fields** with test data if signup form appears
|
||||
- **Only return "success" after completing ALL registration steps**
|
||||
- If the login page cannot be found, return "login_page_not_found"
|
||||
- If the Facebook login button is not found, return "sso_not_found"
|
||||
|
||||
Final Output:
|
||||
Return the result in the following format only:
|
||||
```json
|
||||
{{
|
||||
"msg": "Facebook login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "idpw_required" | "facebook_blocked" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
2
src/lib/llm/prompt/github/__init__.py
Normal file
2
src/lib/llm/prompt/github/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt.github.model import model
|
||||
from lib.llm.prompt.github.prompt import prompt
|
||||
9
src/lib/llm/prompt/github/model.py
Normal file
9
src/lib/llm/prompt/github/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # Result object for the GitHub SSO login prompt.
    # All fields are optional and default to None.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code; a plain string, so the values below are not enforced.
    # NOTE(review): the GitHub prompt's output format also lists
    # "already_logged_in", "captcha_triggered", "window_blocked" and
    # "idpw_required", and does not mention "invalid_credentials" —
    # confirm which list is current.
    status: str | None = (
        None  # "success", "mfa_required", "github_blocked", "sso_not_found", "login_page_not_found", "invalid_credentials"
    )
    # URL reached after the login redirect, if any.
    final_url: str | None = None
|
||||
81
src/lib/llm/prompt/github/prompt.py
Normal file
81
src/lib/llm/prompt/github/prompt.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
import os
|
||||
|
||||
# Extended planner prompt
|
||||
|
||||
prompt = f"""
|
||||
You are a web automation agent.
|
||||
|
||||
Your task is to visit the given domain and perform a full login via the **GitHub SSO button**, following all steps strictly as described below.
|
||||
|
||||
▶ Target: Find a login page inside this domain that allows "Sign in with GitHub", and use it to complete login via GitHub.
|
||||
|
||||
Instructions:
|
||||
|
||||
1. If any cookie or privacy popups appear, dismiss or accept them.
|
||||
|
||||
2. Navigate through the site's UI to find the **login or sign-in page** (e.g., via buttons like "Log In", "Sign In", "Get Started").
|
||||
- Only follow links within the same domain.
|
||||
- If a "Sign Up" or "Create Account" page appears instead, it is acceptable **as long as it includes a GitHub SSO option**.
|
||||
|
||||
3. On the login or sign-up page, look for a clearly labeled **GitHub SSO button** — typically labeled as:
|
||||
- "Continue with GitHub"
|
||||
- "Sign in with GitHub"
|
||||
- or a button with the GitHub logo
|
||||
|
||||
4. Click the **GitHub login button**.
|
||||
- ⚠️ The GitHub login flow MUST open in a **new browser tab** (not a new window or popup).
|
||||
- ❌ If the login opens in a new **window** or **popup**, do NOT continue. Immediately stop and return the appropriate status.
|
||||
|
||||
5. Check if the user is **already logged in to GitHub and immediately redirected back to the original site** without showing a GitHub login screen.
|
||||
- ✅ If so, treat the login as successful and return immediately.
|
||||
|
||||
6. If redirected to the GitHub login page:
|
||||
a. Wait for the username or email input field, then enter the email: {os.getenv("GITHUB_EMAIL", "")}
|
||||
b. Click the "Continue" or "Next" button if present.
|
||||
c. Enter the password: {os.getenv("GITHUB_PASSWORD", "")}
|
||||
d. Click the "Sign in" button.
|
||||
e. If a page appears asking to "Authorize" access for the application, click the "Authorize" button.
|
||||
- GitHub may take a while to redirect after authorization, so please wait patiently.
|
||||
- If a CAPTCHA, MFA prompt, or other interruption appears, do NOT proceed.
|
||||
- If login fails due to incorrect credentials or authentication errors, treat as `"idpw_required"` and stop.
|
||||
- Immediately stop and return the appropriate status.
|
||||
|
||||
|
||||
7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
|
||||
|
||||
8. Close your browser window after the login is completed.
|
||||
|
||||
9. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Credentials to use for GitHub login:
|
||||
- Email: {os.getenv("GITHUB_EMAIL", "")}
|
||||
- Password: {os.getenv("GITHUB_PASSWORD", "")}
|
||||
|
||||
Constraints:
|
||||
- Do NOT use search engines or guess URLs.
|
||||
- Do NOT use autofill, saved sessions, or cookies.
|
||||
- Do NOT proceed with login if:
|
||||
- The login opens in a new window (only tabs are allowed)
|
||||
- CAPTCHA or MFA appears
|
||||
- ID/password input is required and cannot be autofilled
|
||||
- If the user is already logged in to GitHub and redirected back automatically, stop there and report success.
|
||||
- If the login page cannot be found, return "login_page_not_found".
|
||||
- If the GitHub login button is not found, return "sso_not_found".
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Final Output:
|
||||
Return the result in the following format only:
|
||||
|
||||
```json
|
||||
{{
|
||||
"msg": "GitHub login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "window_blocked" | "idpw_required" | "github_blocked" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
|
||||
2
src/lib/llm/prompt/google/__init__.py
Normal file
2
src/lib/llm/prompt/google/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt.google.model import model
|
||||
from lib.llm.prompt.google.prompt import prompt
|
||||
9
src/lib/llm/prompt/google/model.py
Normal file
9
src/lib/llm/prompt/google/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # NOTE: documented with comments rather than a class docstring on purpose;
    # pydantic copies a class docstring into the generated schema description.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code, e.g. "success", "mfa_required", "google_blocked",
    # "sso_not_found", "login_page_not_found", "invalid_credentials".
    status: str | None = None
    # NOTE(review): presumably the URL reached after the login redirect — confirm against the prompt.
    final_url: str | None = None
|
||||
63
src/lib/llm/prompt/google/prompt.py
Normal file
63
src/lib/llm/prompt/google/prompt.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
import os
|
||||
|
||||
# Extended planner prompt
|
||||
prompt = f"""
|
||||
You are a web automation agent.
|
||||
|
||||
Your task is to visit the given domain and perform a full login via the **Google SSO button**, following all steps strictly as described below.
|
||||
|
||||
▶ Target: Find a login page inside this domain that allows "Sign in with Google", and use it to complete login via Google.
|
||||
|
||||
Instructions:
|
||||
|
||||
1. If any cookie or privacy popups appear, dismiss or accept them.
|
||||
2. Navigate through the site's UI to find the **login or sign-in page** (e.g., via buttons like "Log In", "Sign In", "Get Started").
|
||||
- Only follow links within the same domain.
|
||||
3. On the login page, look for a clearly labeled **Google SSO button** — typically labeled as:
|
||||
- "Continue with Google"
|
||||
- "Sign in with Google"
|
||||
- or a button with the Google 'G' icon
|
||||
4. Click the **Google login button**.
|
||||
- ⚠️ The Google login flow MUST open in a **new browser tab** (not a new window or popup).
|
||||
- ❌ If the login opens in a new **window** or **popup**, do NOT continue. Immediately stop and return the appropriate status.
|
||||
5. Check if the user is **already logged in to Google and immediately redirected back to the original site** without showing a Google login screen.
|
||||
- ✅ If so, treat the login as successful and return immediately.
|
||||
6. If redirected to the Google login page:
|
||||
a. Wait for the username or email input field, then enter the email: {os.getenv("GOOGLE_EMAIL", "")}
|
||||
b. Click the "Continue" or "Next" button if present. (If still on the same page, repeat step a)
|
||||
c. Wait for the password input field, then enter the password: {os.getenv("GOOGLE_PASSWORD", "")}
|
||||
d. Click the "Sign in" or "Next" button.
|
||||
7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
|
||||
8. Close your browser window after the login is completed.
|
||||
9. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Credentials to use for Google login:
|
||||
- Email: {os.getenv("GOOGLE_EMAIL", "")}
|
||||
- Password: {os.getenv("GOOGLE_PASSWORD", "")}
|
||||
|
||||
Constraints:
|
||||
- Do NOT use search engines or guess URLs.
|
||||
- Do NOT use autofill, saved sessions, or cookies.
|
||||
- Do NOT proceed with login if:
|
||||
- The login opens in a new window (only tabs are allowed)
|
||||
- CAPTCHA or MFA appears
|
||||
- If the user is already logged in to Google and redirected back automatically, stop there and report success.
|
||||
- If the login page cannot be found, return "login_page_not_found".
|
||||
- If the Google login button is not found, return "sso_not_found".
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
Final Output:
|
||||
Return the result in the following format only:
|
||||
|
||||
```json
|
||||
{{
|
||||
"msg": "Google login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "window_blocked" | "idpw_required" | "google_blocked" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
2
src/lib/llm/prompt/microsoft/__init__.py
Normal file
2
src/lib/llm/prompt/microsoft/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.llm.prompt.microsoft.model import model
|
||||
from lib.llm.prompt.microsoft.prompt import prompt
|
||||
9
src/lib/llm/prompt/microsoft/model.py
Normal file
9
src/lib/llm/prompt/microsoft/model.py
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class model(BaseModel):
    # NOTE: documented with comments rather than a class docstring on purpose;
    # pydantic copies a class docstring into the generated schema description.

    # Free-form message describing the outcome.
    msg: str | None = None
    # Outcome code, e.g. "success", "mfa_required", "microsoft_blocked",
    # "sso_not_found", "login_page_not_found", "invalid_credentials".
    status: str | None = None
    # NOTE(review): presumably the URL reached after the login redirect — confirm against the prompt.
    final_url: str | None = None
|
||||
60
src/lib/llm/prompt/microsoft/prompt.py
Normal file
60
src/lib/llm/prompt/microsoft/prompt.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
import os
|
||||
|
||||
# This code snippet is used to generate a prompt for a web automation agent that performs Microsoft SSO login.
|
||||
prompt = f"""
|
||||
당신은 웹 자동화 에이전트입니다.
|
||||
|
||||
당신의 임무는 주어진 도메인에 방문하여 아래에 엄격히 설명된 모든 단계를 따라 **Microsoft SSO 버튼**을 통해 전체 로그인을 수행하는 것입니다.
|
||||
|
||||
▶ 목표: 이 도메인 내에서 "Microsoft로 로그인"이 가능한 로그인 페이지를 찾아 Microsoft를 통해 로그인을 완료하세요.
|
||||
|
||||
지침:
|
||||
|
||||
1. 쿠키 또는 개인정보 팝업이 나타나면 닫거나 수락하세요.
|
||||
2. 사이트의 UI를 탐색하여 **로그인 또는 로그인 페이지**(예: "로그인", "Sign In", "Get Started"와 같은 버튼)를 찾으세요.
|
||||
- 동일한 도메인 내의 링크만 따라가세요.
|
||||
3. 로그인 페이지에서 명확하게 표시된 **Microsoft SSO 버튼**을 찾으세요. 일반적으로 다음과 같이 표시됩니다:
|
||||
- "Continue with Microsoft"
|
||||
- "Sign in with Microsoft"
|
||||
- or a button with the Microsoft logo (usually four squares)
|
||||
4. **Microsoft 로그인 버튼**을 클릭하세요.
|
||||
- ⚠️ Microsoft 로그인 플로우는 반드시 **새 브라우저 탭**에서 열려야 합니다 (새 창이나 팝업이 아님).
|
||||
- ❌ 로그인이 새 **창**이나 **팝업**에서 열리면, 즉시 중단하고 적절한 상태를 반환하세요.
|
||||
5. 사용자가 **이미 Microsoft에 로그인되어 있고 즉시 원래 사이트로 리디렉션**된다면,
|
||||
- ✅ 이 경우 로그인이 성공한 것으로 간주하고 즉시 반환하세요.
|
||||
6. Microsoft 로그인 페이지로 리디렉션된 경우:
|
||||
- **CAPTCHA**, **MFA 프롬프트** 요청이 나타나면 진행하지 마세요.
|
||||
- 즉시 중단하고 적절한 상태를 반환하세요.
|
||||
7. 로그인에 방해가 없다면, 원래 사이트로 리디렉션될 때까지 기다리고 최종 URL을 기록하세요.
|
||||
8. 로그인 되어있지 않으면 아래의 EMAIL과 PASSWORD를 사용하여 로그인하세요:
|
||||
- Email: {os.getenv("MICROSOFT_EMAIL", "")}
|
||||
- Password: {os.getenv("MICROSOFT_PASSWORD", "")}
|
||||
9. 로그인 완료 후 브라우저 창을 닫으세요.
|
||||
10. Login is considered successful if:
|
||||
- You are redirected to a page that indicates successful login (e.g., a welcome page, dashboard, or account page).
|
||||
- If a page such as a sign-up page appears, consider it a successful login and terminate immediately.
|
||||
|
||||
제약 사항:
|
||||
- 검색 엔진을 사용하거나 URL을 추측하지 마세요.
|
||||
- 자동완성, 저장된 세션 또는 쿠키를 사용하지 마세요.
|
||||
- 다음과 같은 경우 로그인 절차를 진행하지 마세요:
|
||||
- 로그인이 새 창에서 열릴 때 (탭만 허용)
|
||||
- CAPTCHA 또는 MFA가 나타날 때
|
||||
- ID/비밀번호 입력이 필요하지만 자동입력이 불가한 경우
|
||||
- 사용자가 이미 Microsoft에 로그인되어 자동으로 리디렉션된다면, 그 즉시 성공으로 보고 종료하세요.
|
||||
- 로그인 페이지를 찾을 수 없으면 "login_page_not_found"를 반환하세요.
|
||||
- Microsoft 로그인 버튼을 찾을 수 없으면 "sso_not_found"를 반환하세요.
|
||||
- 회원가입 페이지와 같은 화면이 나타나면 성공적인 로그인으로 간주하고 즉시 종료하세요.
|
||||
|
||||
최종 출력:
|
||||
다음 형식으로만 결과를 반환하세요:
|
||||
```json
|
||||
{{
|
||||
"msg": "Microsoft login completed",
|
||||
"status": "success" | "already_logged_in" | "mfa_required" | "captcha_triggered" | "window_blocked" | "idpw_required" | "microsoft_blocked" | "sso_not_found" | "login_page_not_found",
|
||||
"final_url": "<url_after_login_redirect or empty string>"
|
||||
}}
|
||||
```
|
||||
|
||||
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
|
||||
"""
|
||||
7
src/lib/utils/__init__.py
Normal file
7
src/lib/utils/__init__.py
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# export from show_info
|
||||
|
||||
from lib.utils.agent_info import *
|
||||
from lib.utils.config import *
|
||||
from lib.utils.data import *
|
||||
from lib.utils.parsing.is_html import *
|
||||
from lib.utils.parsing.read_txt import *
|
||||
|
|
@ -1,3 +1,7 @@
|
|||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from lib.utils.config import (
|
||||
BACKEND_URL,
|
||||
GOOGLE_API_KEY,
|
||||
|
|
@ -5,6 +9,8 @@ from lib.utils.config import (
|
|||
GOOGLE_PLANNER_MODEL,
|
||||
)
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
|
||||
def show_info():
|
||||
print("🔧 환경 설정:")
|
||||
|
|
@ -38,3 +44,21 @@ def browser_use_version():
|
|||
def env_cheker():
    """Validate the environment configuration before scanning starts.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is ``None``.

    When ``GOOGLE_PLANNER_MODEL`` is set but ``ENABLE_PLANNER_MODEL_OAUTH_LOGIN``
    or ``ENABLE_PLANNER_MODEL_OAUTH_LIST`` is unset or empty, a series of
    warnings is printed and execution pauses for one second.
    """
    if GOOGLE_API_KEY is None:
        raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.")

    # Nothing else to verify when no planner model is configured.
    if GOOGLE_PLANNER_MODEL is None:
        return

    planner_flags = (
        "ENABLE_PLANNER_MODEL_OAUTH_LOGIN",
        "ENABLE_PLANNER_MODEL_OAUTH_LIST",
    )
    # Same truthiness test as before: any non-empty value counts as "set".
    if all(os.getenv(flag) for flag in planner_flags):
        return

    print(
        "⚠️ GOOGLE_PLANNER_MODEL이 설정되어 있지만, ENABLE_PLANNER_MODEL_OAUTH_LOGIN 또는 ENABLE_PLANNER_MODEL_OAUTH_LIST가 활성화되지 않았습니다."
    )
    print(
        "⚠️ Planner 모델을 사용하려면 .env 파일에서 ENABLE_PLANNER_MODEL_OAUTH_LOGIN과 ENABLE_PLANNER_MODEL_OAUTH_LIST를 true로 설정하세요."
    )
    print(
        "‼️ 하지만 현재 Planner 모델을 사용하는 것이 권장되지 않습니다. 이 기능은 오작동을 일으킬 수 있습니다."
    )
    print("⚠️ 이 경고는 1초동안 정지합니다.")

    # Imported here because only this warning path needs it.
    import time

    # Pause so the warning above is not immediately scrolled away.
    time.sleep(1)
|
||||
|
|
@ -1,8 +1,11 @@
|
|||
import os
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(verbose=True, override=True)
|
||||
|
||||
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:11081")
|
||||
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
|
||||
GOOGLE_MODEL = os.getenv("GOOGLE_MODEL", "gemini-2.5-flash-preview-05-20")
|
||||
GOOGLE_PLANNER_MODEL = os.getenv("GOOGLE_PLANNER_MODEL", "gemini-2.5-pro-preview-06-05")
|
||||
GOOGLE_MODEL = os.getenv("GOOGLE_MODEL", "gemini-2.5-flash")
|
||||
GOOGLE_PLANNER_MODEL = os.getenv("GOOGLE_PLANNER_MODEL")
|
||||
USER_DATA_DIR = os.getenv("USER_DATA_DIR", "./data/user_data")
|
||||
2
src/lib/utils/data/__init__.py
Normal file
2
src/lib/utils/data/__init__.py
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
from lib.utils.data.backend_client import *
|
||||
from lib.utils.data.logger import *
|
||||
|
|
@ -2,6 +2,7 @@ import requests
|
|||
|
||||
from lib.utils.config import BACKEND_URL
|
||||
|
||||
|
||||
def notify_backend(target_url):
|
||||
# Backend에 스캔 시작을 알림
|
||||
try:
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
# 미리 정해진 파일 경로
|
||||
FILE_PATH = Path("data/log.txt")
|
||||
|
||||
|
||||
def logger(msg: str) -> None:
|
||||
try:
|
||||
"""
|
||||
|
|
@ -13,7 +14,7 @@ def logger(msg: str) -> None:
|
|||
"""
|
||||
# 상위 디렉터리 생성 (이미 있으면 무시)
|
||||
FILE_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# 현재 시각 구해서 포맷팅
|
||||
now = datetime.now()
|
||||
timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
|
@ -26,4 +27,4 @@ def logger(msg: str) -> None:
|
|||
with FILE_PATH.open(mode="a", encoding="utf-8") as f:
|
||||
f.write(line)
|
||||
except:
|
||||
print(msg)
|
||||
print(msg)
|
||||
|
|
@ -1,32 +1,34 @@
|
|||
import requests
|
||||
|
||||
|
||||
def is_html_url(url: str, timeout: float = 10.0) -> bool:
    """Report whether *url* answers with an HTML document.

    A streaming GET request is issued and only the response headers are
    inspected; the body is never read here.

    Args:
        url: URL to probe.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        True when ``response.ok`` is true and the ``Content-Type`` header
        starts with ``text/html`` (case-insensitive). False otherwise,
        including when a ``requests.RequestException`` is raised.
    """
    try:
        with requests.get(url, timeout=timeout, stream=True) as resp:
            if resp.ok:
                declared_type = resp.headers.get("Content-Type", "")
                return declared_type.lower().startswith("text/html")
            # Error responses are never treated as HTML pages.
            return False
    except requests.RequestException:
        return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_urls = [
|
||||
'https://www.example.com',
|
||||
'https://api.github.com', # JSON API라서 HTML이 아닐 확률이 높음
|
||||
'https://raw.githubusercontent.com' # 텍스트 파일 등 다양한 타입
|
||||
"https://www.example.com",
|
||||
"https://api.github.com", # JSON API라서 HTML이 아닐 확률이 높음
|
||||
"https://raw.githubusercontent.com", # 텍스트 파일 등 다양한 타입
|
||||
]
|
||||
|
||||
for url in test_urls:
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
def read_lines_between(filepath: str, start_line: int, end_line: int) -> list[str]:
|
||||
"""
|
||||
파일에서 start_line번 째 줄부터 end_line번 째 줄까지 읽어와
|
||||
파일에서 start_line번 째 줄부터 end_line번 째 줄까지 읽어와
|
||||
각 줄을 요소로 갖는 리스트를 반환하는 함수.
|
||||
|
||||
Parameters:
|
||||
|
|
@ -15,15 +15,17 @@ def read_lines_between(filepath: str, start_line: int, end_line: int) -> list[st
|
|||
Returns:
|
||||
-------
|
||||
list[str]
|
||||
각 줄을 문자열로 저장한 리스트.
|
||||
각 줄을 문자열로 저장한 리스트.
|
||||
파일에 해당 범위의 줄이 없으면 가능한 만큼만 반환.
|
||||
"""
|
||||
|
||||
if start_line < 1 or end_line < start_line:
|
||||
raise ValueError("start_line은 1 이상이어야 하며, end_line은 start_line 이상이어야 합니다.")
|
||||
raise ValueError(
|
||||
"start_line은 1 이상이어야 하며, end_line은 start_line 이상이어야 합니다."
|
||||
)
|
||||
|
||||
selected_lines: list[str] = []
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
for idx, line in enumerate(f, start=1):
|
||||
if idx < start_line:
|
||||
# 아직 읽기 시작 전
|
||||
|
|
@ -32,5 +34,5 @@ def read_lines_between(filepath: str, start_line: int, end_line: int) -> list[st
|
|||
# 읽을 범위를 벗어났으므로 중단
|
||||
break
|
||||
# 줄 끝의 개행 문자를 제거하고 리스트에 추가
|
||||
selected_lines.append(line.rstrip('\n'))
|
||||
selected_lines.append(line.rstrip("\n"))
|
||||
return selected_lines
|
||||
119
src/lib/utils/progress.py
Normal file
119
src/lib/utils/progress.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import json
|
||||
import os, sys
|
||||
import signal
|
||||
import time
|
||||
import threading
|
||||
from pathlib import Path
|
||||
|
||||
# Module-level state for tracking scan progress; written out by save_progress().
current_progress = {"current_index": 0, "total": 0, "current_url": "", "start_line": 0}
# Location of the JSON file that progress is saved to and loaded from.
progress_file = Path("data/scan_progress.json")

# Module-level state for Ctrl+C handling.
ctrl_c_count = 0
last_ctrl_c_time = 0
shutdown_requested = False
# Guards reads and writes of the shutdown flag and Ctrl+C counters.
shutdown_lock = threading.Lock()
|
||||
|
||||
|
||||
def save_progress():
    """Write ``current_progress`` to ``progress_file`` as indented JSON.

    The parent directory is created first if it does not exist.
    """
    target_dir = progress_file.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(progress_file, "w", encoding="utf-8") as out:
        json.dump(current_progress, out, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def load_progress():
    """Load previously saved progress from ``progress_file``.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    if not os.path.exists(progress_file):
        return None

    try:
        with open(progress_file, "r", encoding="utf-8") as src:
            saved = json.load(src)
    except (json.JSONDecodeError, IOError):
        return None
    return saved
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """Handle Ctrl+C (SIGINT): graceful stop first, forced exit on repeat.

    First signal: marks shutdown as requested, prints a progress summary,
    saves progress and raises ``KeyboardInterrupt``.

    A second signal within two seconds, or any signal after shutdown was
    already requested, cancels the tasks of the running asyncio loop (if
    there is one) and terminates the process with ``os._exit(1)``.

    Args:
        signum: Signal number (unused).
        frame: Current stack frame (unused).

    Raises:
        KeyboardInterrupt: On the first signal, to unwind the caller.
    """
    global shutdown_requested, ctrl_c_count, last_ctrl_c_time

    current_time = time.time()

    # Only the state mutation is done under the lock; printing and file I/O
    # happen after it is released.
    with shutdown_lock:
        # Consecutive Ctrl+C presses less than 2 seconds apart accumulate.
        if current_time - last_ctrl_c_time < 2.0:
            ctrl_c_count += 1
        else:
            ctrl_c_count = 1

        last_ctrl_c_time = current_time

        # Second press, or shutdown already requested: force-quit.
        if ctrl_c_count >= 2 or shutdown_requested:
            print("\n⚡ 강제 종료합니다!")
            import asyncio

            try:
                loop = asyncio.get_running_loop()
                for task in asyncio.all_tasks(loop):
                    task.cancel()
            except RuntimeError:
                # No running event loop in this thread; nothing to cancel.
                pass
            os._exit(1)

        # First press: request a graceful shutdown.
        shutdown_requested = True

    done = current_progress["current_index"]
    total = current_progress["total"]

    print("\n" + "=" * 60)
    print("🛑 종료 신호를 받았습니다!")
    print("📊 현재 진행 상황:")
    print(f" - 전체: {total}개 URL")
    print(f" - 완료: {done}개 URL")
    print(f" - 현재 처리 중: {current_progress['current_url']}")
    if current_progress.get("start_line"):
        print(f" - domains.txt의 {current_progress['start_line'] + done}번째 줄")
    if total > 0:
        print(f" - 진행률: {done}/{total} ({done / total * 100:.1f}%)")
    print("=" * 60)

    # Persist progress for a later run.
    save_progress()
    print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
    print("다음에 같은 명령어로 실행하면 이어서 진행할 수 있습니다.")
    # The original literal started with a U+FFFD replacement character.
    print("⚡ 2초 내에 Ctrl+C를 다시 누르면 강제 종료됩니다.")

    # Raise KeyboardInterrupt so the caller unwinds normally.
    raise KeyboardInterrupt()
|
||||
|
||||
|
||||
def is_shutdown_requested():
    """Return the shutdown flag, read while holding ``shutdown_lock``."""
    shutdown_lock.acquire()
    try:
        return shutdown_requested
    finally:
        shutdown_lock.release()
|
||||
|
||||
def request_shutdown():
    """Request shutdown from outside the signal handler.

    The first call sets the shutdown flag, prints a progress summary and
    saves progress. Later calls do nothing.
    """
    global shutdown_requested
    with shutdown_lock:
        # Already requested: nothing more to do.
        if shutdown_requested:
            return

        shutdown_requested = True
        print("\n🛑 종료가 요청되었습니다.")
        print(f"📊 현재 진행 상황:")
        print(f" - 전체: {current_progress['total']}개 URL")
        print(f" - 완료: {current_progress['current_index']}개 URL")
        print(f" - 현재 처리 중: {current_progress['current_url']}")
        if current_progress.get('start_line'):
            line_no = current_progress['start_line'] + current_progress['current_index']
            print(f" - domains.txt의 {line_no}번째 줄")
        if current_progress["total"] > 0:
            percent = current_progress['current_index'] / current_progress['total'] * 100
            print(f" - 진행률: {current_progress['current_index']}/{current_progress['total']} ({percent:.1f}%)")

        save_progress()
        print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
        print("다음에 같은 명령어로 실행하면 이어서 진행할 수 있습니다.")
|
||||
|
||||
|
||||
def setup_signal_handler():
    """Intentionally register no signal handler.

    The browser-use library handles Ctrl+C itself, so this module does not
    install its own handler.
    """
    return None
|
||||
116
src/lib/utils/progress_v2.py
Normal file
116
src/lib/utils/progress_v2.py
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
"""
|
||||
종료 처리를 위한 개선된 모듈
|
||||
browser-use의 pause 기능과 호환되도록 설계
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import signal
|
||||
import time
|
||||
import threading
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
# Module-level state for tracking scan progress; written out by save_progress().
current_progress = {"current_index": 0, "total": 0, "current_url": "", "start_line": 0}
# Location of the JSON file that progress is saved to and loaded from.
progress_file = Path("data/scan_progress.json")

# Module-level state for shutdown management.
shutdown_requested = False
# Guards reads and writes of the shutdown flag.
shutdown_lock = threading.Lock()
# SIGINT handler that was active before this module installed its own, if any.
original_handler = None
|
||||
|
||||
|
||||
def save_progress():
    """Write ``current_progress`` to ``progress_file`` as indented JSON.

    The parent directory is created first if it does not exist.
    """
    target_dir = progress_file.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(progress_file, "w", encoding="utf-8") as out:
        json.dump(current_progress, out, ensure_ascii=False, indent=2)
|
||||
|
||||
|
||||
def load_progress():
    """Load previously saved progress from ``progress_file``.

    Returns:
        The decoded JSON content, or ``None`` when the file does not exist,
        cannot be read, or does not contain valid JSON.
    """
    if not os.path.exists(progress_file):
        return None

    try:
        with open(progress_file, "r", encoding="utf-8") as src:
            saved = json.load(src)
    except (json.JSONDecodeError, IOError):
        return None
    return saved
|
||||
|
||||
|
||||
def request_shutdown():
    """Request shutdown; callable from outside this module.

    The first call sets the shutdown flag, prints a notice and saves
    progress. Later calls do nothing.
    """
    global shutdown_requested
    with shutdown_lock:
        # Already requested: nothing more to do.
        if shutdown_requested:
            return

        shutdown_requested = True
        print("\n🛑 종료가 요청되었습니다. 현재 작업을 완료한 후 종료합니다...")
        save_progress()
        print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
|
||||
|
||||
|
||||
def is_shutdown_requested():
    """Return the shutdown flag, read while holding ``shutdown_lock``."""
    shutdown_lock.acquire()
    try:
        return shutdown_requested
    finally:
        shutdown_lock.release()
|
||||
|
||||
|
||||
def cleanup_signal_handler():
    """Reinstall the saved SIGINT handler and clear the saved reference.

    Does nothing when no handler has been saved.
    """
    global original_handler
    if original_handler is None:
        return

    saved = original_handler
    signal.signal(signal.SIGINT, saved)
    original_handler = None
|
||||
|
||||
|
||||
def setup_minimal_signal_handler():
    """Install a minimal SIGINT handler that saves progress, then defers.

    The handler prints a notice, saves progress, reinstalls the previously
    active SIGINT handler and re-sends SIGINT to this process so that the
    previous handler performs the actual interruption.

    The previously active handler is stored in the module-level
    ``original_handler``. It is read with ``signal.getsignal`` so SIGINT is
    never switched to ``SIG_DFL`` while probing. Calling this function again
    does not record this module's own handler as the "original", which would
    otherwise make the handler re-raise SIGINT to itself indefinitely.
    """
    global original_handler

    current = signal.getsignal(signal.SIGINT)
    # Record the handler only when it is not one installed by this function.
    if not getattr(current, "_is_graceful_sigint_handler", False):
        # getsignal() returns None for handlers not installed from Python;
        # signal.signal() rejects None, so fall back to the default action.
        original_handler = current if current is not None else signal.SIG_DFL

    # Captured in the closure so a later change to the global cannot affect it.
    previous_handler = original_handler

    def graceful_signal_handler(signum, frame):
        """Save progress, then hand SIGINT to the previous handler."""
        print("\n🛑 종료 신호를 받았습니다...")
        save_progress()
        print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")

        # Restore the previous handler and raise the signal again.
        signal.signal(signal.SIGINT, previous_handler)
        os.kill(os.getpid(), signal.SIGINT)

    # Marker that lets a later call recognise this handler as our own.
    graceful_signal_handler._is_graceful_sigint_handler = True
    signal.signal(signal.SIGINT, graceful_signal_handler)
|
||||
|
||||
|
||||
class GracefulShutdown:
    """Context manager that routes SIGINT through ``request_shutdown()``.

    On entry the instance's handler is installed for SIGINT and the previous
    handler is remembered; on exit the previous handler is reinstalled.
    """

    def __init__(self):
        # Filled in by __enter__ with the handler that was active before.
        self.original_handler = None

    def __enter__(self):
        previous = signal.signal(signal.SIGINT, self._signal_handler)
        self.original_handler = previous
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.original_handler is None:
            return
        signal.signal(signal.SIGINT, self.original_handler)

    def _signal_handler(self, signum, frame):
        """Request shutdown, then pass SIGINT on to the previous handler."""
        request_shutdown()
        # Reinstall the previous handler before re-sending the signal to this process.
        signal.signal(signal.SIGINT, self.original_handler)
        os.kill(os.getpid(), signal.SIGINT)
|
||||
|
||||
|
||||
# 기존 함수들과의 호환성을 위한 별칭
|
||||
def setup_signal_handler():
    """Compatibility shim for existing callers; installs nothing.

    Left empty so that browser-use's own signal handling is not disturbed.
    """
    return None
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """Compatibility shim for existing callers.

    Ignores both arguments and simply delegates to ``request_shutdown()``.
    """
    request_shutdown()
    return None
|
||||
121
src/main.py
Normal file
121
src/main.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
import argparse
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from lib.browser_use.scanner import main_loop
|
||||
from lib.utils import env_cheker
|
||||
from lib.utils.progress import progress_file, setup_signal_handler
|
||||
|
||||
|
||||
def setup_environment():
    """Load environment variables and initialise optional integrations.

    Loads the ``.env`` file, runs ``env_cheker()``, and initialises Laminar
    when ``LMNR_PROJECT_API_KEY`` is set. A missing key or a missing
    ``lmnr`` package only produces a warning.
    """
    # Load the .env file, overriding variables that are already set.
    load_dotenv(verbose=True, override=True)

    # Validate the environment variables.
    env_cheker()

    # Laminar is optional: without a key, warn and stop here.
    if not os.getenv("LMNR_PROJECT_API_KEY"):
        print("⚠️ LMNR_PROJECT_API_KEY 환경 변수가 설정되지 않았습니다. Laminar 기능이 비활성화됩니다.")
        return

    try:
        from lmnr import Laminar

        Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY"))
    except ImportError:
        print("⚠️ Laminar 라이브러리가 설치되지 않았습니다. 관련 기능이 비활성화됩니다.")
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse the command-line arguments.

    Returns:
        The ``argparse.Namespace`` with ``file``, ``start``, ``end`` and
        ``skip_html_check`` attributes.
    """
    parser = argparse.ArgumentParser(
        prog="domain_scanner",
        description="도메인 목록 파일에서 지정한 줄 범위를 읽어 SSO 스캔을 수행합니다.",
    )

    # (option strings, add_argument keyword arguments), in registration order.
    option_specs = [
        (
            ("-f", "--file"),
            {
                "type": str,
                "required": True,
                "help": "도메인 목록이 들어 있는 텍스트 파일 경로 (예: ./domains.txt)",
            },
        ),
        (
            ("-s", "--start"),
            {"type": int, "required": True, "help": "읽기 시작 줄 번호 (1-based)"},
        ),
        (
            ("-e", "--end"),
            {"type": int, "required": True, "help": "읽기 종료 줄 번호 (1-based)"},
        ),
        (
            ("-skh", "--skip-html-check"),
            {
                "action": "store_true",
                "help": "HTML 페이지 체크를 건너뛰고 모든 URL을 스캔합니다.",
            },
        ),
    ]
    for option_strings, settings in option_specs:
        parser.add_argument(*option_strings, **settings)

    return parser.parse_args()
|
||||
|
||||
|
||||
def _remove_stale_user_data_dir(dump_path):
    """Delete the temporary user-data directory recorded at *dump_path*.

    The file at *dump_path* is expected to hold the path of a directory left
    over from a previous run. Both the directory and the record file are
    removed on a best-effort basis; failures are reported or ignored.
    """
    if not os.path.exists(dump_path):
        return

    with open(dump_path, "r") as f:
        tmp_user_data_dir = f.read().strip()

    try:
        import shutil

        # NOTE(review): the path comes straight from the dump file and is
        # removed recursively; confirm only this program writes that file.
        if os.path.exists(tmp_user_data_dir):
            shutil.rmtree(tmp_user_data_dir)
            print(f"🔧 이전 실행의 임시 사용자 데이터 디렉토리 {tmp_user_data_dir}를 삭제하였습니다.")
    except (PermissionError, FileNotFoundError, OSError) as e:
        print(f"⚠️ 임시 사용자 데이터 디렉토리 삭제 실패: {e}")

    try:
        os.remove(dump_path)
    except OSError:
        # Best effort: a record file that cannot be removed is not fatal.
        pass


def main():
    """Application entry point.

    Sets up the environment, parses arguments, cleans up leftovers from a
    previous run and runs the scan loop.

    Exits with status 0 after a keyboard interrupt (progress is saved via
    ``request_shutdown()``) and with status 1 after any other exception. The
    progress file is deleted only when the scan loop returned normally and
    no shutdown was requested.
    """
    setup_environment()
    setup_signal_handler()
    args = parse_arguments()

    # Read and remove the user-data path left by a previous run.
    if not os.path.exists("./data"):
        os.makedirs("./data")
    _remove_stale_user_data_dir(os.path.join("./data", "userdata.dump"))

    from lib.utils.progress import is_shutdown_requested, request_shutdown

    # Set only after main_loop returns without raising, so the cleanup in
    # `finally` cannot discard resume data after a crash.
    finished_normally = False
    try:
        asyncio.run(
            main_loop(
                filepath=args.file,
                start_line=args.start,
                end_line=args.end,
                skip_html_check=args.skip_html_check,
            )
        )
        finished_normally = True
    except KeyboardInterrupt:
        print("\n🛑 사용자에 의해 중단되었습니다.")
        # Save progress.
        request_shutdown()
        print("✅ 정리 완료.")
        sys.exit(0)
    except Exception as e:
        print(f"\n❌ 예상치 못한 오류가 발생했습니다: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
    finally:
        # Delete the progress file only after a normal, complete run.
        if (
            finished_normally
            and not is_shutdown_requested()
            and os.path.exists(progress_file)
        ):
            try:
                os.remove(progress_file)
                print("✅ 진행 상황 파일이 삭제되었습니다.")
            except OSError as e:
                print(f"⚠️ 진행 상황 파일 삭제 실패: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
52
temp.md
52
temp.md
|
|
@ -1,52 +0,0 @@
|
|||
|
||||
You are an AI model specialized in web crawling and analysis. Given a URI, perform the following tasks:
|
||||
|
||||
1. Navigate to the provided URI and locate the login page. If it’s not found, explore common auth-related pages like /login or /auth.
|
||||
2. On the login page, identify all available social login buttons (OAuth-based) such as Google, GitHub, Facebook, etc.
|
||||
3. Simulate clicking each social login button and follow the redirect to capture the full redirect URL (including query parameters).
|
||||
4. From the redirect URL and parameters, extract:
|
||||
- `client_id`
|
||||
- `redirect_uri`
|
||||
- `response_type`
|
||||
- `scope`
|
||||
5. Based on URL patterns, infer the OAuth method: Authorization Code, Implicit, PKCE, etc.
|
||||
6. Return data in the following JSON format only:
|
||||
|
||||
```json
|
||||
{
|
||||
"oauths": [
|
||||
{
|
||||
"issue": "<site being tested, e.g., git.imnya.ng>",
|
||||
"oauth_uri": "<original button href or URL triggered>"
|
||||
}
|
||||
]
|
||||
}
|
||||
````
|
||||
|
||||
7. If the login button says something like "Login with GitHub" or "Login with Google", follow the flow and use the **final redirect URL after clicking** as the value of `oauth_uri`.
|
||||
|
||||
**Examples:**
|
||||
|
||||
```json
|
||||
{
|
||||
"oauths": [
|
||||
{
|
||||
"issue": "git.imnya.ng",
|
||||
"provider": "GitHub",
|
||||
"client_id": "Iv1.xxxxx",
|
||||
"redirect_uri": "https://git.imnya.ng/user/oauth2/callback",
|
||||
"response_type": "code",
|
||||
"scope": "read:user",
|
||||
"oauth_uri": "https://github.com/login/oauth/authorize?client_id=Iv1.xxxxx&redirect_uri=https%3A%2F%2Fgit.imnya.ng%2Fuser%2Foauth2%2Fcallback&response_type=code&scope=read%3Auser"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Constraints:**
|
||||
|
||||
* Simulate realistic interaction with buttons (e.g., clicking them to follow redirects).
|
||||
* Ensure the output is strictly in the specified JSON format.
|
||||
* Avoid any additional text or explanations outside the JSON response.
|
||||
* If no OAuth logins are found, return an empty array.
|
||||
* WebAuthn and passkeys are not OAuth, so do not include them in the results.
|
||||
Loading…
Add table
Add a link
Reference in a new issue