[Update] prompt

This commit is contained in:
tv0924@icloud.com 2025-06-05 16:10:30 +09:00
commit 9818dafce4
3 changed files with 34430 additions and 1 deletions

34377
domains.txt Normal file

File diff suppressed because it is too large Load diff

36
lib/read_txt.py Normal file
View file

@ -0,0 +1,36 @@
def read_lines_between(filepath: str, start_line: int, end_line: int) -> list[str]:
    """Return lines ``start_line`` through ``end_line`` of a text file.

    Line numbers are 1-based and the range is inclusive on both ends.

    Parameters
    ----------
    filepath : str
        Path of the text file to read.
    start_line : int
        Number of the first line to return (counting from 1).
    end_line : int
        Number of the last line to return (``start_line <= end_line``).

    Returns
    -------
    list[str]
        The selected lines with the trailing newline removed. If the file
        has fewer lines than requested, only the lines that exist in the
        range are returned (possibly an empty list).

    Raises
    ------
    ValueError
        If ``start_line`` is less than 1 or ``end_line`` is less than
        ``start_line``.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from itertools import islice

    if start_line < 1 or end_line < start_line:
        raise ValueError("start_line은 1 이상이어야 하며, end_line은 start_line 이상이어야 합니다.")

    # 'utf-8-sig' reads plain UTF-8 exactly like 'utf-8', but also drops a
    # leading byte-order mark so it cannot leak into the first returned line.
    with open(filepath, 'r', encoding='utf-8-sig') as f:
        # islice is 0-based with an exclusive stop, hence ``start_line - 1``;
        # it stops consuming the file as soon as ``end_line`` has been read.
        return [line.rstrip('\n') for line in islice(f, start_line - 1, end_line)]

18
main.py
View file

@ -10,6 +10,7 @@ from browser_use import Agent, Browser, BrowserConfig, Controller
from browser_use.browser.context import BrowserContext, BrowserContextConfig
from lib.browser_config import browser_config_kwargs
from lib.is_html import is_html_url
from lib.read_txt import read_lines_between
load_dotenv()
@ -39,6 +40,15 @@ extend_planner_system_message = """
1. Locate the Login Page
- Navigate to the **client (non-enterprise)** login page.
- If a **privacy policy / cookie / consent popup** appears, **dismiss** or **close** it before continuing.
- If the page is blocked (e.g., by a firewall, CAPTCHA challenge, or any access restriction), terminate the process immediately and return:
```json
[
{
"provider": "Blocked",
"oauth_uri": "-"
}
]
```
2. On the Login Page
- Look for buttons like:
@ -58,6 +68,7 @@ extend_planner_system_message = """
- Capture the **first URL that the browser is redirected to** include query string. This URL should:
Look like: `https://example.com/auth/google`
Do NOT collect OAuth provider endpoint like: `https://accounts.google.com/...`
- If you notice any **repeated action** (for example, opening or clicking the same SSO button more than once, or looping between pages), **terminate the process immediately** and return an empty list: `[]`.
- Return the results in the following format:
[
{
@ -151,7 +162,12 @@ async def scan_one_url(url: str):
# ── 인터랙티브 입력 루프 ──
async def loop():
    """Scan a fixed slice of the domain list, one URL at a time.

    Reads a range of lines from ``./domains.txt`` via ``read_lines_between``
    and awaits ``scan_one_url`` for each entry, prefixing it with
    ``https://``. Entries are processed sequentially in file order.
    """
    # The line range selects the current batch of domains; adjust
    # start_line/end_line to scan a different part of the file.
    target_list = read_lines_between(
        filepath="./domains.txt",
        start_line=12187,
        end_line=12200,
    )
    for url in target_list:
        await scan_one_url(f'https://{url}')