browser-use 버전 업데이트, 프롬프트 개선, 임시 파일 삭제 구조 개선

This commit is contained in:
암냥 2025-07-11 15:51:29 +09:00
commit 0f5ab6dea1
16 changed files with 442 additions and 620 deletions

View file

@ -6,7 +6,7 @@ readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"black>=25.1.0", "black>=25.1.0",
"browser-use[memory]==0.3.3", "browser-use[memory]==0.5.3",
"chardet>=5.2.0", "chardet>=5.2.0",
"isort>=6.0.1", "isort>=6.0.1",
"lmnr[all]>=0.6.10", "lmnr[all]>=0.6.10",

10
run.py
View file

@ -1,5 +1,6 @@
import argparse import argparse
import os import os
import signal
import subprocess import subprocess
import sys import sys
from datetime import datetime from datetime import datetime
@ -35,6 +36,7 @@ def run_script(start_line, end_line, skh_option):
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"[{current_time}] Processing lines {start_line} to {end_line}...") print(f"[{current_time}] Processing lines {start_line} to {end_line}...")
process = None
try: try:
command = [ command = [
"uv", "run", PYTHON_SCRIPT, "uv", "run", PYTHON_SCRIPT,
@ -45,7 +47,13 @@ def run_script(start_line, end_line, skh_option):
if skh_option: if skh_option:
command.append("--skip-html-check") command.append("--skip-html-check")
subprocess.run(command, check=True) # KeyboardInterrupt를 subprocess에 전달하도록 수정
process = subprocess.Popen(command)
process.wait()
if process.returncode != 0:
print("Python 스크립트 실행 실패")
sys.exit(1)
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
print("Python 스크립트 실행 실패") print("Python 스크립트 실행 실패")
sys.exit(1) sys.exit(1)

View file

@ -1,5 +1,4 @@
from lib.browser_use.agents import * from lib.browser_use.agents import *
from lib.browser_use.clean_resources import *
from lib.browser_use.func import * from lib.browser_use.func import *
from lib.browser_use.init_profile import * from lib.browser_use.init_profile import *
from lib.browser_use.model import * from lib.browser_use.model import *

View file

@ -1,6 +1,7 @@
import asyncio import asyncio
import json import json
import os import os
import shutil
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
@ -8,7 +9,6 @@ from typing import Any, Dict, Optional
from browser_use import Agent, BrowserSession, Controller from browser_use import Agent, BrowserSession, Controller
from patchright.async_api import async_playwright as async_patchright from patchright.async_api import async_playwright as async_patchright
from lib.browser_use.clean_resources import clean_resources
from lib.browser_use.init_profile import GetProfile from lib.browser_use.init_profile import GetProfile
from lib.browser_use.sensitive_data import GetSensitiveData from lib.browser_use.sensitive_data import GetSensitiveData
from lib.llm import CreateChatGoogle, get_prompt from lib.llm import CreateChatGoogle, get_prompt
@ -147,15 +147,15 @@ async def _run_agent_with_retry(agent_config):
while try_cnt < 3: while try_cnt < 3:
try: try:
Profile = await GetProfile(headless=headless)
session = BrowserSession( session = BrowserSession(
playwright=(await async_patchright().start()), playwright=(await async_patchright().start()),
browser_profile=await GetProfile(headless=headless), browser_profile=Profile[0],
) )
agent = Agent(browser_session=session, **agent_config["agent_params"]) agent = Agent(browser_session=session, **agent_config["agent_params"])
response = await agent.run() response = await agent.run()
await clean_resources(agent, session)
if any( if any(
keyword in str(response) keyword in str(response)
@ -180,11 +180,14 @@ async def _run_agent_with_retry(agent_config):
await add_to_retry_queue(task) await add_to_retry_queue(task)
return None return None
# remove profile
if Profile[1]:
shutil.rmtree(Profile[1], ignore_errors=True)
print(f"🗑️ 임시 프로필 디렉토리 삭제 완료: {Profile[1]}")
return response return response
except Exception as e: except Exception as e:
await clean_resources(agent, session)
# 일반 에러 처리 # 일반 에러 처리
try_cnt += 1 try_cnt += 1
if try_cnt >= 3: if try_cnt >= 3:

View file

@ -1,26 +0,0 @@
from pathlib import Path
async def clean_resources(agent=None, session=None):
"""리소스를 정리하는 함수"""
storage_state_temp_path = Path("./data/storage_state_temp.json").resolve()
if storage_state_temp_path.exists():
try:
# remove file
print(f"🗑️ 임시 스토리지 상태 파일 삭제 중: {storage_state_temp_path}")
# unlink removes the file
storage_state_temp_path.unlink()
print("🗑️ 임시 스토리지 상태 파일 삭제 완료.")
except Exception as e:
print(f"⚠️ 임시 스토리지 상태 파일 삭제 실패: {e}")
if agent:
try:
await agent.close()
except Exception as e:
print(f"⚠️ 에이전트 리소스 정리 실패: {e}")
if session:
try:
await session.close()
except Exception as e:
print(f"⚠️ 세션 리소스 정리 실패: {e}")

View file

@ -11,9 +11,25 @@ proxy_url = setup_proxy()
async def GetProfile(headless=False): async def GetProfile(headless=False):
user_data_dir = None user_data_dir = None
tmp_user_data_dir = None
if USER_DATA_DIR and os.path.isdir(USER_DATA_DIR): if USER_DATA_DIR and os.path.isdir(USER_DATA_DIR):
try: try:
tmp_user_data_dir = tempfile.mkdtemp() tmp_user_data_dir = tempfile.mkdtemp()
# write path in user_data_dir_path
print(f"🔧 Using user data dir: {USER_DATA_DIR}")
print(f"🔧 Temporary user data dir: {tmp_user_data_dir}")
log_file = os.path.join("./data", "userdata.dump")
if not os.path.exists("./data"):
os.makedirs("./data")
if os.path.exists(log_file):
os.remove(log_file)
# Log current browser use directory
with open(log_file, "w") as f:
f.write(f"{tmp_user_data_dir}")
# Copy USER_DATA_DIR to tmp_user_data_dir
if os.path.exists(tmp_user_data_dir): if os.path.exists(tmp_user_data_dir):
shutil.rmtree(tmp_user_data_dir) shutil.rmtree(tmp_user_data_dir)
shutil.copytree(USER_DATA_DIR, tmp_user_data_dir, dirs_exist_ok=False, ignore_dangling_symlinks=True) shutil.copytree(USER_DATA_DIR, tmp_user_data_dir, dirs_exist_ok=False, ignore_dangling_symlinks=True)
@ -28,16 +44,17 @@ async def GetProfile(headless=False):
#stealth=True, #stealth=True,
# Display settings # Display settings
headless=headless, headless=headless,
device_scale_factor=1, #device_scale_factor=1,
window_size={"width": 1600, "height": 900}, #window_size={"width": 1600, "height": 900},
#viewport={"width": 1600, "height": 900},
# Data persistence # Data persistence
user_data_dir=user_data_dir, user_data_dir=user_data_dir,
#storage_state=storage_state, #storage_state=storage_state,
# Network settings # Network settings
proxy={"server": proxy_url} if proxy_url else None, proxy={"server": proxy_url} if proxy_url else None,
# Additional arguments # Additional arguments
args=get_browser_args(), #args=get_browser_args(),
ignore_default_args=['--enable-automation'] ignore_default_args=['--enable-automation', '--disable-extensions', '--hide-scrollbars', '--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DeferRendererTasksAfterInput,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate'],
) )
return profile return [profile, tmp_user_data_dir] if tmp_user_data_dir else [profile]

View file

@ -14,6 +14,7 @@ from lib.utils.progress import (
load_progress, load_progress,
progress_file, progress_file,
save_progress, save_progress,
is_shutdown_requested,
) )
@ -106,6 +107,11 @@ async def main_loop(
print(f"{start_index}번째부터 재개합니다.") print(f"{start_index}번째부터 재개합니다.")
for i, url in enumerate(target_list): for i, url in enumerate(target_list):
# 종료 요청 체크
if is_shutdown_requested():
print("🛑 종료 요청으로 인해 스캔을 중단합니다.")
break
# current_index는 전체 목록에서의 현재 위치를 나타냄 # current_index는 전체 목록에서의 현재 위치를 나타냄
current_url_index = current_progress["current_index"] current_url_index = current_progress["current_index"]
current_progress["current_url"] = url current_progress["current_url"] = url
@ -124,9 +130,18 @@ async def main_loop(
if i > 0: if i > 0:
print("⏳ API 쿼터 보호를 위해 30초 대기 중...") print("⏳ API 쿼터 보호를 위해 30초 대기 중...")
await asyncio.sleep(30) # 대기 중에도 종료 요청 체크
for _ in range(30):
if is_shutdown_requested():
print("🛑 대기 중 종료 요청으로 스캔을 중단합니다.")
return
await asyncio.sleep(1)
try:
await scan_one_url(url, skip_html_check=skip_html_check) await scan_one_url(url, skip_html_check=skip_html_check)
except Exception as e:
print(f"{url} 스캔 중 오류 발생: {e}")
continue
# 스캔 완료 후 재시도 큐 상태 확인 # 스캔 완료 후 재시도 큐 상태 확인
retry_status_after = await get_retry_queue_status() retry_status_after = await get_retry_queue_status()
@ -140,15 +155,27 @@ async def main_loop(
save_progress() save_progress()
# 모든 URL 처리 완료 후 재시도 큐가 빌 때까지 대기 # 모든 URL 처리 완료 후 재시도 큐가 빌 때까지 대기
if not is_shutdown_requested():
print("\n🔄 모든 URL 처리 완료. 재시도 큐 처리 대기 중...") print("\n🔄 모든 URL 처리 완료. 재시도 큐 처리 대기 중...")
while True: while True:
if is_shutdown_requested():
print("🛑 재시도 큐 대기 중 종료 요청으로 중단합니다.")
return
retry_status = await get_retry_queue_status() retry_status = await get_retry_queue_status()
if retry_status["queue_length"] == 0: if retry_status["queue_length"] == 0:
break break
print( print(
f"⏳ 재시도 큐에 {retry_status['queue_length']}개 작업 남음. 30초 후 다시 확인..." f"⏳ 재시도 큐에 {retry_status['queue_length']}개 작업 남음. 30초 후 다시 확인..."
) )
await asyncio.sleep(30) # 대기 중에도 종료 요청 체크
for _ in range(30):
if is_shutdown_requested():
print("🛑 재시도 큐 대기 중 종료 요청으로 중단합니다.")
return
await asyncio.sleep(1)
print(f"\n🎉 모든 스캔이 완료되었습니다! ({total_count}개 URL)") print(f"\n🎉 모든 스캔이 완료되었습니다! ({total_count}개 URL)")
print("🎉 재시도 큐도 모두 처리되었습니다!") print("🎉 재시도 큐도 모두 처리되었습니다!")
else:
print("\n🛑 종료 요청으로 인해 스캔이 중단되었습니다.")

View file

@ -31,6 +31,7 @@ Instructions:
4. Check if the user is **already logged in and immediately redirected back to the original site** without showing a login screen. 4. Check if the user is **already logged in and immediately redirected back to the original site** without showing a login screen.
- If so, treat the login as successful and return immediately. - If so, treat the login as successful and return immediately.
5. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL. 5. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
6. Close your browser window after the login is completed.
Credentials to use for login: Credentials to use for login:
- Google `{google_id}` / `{google_password}` - Google `{google_id}` / `{google_password}`
@ -38,7 +39,6 @@ Credentials to use for login:
- GitHub `{github_id}` / `{github_password}` - GitHub `{github_id}` / `{github_password}`
- Facebook `{facebook_id}` / `{facebook_password}` - Facebook `{facebook_id}` / `{facebook_password}`
- Microsoft `{microsoft_id}` / `{microsoft_password}` - Microsoft `{microsoft_id}` / `{microsoft_password}`
If credentials for a provider are not provided, skip the login attempt for that provider.
Constraints: Constraints:
- Do NOT use search engines or guess URLs. - Do NOT use search engines or guess URLs.

View file

@ -27,6 +27,7 @@ Instructions:
b. If a **MFA prompt**, or a request for **ID/password entry** appears, do NOT proceed - Immediately stop and return the appropriate status. b. If a **MFA prompt**, or a request for **ID/password entry** appears, do NOT proceed - Immediately stop and return the appropriate status.
- If a **"Continue"**, **"Trust"**, **"Authorize"**, or **"Allow"** button is displayed, click it to grant consent. - If a **"Continue"**, **"Trust"**, **"Authorize"**, or **"Allow"** button is displayed, click it to grant consent.
7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL. 7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
8. Close your browser window after the login is completed.
Credentials to use for Apple login: Credentials to use for Apple login:
- Email: {os.getenv("APPLE_EMAIL", "")} - Email: {os.getenv("APPLE_EMAIL", "")}

View file

@ -39,6 +39,7 @@ Instructions:
- Password: "TestPassword123!" - Password: "TestPassword123!"
- Click "Create account", "Sign up", or "Complete registration" button - Click "Create account", "Sign up", or "Complete registration" button
- Only after completing ALL steps, record the final URL as successful login - Only after completing ALL steps, record the final URL as successful login
9. If all steps are completed successfully, close your browser window.
Credentials to use for Facebook login (if needed): Credentials to use for Facebook login (if needed):
- Email/Phone: {os.getenv("FACEBOOK_EMAIL", "")} - Email/Phone: {os.getenv("FACEBOOK_EMAIL", "")}

View file

@ -43,6 +43,8 @@ Instructions:
7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL. 7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
8. Close your browser window after the login is completed.
Credentials to use for GitHub login: Credentials to use for GitHub login:
- Email: {os.getenv("GITHUB_EMAIL", "")} - Email: {os.getenv("GITHUB_EMAIL", "")}
- Password: {os.getenv("GITHUB_PASSWORD", "")} - Password: {os.getenv("GITHUB_PASSWORD", "")}

View file

@ -28,6 +28,7 @@ Instructions:
c. Wait for the password input field, then enter the password: {os.getenv("GOOGLE_PASSWORD", "")} c. Wait for the password input field, then enter the password: {os.getenv("GOOGLE_PASSWORD", "")}
d. Click the "Sign in" or "Next" button. d. Click the "Sign in" or "Next" button.
7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL. 7. If login proceeds without interruptions, wait for redirection back to the original site and record the final URL.
8. Close your browser window after the login is completed.
Credentials to use for Google login: Credentials to use for Google login:
- Email: {os.getenv("GOOGLE_EMAIL", "")} - Email: {os.getenv("GOOGLE_EMAIL", "")}

View file

@ -29,6 +29,7 @@ prompt = f"""
8. 로그인 되어있지 않으면 아래의 EMAIL과 PASSWORD를 사용하여 로그인하세요: 8. 로그인 되어있지 않으면 아래의 EMAIL과 PASSWORD를 사용하여 로그인하세요:
- Email: {os.getenv("MICROSOFT_EMAIL", "")} - Email: {os.getenv("MICROSOFT_EMAIL", "")}
- Password: {os.getenv("MICROSOFT_PASSWORD", "")} - Password: {os.getenv("MICROSOFT_PASSWORD", "")}
9. 로그인 완료 후 브라우저 창을 닫으세요.
제약 사항: 제약 사항:
- 검색 엔진을 사용하거나 URL을 추측하지 마세요. - 검색 엔진을 사용하거나 URL을 추측하지 마세요.

View file

@ -1,12 +1,20 @@
import json import json
import os import os, sys
import signal import signal
import time
import threading
from pathlib import Path from pathlib import Path
# 진행 상황 추적을 위한 전역 변수 # 진행 상황 추적을 위한 전역 변수
current_progress = {"current_index": 0, "total": 0, "current_url": "", "start_line": 0} current_progress = {"current_index": 0, "total": 0, "current_url": "", "start_line": 0}
progress_file = Path("data/scan_progress.json") progress_file = Path("data/scan_progress.json")
# Ctrl+C 처리를 위한 전역 변수
ctrl_c_count = 0
last_ctrl_c_time = 0
shutdown_requested = False
shutdown_lock = threading.Lock()
def save_progress(): def save_progress():
"""현재 진행 상황을 파일에 저장""" """현재 진행 상황을 파일에 저장"""
@ -27,25 +35,48 @@ def load_progress():
def signal_handler(signum, frame): def signal_handler(signum, frame):
"""Ctrl+C 시그널 핸들러""" """Ctrl+C 시그널 핸들러 - 강제 종료"""
global shutdown_requested
with shutdown_lock:
if shutdown_requested:
# 이미 종료 요청이 있었다면 즉시 강제 종료
print("\n⚠️ 강제 종료합니다!")
os._exit(1)
shutdown_requested = True
print("\n" + "=" * 60) print("\n" + "=" * 60)
print("🛑 스캔이 중단되었습니다!") print("🛑 종료 신호를 받았습니다!")
print(f"📊 진행 상황:") print(f"📊 현재 진행 상황:")
print(f" - 전체: {current_progress['total']}개 URL") print(f" - 전체: {current_progress['total']}개 URL")
print(f" - 완료: {current_progress['current_index']}개 URL") print(f" - 완료: {current_progress['current_index']}개 URL")
print(f" - 현재 처리 중: {current_progress['current_url']}") print(f" - 현재 처리 중: {current_progress['current_url']}")
print( if current_progress.get('start_line'):
f" - domains.txt의 {current_progress['start_line'] + current_progress['current_index']}번째 줄" print(f" - domains.txt의 {current_progress['start_line'] + current_progress['current_index']}번째 줄")
)
if current_progress["total"] > 0: if current_progress["total"] > 0:
print( print(f" - 진행률: {current_progress['current_index']}/{current_progress['total']} ({current_progress['current_index']/current_progress['total']*100:.1f}%)")
f" - 진행률: {current_progress['current_index']}/{current_progress['total']} ({current_progress['current_index']/current_progress['total']*100:.1f}%)"
)
print("=" * 60) print("=" * 60)
# 진행 상황 저장
save_progress() save_progress()
print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.") print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
exit(0) print("다음에 같은 명령어로 실행하면 이어서 진행할 수 있습니다.")
print("\n🔄 정리 작업 중... (다시 Ctrl+C를 누르면 강제 종료)")
# 정리 작업을 위해 잠시 대기 후 종료
def delayed_exit():
time.sleep(2) # 2초 후 자동 종료
print("\n✅ 정리 완료. 프로그램을 종료합니다.")
os._exit(0)
threading.Thread(target=delayed_exit, daemon=True).start()
def is_shutdown_requested():
"""종료 요청 상태를 확인하는 함수"""
with shutdown_lock:
return shutdown_requested
def setup_signal_handler(): def setup_signal_handler():
"""시그널 핸들러 등록""" """시그널 핸들러 등록"""

View file

@ -26,6 +26,8 @@ def setup_environment():
Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY")) Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY"))
except ImportError: except ImportError:
print("⚠️ Laminar 라이브러리가 설치되지 않았습니다. 관련 기능이 비활성화됩니다.") print("⚠️ Laminar 라이브러리가 설치되지 않았습니다. 관련 기능이 비활성화됩니다.")
else:
print("⚠️ LMNR_PROJECT_API_KEY 환경 변수가 설정되지 않았습니다. Laminar 기능이 비활성화됩니다.")
def parse_arguments(): def parse_arguments():
@ -64,6 +66,18 @@ def main():
setup_signal_handler() setup_signal_handler()
args = parse_arguments() args = parse_arguments()
# read and remove user data path
log_file = os.path.join("./data", "userdata.dump")
if not os.path.exists("./data"):
os.makedirs("./data")
if os.path.exists(log_file):
with open(log_file, "r") as f:
tmp_user_data_dir = f.read().strip()
os.remove(tmp_user_data_dir)
os.remove(log_file)
print(f"🔧 강제로 종료되기 전에 사용한 {tmp_user_data_dir}를 삭제하였습니다.")
try: try:
asyncio.run( asyncio.run(
main_loop( main_loop(
@ -74,11 +88,24 @@ def main():
) )
) )
except KeyboardInterrupt: except KeyboardInterrupt:
print("\n프로그램이 사용자에 의해 중단되었습니다.") print("\n사용자에 의해 중단되었습니다. 현재까지의 작업을 저장합니다...")
from lib.utils.progress import save_progress
save_progress()
print(f"💾 진행 상황이 {progress_file}에 저장되었습니다.")
print("다음에 같은 명령어로 실행하면 이어서 진행할 수 있습니다.")
# terminate
sys.exit(0)
except Exception as e:
print(f"\n❌ 예상치 못한 오류가 발생했습니다: {e}")
import traceback
traceback.print_exc()
sys.exit(1) sys.exit(1)
finally: finally:
# 정상 종료 시 진행 상황 파일 삭제 # 정상 종료 시 진행 상황 파일 삭제 (종료 요청이 아닌 경우에만)
if os.path.exists(progress_file): from lib.utils.progress import is_shutdown_requested
if not is_shutdown_requested() and os.path.exists(progress_file):
try: try:
os.remove(progress_file) os.remove(progress_file)
print("진행 상황 파일이 삭제되었습니다.") print("진행 상황 파일이 삭제되었습니다.")

836
uv.lock generated

File diff suppressed because it is too large Load diff