From e1319a108d08d9af4bdafebf14b0b868c06a061a Mon Sep 17 00:00:00 2001 From: imnyang Date: Mon, 23 Jun 2025 19:19:56 +0900 Subject: [PATCH] =?UTF-8?q?chore:=20storage=5Fstate.json=20=EA=B4=80?= =?UTF-8?q?=EB=A0=A8=ED=95=B4=EC=84=9C=20=EC=88=98=EC=A0=95=ED=95=A8=20-?= =?UTF-8?q?=20Windows=20=EC=8B=AB=EC=96=B4=EC=9A=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- lib/utils/browser_use/func.py | 84 ++++++++++------------------------- main.py | 50 ++++++++++++--------- pyproject.toml | 1 + uv.lock | 11 +++++ 5 files changed, 66 insertions(+), 82 deletions(-) diff --git a/README.md b/README.md index c9ade30..e827509 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ curl "https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt" -o domains.txt ``` ```sh -# uv run run.py {domains.txt 시작 줄} {domains.txt 끝 줄} {--skh} +# uv run run.py {domains.txt 시작 줄} {domains.txt 끝 줄} {--skh} {--no-download} uv run run.py 1 100 --skh ``` diff --git a/lib/utils/browser_use/func.py b/lib/utils/browser_use/func.py index afc6d52..5c2faa8 100644 --- a/lib/utils/browser_use/func.py +++ b/lib/utils/browser_use/func.py @@ -3,45 +3,12 @@ import json from pathlib import Path from dotenv import load_dotenv from browser_use import BrowserProfile +import json +import os # Load environment variables load_dotenv(override=True) -def safe_json_read(file_path: Path) -> dict: - """Safely read JSON file with proper encoding handling.""" - try: - with open(file_path, 'r', encoding='utf-8') as f: - return json.load(f) - except (UnicodeDecodeError, json.JSONDecodeError): - # Try with different encodings - for encoding in ['utf-8-sig', 'latin1', 'cp1252']: - try: - with open(file_path, 'r', encoding=encoding) as f: - return json.load(f) - except (UnicodeDecodeError, json.JSONDecodeError): - continue - return {} - -def safe_json_write(file_path: Path, data: dict): - """Safely write JSON file with proper encoding handling.""" - with open(file_path, 'w', encoding='utf-8') as f: - json.dump(data, f, ensure_ascii=False, indent=4) - - -def setup_proxy(): - """Configure proxy settings from environment variables.""" - proxy_host = os.getenv("PROXY_HOST") - proxy_port = os.getenv("PROXY_PORT") - - if proxy_host and proxy_port: - proxy_url = f"http://{proxy_host}:{proxy_port}" - print(f"🔗 Using proxy: {proxy_host}:{proxy_port}") - return proxy_url - else: - print("🔗 No proxy configured, using direct connection.") - return None - - async def setup_storage_state(): """Setup browser storage state for session persistence.""" # Get the script directory to ensure correct path resolution @@ -57,20 +24,17 @@ async def setup_storage_state(): if storage_state_temp_path.exists(): storage_state_temp_path.unlink() - # 안전한 JSON 파일 처리 (인코딩 문제 해결) - storage_data = safe_json_read(storage_state_path) + with open(storage_state_path, 'r') as f: + storage_data = json.load(f) - if storage_data: # 데이터가 성공적으로 읽혔다면 - safe_json_write(storage_state_temp_path, storage_data) - print(f"🔄 Using existing storage state: {storage_state_temp_path}") - return str(storage_state_temp_path) - else: - print("⚠️ Storage state file is empty or corrupted") - return None + with open(storage_state_temp_path, 'w') as f: + json.dump(storage_data, f, indent=4) + + print(f"🔄 Using existing storage state: {storage_state_temp_path}") + return str(storage_state_temp_path) except Exception as e: print(f"⚠️ Error processing storage state: {e}") - # 문제가 있는 파일을 제거하고 새로 시작 if storage_state_temp_path.exists(): storage_state_temp_path.unlink() return None @@ -79,6 +43,20 @@ async def setup_storage_state(): return None +def setup_proxy(): + """Configure proxy settings from environment variables.""" + proxy_host = os.getenv("PROXY_HOST") + proxy_port = os.getenv("PROXY_PORT") + + if proxy_host and proxy_port: + proxy_url = f"http://{proxy_host}:{proxy_port}" + print(f"🔗 Using proxy: {proxy_host}:{proxy_port}") + return proxy_url + else: + print("🔗 No proxy configured, using direct connection.") + return None + + def get_browser_args(): """Get browser arguments for enhanced compatibility and security.""" return [ @@ -108,19 +86,3 @@ def get_browser_args(): # Language f"--lang={os.getenv('LANG', 'en_US')}", ] - -def cleanup_corrupted_storage_files(): - """Clean up corrupted storage state files.""" - script_dir = Path(__file__).parent.parent.parent.parent - storage_state_temp_path = script_dir / "data" / "storage_state_temp.json" - - if storage_state_temp_path.exists(): - try: - # Try to read the file to check if it's corrupted - with open(storage_state_temp_path, 'r', encoding='utf-8') as f: - json.load(f) - print(f"✅ Storage temp file is valid: {storage_state_temp_path}") - except (UnicodeDecodeError, json.JSONDecodeError) as e: - print(f"🗑️ Removing corrupted storage temp file: {e}") - storage_state_temp_path.unlink() - diff --git a/main.py b/main.py index cccb656..a990aa5 100644 --- a/main.py +++ b/main.py @@ -104,7 +104,7 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): agent = None session = None try_cnt = 0 - + while True: session = BrowserSession( playwright=(await async_patchright().start()), @@ -118,7 +118,7 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): ) print("🤖 OAuth 리스트 추출 Agent 초기화...") - + try: agent = Agent( browser_session=session, @@ -139,19 +139,19 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): controller=controller, extend_planner_system_message=get_prompt("auth"), ) - + response = await agent.run() final_result = response.final_result() if final_result is None: raise ValueError("OAuth 리스트 추출 결과가 None입니다.") - + data = json.loads(final_result) oauth_entries = [model.OAuth(**entry) for entry in data["oauth_providers"]] - + await clean_resources(agent, session) return oauth_entries - + except Exception as e: await clean_resources(agent, session) # API 쿼터 문제인지 확인 @@ -161,7 +161,9 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): await asyncio.sleep(wait) try_cnt += 1 if try_cnt >= 3: - print(f"❌ {url} OAuth 리스트 추출 실패: API 쿼터 문제가 지속됩니다.") + print( + f"❌ {url} OAuth 리스트 추출 실패: API 쿼터 문제가 지속됩니다." + ) logger(f"❌ {url} OAuth 리스트 추출 실패: API 쿼터 문제: {e}") return [] continue @@ -186,7 +188,7 @@ async def test_oauth_login(url: str, oauth_provider: str): agent = None session = None try_cnt = 0 - + while True: session = BrowserSession( playwright=(await async_patchright().start()), @@ -199,7 +201,7 @@ async def test_oauth_login(url: str, oauth_provider: str): ) print(f"🤖 {oauth_provider} 로그인 Agent 초기화...") - + try: agent = Agent( browser_session=session, @@ -221,17 +223,17 @@ async def test_oauth_login(url: str, oauth_provider: str): controller=controller, extend_planner_system_message=get_prompt(oauth_provider), ) - + response = await agent.run() final_result = response.final_result() - + print(f"✅ {oauth_provider} 로그인 완료") if final_result: logger(f"✅ {url} - {oauth_provider} 로그인 결과: {final_result}") - + await clean_resources(agent, session) return True - + except Exception as e: await clean_resources(agent, session) # API 쿼터 문제인지 확인 @@ -241,8 +243,12 @@ async def test_oauth_login(url: str, oauth_provider: str): await asyncio.sleep(wait) try_cnt += 1 if try_cnt >= 3: - print(f"❌ {oauth_provider} 로그인 실패: API 쿼터 문제가 지속됩니다.") - logger(f"❌ {url} - {oauth_provider} 로그인 실패: API 쿼터 문제: {e}") + print( + f"❌ {oauth_provider} 로그인 실패: API 쿼터 문제가 지속됩니다." + ) + logger( + f"❌ {url} - {oauth_provider} 로그인 실패: API 쿼터 문제: {e}" + ) return False continue # 일반 에러 처리 @@ -267,7 +273,7 @@ async def scan_one_url(url: str, skip_html_check: bool = False): # 1단계: OAuth 리스트 추출 oauth_entries = await extract_oauth_list(url, skip_html_check) - + if not oauth_entries: print(f"❌ {target_url}에서 OAuth 제공자를 찾을 수 없습니다.") return @@ -291,19 +297,23 @@ async def scan_one_url(url: str, skip_html_check: bool = False): # 2단계: 각 OAuth 제공자별로 개별 로그인 시도 for i, oauth_entry in enumerate(oauth_entries): - print(f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry.provider}") - + print( + f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry.provider}" + ) + # OAuth 간 대기 시간 if i > 0: print("⏳ OAuth 테스트 간 대기 중 (30초)...") await asyncio.sleep(30) - + # 개별 OAuth 로그인 시도 success = await test_oauth_login(url, oauth_entry.provider) - + # 결과를 CSV에 업데이트 (간단하게 로그만 남김) status = "success" if success else "failed" print(f"📝 {oauth_entry.provider} 로그인 결과: {status}") + + async def loop( filepath: str, start_line: int, end_line: int, skip_html_check: bool = False ): diff --git a/pyproject.toml b/pyproject.toml index 1b2f537..4c453ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "browser-use[memory]==0.3.2", + "chardet>=5.2.0", "lmnr[all]>=0.6.10", "patchright>=1.52.5", ] diff --git a/uv.lock b/uv.lock index c125c7a..02e9d9b 100644 --- a/uv.lock +++ b/uv.lock @@ -144,6 +144,7 @@ version = "0.1.0" source = { virtual = "." } dependencies = [ { name = "browser-use", extra = ["memory"] }, + { name = "chardet" }, { name = "lmnr", extra = ["all"] }, { name = "patchright" }, ] @@ -151,6 +152,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "browser-use", extras = ["memory"], specifier = "==0.3.2" }, + { name = "chardet", specifier = ">=5.2.0" }, { name = "lmnr", extras = ["all"], specifier = ">=0.6.10" }, { name = "patchright", specifier = ">=1.52.5" }, ] @@ -211,6 +213,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.2"