chore: storage_state.json 관련해서 수정함

- Windows 싫어요
This commit is contained in:
암냥 2025-06-23 19:19:56 +09:00
commit e1319a108d
5 changed files with 66 additions and 82 deletions

View file

@ -96,7 +96,7 @@ curl "https://f.imnya.ng/.whs/tp-domains/data/domains/latest.txt" -o domains.txt
``` ```
```sh ```sh
# uv run run.py {domains.txt 시작 줄} {domains.txt 끝 줄} {--skh} # uv run run.py {domains.txt 시작 줄} {domains.txt 끝 줄} {--skh} {--no-download}
uv run run.py 1 100 --skh uv run run.py 1 100 --skh
``` ```

View file

@ -3,45 +3,12 @@ import json
from pathlib import Path from pathlib import Path
from dotenv import load_dotenv from dotenv import load_dotenv
from browser_use import BrowserProfile from browser_use import BrowserProfile
import json
import os
# Load environment variables # Load environment variables
load_dotenv(override=True) load_dotenv(override=True)
def safe_json_read(file_path: Path) -> dict:
    """Read a JSON file, retrying with fallback encodings.

    The file is read from disk once; the raw bytes are then decoded with each
    candidate encoding in turn until one both decodes and parses as JSON.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        The parsed JSON value (callers expect a JSON object), or an empty
        dict when no candidate encoding yields valid JSON.

    Raises:
        OSError: If the file cannot be opened or read (e.g. it is missing).
    """
    with open(file_path, 'rb') as f:
        raw = f.read()

    # Order matters: latin1 maps every possible byte to a code point and so
    # never fails to decode. It must come last, otherwise the cp1252 attempt
    # is unreachable and Windows-1252 text (bytes 0x80-0x9F, e.g. curly
    # quotes) is silently decoded into C1 control characters.
    for encoding in ('utf-8', 'utf-8-sig', 'cp1252', 'latin1'):
        try:
            return json.loads(raw.decode(encoding))
        except (UnicodeDecodeError, json.JSONDecodeError):
            continue
    return {}
def safe_json_write(file_path: Path, data: dict) -> None:
    """Write ``data`` to ``file_path`` as UTF-8 encoded, 4-space indented JSON.

    Non-ASCII characters are written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        file_path: Destination file; it is created or overwritten.
        data: JSON-serializable content to store.

    Raises:
        TypeError: If ``data`` contains a value that is not JSON-serializable.
        ValueError: If ``data`` contains a circular reference.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before opening: mode 'w' truncates the target immediately, so
    # a serialization error raised while the file is open would leave an
    # empty or half-written file in place of the previous content.
    payload = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(payload)
def setup_proxy():
    """Build the proxy URL from the PROXY_HOST / PROXY_PORT environment variables.

    Returns:
        ``"http://<host>:<port>"`` when both variables are set to non-empty
        values, otherwise ``None``. A status line is printed in either case.
    """
    host = os.getenv("PROXY_HOST")
    port = os.getenv("PROXY_PORT")

    # Both parts are required; a missing or empty value means no proxy.
    if not (host and port):
        print("🔗 No proxy configured, using direct connection.")
        return None

    print(f"🔗 Using proxy: {host}:{port}")
    return f"http://{host}:{port}"
async def setup_storage_state(): async def setup_storage_state():
"""Setup browser storage state for session persistence.""" """Setup browser storage state for session persistence."""
# Get the script directory to ensure correct path resolution # Get the script directory to ensure correct path resolution
@ -57,20 +24,17 @@ async def setup_storage_state():
if storage_state_temp_path.exists(): if storage_state_temp_path.exists():
storage_state_temp_path.unlink() storage_state_temp_path.unlink()
# 안전한 JSON 파일 처리 (인코딩 문제 해결) with open(storage_state_path, 'r') as f:
storage_data = safe_json_read(storage_state_path) storage_data = json.load(f)
with open(storage_state_temp_path, 'w') as f:
json.dump(storage_data, f, indent=4)
if storage_data: # 데이터가 성공적으로 읽혔다면
safe_json_write(storage_state_temp_path, storage_data)
print(f"🔄 Using existing storage state: {storage_state_temp_path}") print(f"🔄 Using existing storage state: {storage_state_temp_path}")
return str(storage_state_temp_path) return str(storage_state_temp_path)
else:
print("⚠️ Storage state file is empty or corrupted")
return None
except Exception as e: except Exception as e:
print(f"⚠️ Error processing storage state: {e}") print(f"⚠️ Error processing storage state: {e}")
# 문제가 있는 파일을 제거하고 새로 시작
if storage_state_temp_path.exists(): if storage_state_temp_path.exists():
storage_state_temp_path.unlink() storage_state_temp_path.unlink()
return None return None
@ -79,6 +43,20 @@ async def setup_storage_state():
return None return None
def setup_proxy():
    """Build the proxy URL from the PROXY_HOST / PROXY_PORT environment variables.

    Returns:
        ``"http://<host>:<port>"`` when both variables are set to non-empty
        values, otherwise ``None``. A status line is printed in either case.
    """
    host = os.getenv("PROXY_HOST")
    port = os.getenv("PROXY_PORT")

    # Both parts are required; a missing or empty value means no proxy.
    if not (host and port):
        print("🔗 No proxy configured, using direct connection.")
        return None

    print(f"🔗 Using proxy: {host}:{port}")
    return f"http://{host}:{port}"
def get_browser_args(): def get_browser_args():
"""Get browser arguments for enhanced compatibility and security.""" """Get browser arguments for enhanced compatibility and security."""
return [ return [
@ -108,19 +86,3 @@ def get_browser_args():
# Language # Language
f"--lang={os.getenv('LANG', 'en_US')}", f"--lang={os.getenv('LANG', 'en_US')}",
] ]
def cleanup_corrupted_storage_files():
    """Delete ``data/storage_state_temp.json`` if it is not valid UTF-8 JSON.

    The file is looked up under the directory four levels above this module.
    Nothing happens when the file is absent; when it parses cleanly a
    confirmation line is printed and the file is left in place.
    """
    base_dir = Path(__file__).parent.parent.parent.parent
    temp_state_file = base_dir / "data" / "storage_state_temp.json"

    if not temp_state_file.exists():
        return

    try:
        # Parsing is only a validity probe; the decoded value is discarded.
        with open(temp_state_file, 'r', encoding='utf-8') as handle:
            json.load(handle)
    except (UnicodeDecodeError, json.JSONDecodeError) as e:
        print(f"🗑️ Removing corrupted storage temp file: {e}")
        temp_state_file.unlink()
    else:
        print(f"✅ Storage temp file is valid: {temp_state_file}")

18
main.py
View file

@ -161,7 +161,9 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False):
await asyncio.sleep(wait) await asyncio.sleep(wait)
try_cnt += 1 try_cnt += 1
if try_cnt >= 3: if try_cnt >= 3:
print(f"{url} OAuth 리스트 추출 실패: API 쿼터 문제가 지속됩니다.") print(
f"{url} OAuth 리스트 추출 실패: API 쿼터 문제가 지속됩니다."
)
logger(f"{url} OAuth 리스트 추출 실패: API 쿼터 문제: {e}") logger(f"{url} OAuth 리스트 추출 실패: API 쿼터 문제: {e}")
return [] return []
continue continue
@ -241,8 +243,12 @@ async def test_oauth_login(url: str, oauth_provider: str):
await asyncio.sleep(wait) await asyncio.sleep(wait)
try_cnt += 1 try_cnt += 1
if try_cnt >= 3: if try_cnt >= 3:
print(f"{oauth_provider} 로그인 실패: API 쿼터 문제가 지속됩니다.") print(
logger(f"{url} - {oauth_provider} 로그인 실패: API 쿼터 문제: {e}") f"{oauth_provider} 로그인 실패: API 쿼터 문제가 지속됩니다."
)
logger(
f"{url} - {oauth_provider} 로그인 실패: API 쿼터 문제: {e}"
)
return False return False
continue continue
# 일반 에러 처리 # 일반 에러 처리
@ -291,7 +297,9 @@ async def scan_one_url(url: str, skip_html_check: bool = False):
# 2단계: 각 OAuth 제공자별로 개별 로그인 시도 # 2단계: 각 OAuth 제공자별로 개별 로그인 시도
for i, oauth_entry in enumerate(oauth_entries): for i, oauth_entry in enumerate(oauth_entries):
print(f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry.provider}") print(
f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry.provider}"
)
# OAuth 간 대기 시간 # OAuth 간 대기 시간
if i > 0: if i > 0:
@ -304,6 +312,8 @@ async def scan_one_url(url: str, skip_html_check: bool = False):
# 결과를 CSV에 업데이트 (간단하게 로그만 남김) # 결과를 CSV에 업데이트 (간단하게 로그만 남김)
status = "success" if success else "failed" status = "success" if success else "failed"
print(f"📝 {oauth_entry.provider} 로그인 결과: {status}") print(f"📝 {oauth_entry.provider} 로그인 결과: {status}")
async def loop( async def loop(
filepath: str, start_line: int, end_line: int, skip_html_check: bool = False filepath: str, start_line: int, end_line: int, skip_html_check: bool = False
): ):

View file

@ -6,6 +6,7 @@ readme = "README.md"
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"browser-use[memory]==0.3.2", "browser-use[memory]==0.3.2",
"chardet>=5.2.0",
"lmnr[all]>=0.6.10", "lmnr[all]>=0.6.10",
"patchright>=1.52.5", "patchright>=1.52.5",
] ]

11
uv.lock generated
View file

@ -144,6 +144,7 @@ version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "browser-use", extra = ["memory"] }, { name = "browser-use", extra = ["memory"] },
{ name = "chardet" },
{ name = "lmnr", extra = ["all"] }, { name = "lmnr", extra = ["all"] },
{ name = "patchright" }, { name = "patchright" },
] ]
@ -151,6 +152,7 @@ dependencies = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "browser-use", extras = ["memory"], specifier = "==0.3.2" }, { name = "browser-use", extras = ["memory"], specifier = "==0.3.2" },
{ name = "chardet", specifier = ">=5.2.0" },
{ name = "lmnr", extras = ["all"], specifier = ">=0.6.10" }, { name = "lmnr", extras = ["all"], specifier = ">=0.6.10" },
{ name = "patchright", specifier = ">=1.52.5" }, { name = "patchright", specifier = ">=1.52.5" },
] ]
@ -211,6 +213,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" },
] ]
[[package]]
name = "chardet"
version = "5.2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" },
]
[[package]] [[package]]
name = "charset-normalizer" name = "charset-normalizer"
version = "3.4.2" version = "3.4.2"