feat: 코드베이스 리팩터링

* `run.py`에서 `main.py` 경로를 명시적으로 지정하고, 명령줄 인자를 보다 사용하기 쉽게 조정했습니다.
* 에이전트, 리소스 정리, 공통 함수, 모델 등을 포함하는 브라우저 유틸리티용 신규 모듈 구조를 만들었습니다.
* `agents.py`에 비동기 에이전트 실행 및 재시도 로직을 구현했습니다.
* `scanner.py`에 OAuth URL 추출 및 로그인 테스트 기능을 추가했습니다.
* 코드베이스 전반에 걸쳐 에러 핸들링 및 로깅을 강화했습니다.
* 백엔드 URL과 Google API 키 등의 관리를 위한 환경변수 기반 설정 시스템을 도입했습니다.
* 스캐닝 중 진행 상태 추적 및 시그널 핸들링을 통한 정상 종료 처리를 개선했습니다.
* 텍스트 파일 읽기 및 HTML 콘텐츠 여부 확인을 위한 유틸리티 함수를 추가했습니다.
* LLM과의 상호작용을 위한 구조화된 프롬프트 시스템을 구축했습니다.
2026-06-04 06:51:52 +09:00 · 2025-06-26 21:44:31 +09:00 · 2025-06-26 21:44:31 +09:00 · 069dbf446d
commit 069dbf446d
parent 1ddc3c41bc
29 changed files with 453 additions and 452 deletions
--- a/src/lib/browser_use/scanner.py
+++ b/src/lib/browser_use/scanner.py
@ -0,0 +1,106 @@
+import asyncio
+import os
+import csv
+
+from lib.utils import notify_backend, read_lines_between, is_html_url
+from lib.browser_use.agents import extract_oauth_list, test_oauth_login
+from lib.utils.progress import current_progress, load_progress, save_progress, progress_file
+
+async def scan_one_url(url: str, skip_html_check: bool = False):
+    """URL 스캔 통합 함수: OAuth 리스트 추출 → 개별 OAuth 로그인 시도"""
+    target_url = url if url.startswith("http") else f"https://{url}"
+    print(f"🚀 스캔 시작: {target_url}")
+
+    # Backend에 스캔 시작을 알림
+    notify_backend(target_url)
+
+    # 1) URL이 HTML 페이지인지 확인
+    if not is_html_url(target_url) and not skip_html_check:
+        print(f"❌ {target_url} 은(는) HTML이 아닙니다. 스킵합니다.")
+        return
+
+    # 1단계: OAuth 리스트 추출
+    oauth_entries = await extract_oauth_list(target_url)
+
+    if not oauth_entries:
+        print(f"❌ {target_url}에서 OAuth 제공자를 찾을 수 없습니다.")
+        return
+
+    print("-" * 50)
+    print(f"🔗 스캔 URL: {url}")
+    print(f"🔐 발견된 OAuth 제공자들: {len(oauth_entries)}개")
+    for entry in oauth_entries:
+        print(f"  - {entry.provider}")
+    print("-" * 50)
+
+    # CSV에 OAuth 리스트 저장
+    csv_file = "./data/oauth_providers.csv"
+    file_exists = os.path.isfile(csv_file)
+    with open(csv_file, "a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        if not file_exists:
+            writer.writerow(["issuer", "provider", "oauth_uri", "login_tested"])
+        for entry in oauth_entries:
+            writer.writerow([url, entry.provider, "", "pending"])
+
+    # 2단계: 각 OAuth 제공자별로 개별 로그인 시도
+    for i, oauth_entry in enumerate(oauth_entries):
+        print(
+            f"\n🔄 OAuth 로그인 테스트 {i+1}/{len(oauth_entries)}: {oauth_entry.provider}"
+        )
+
+        # OAuth 간 대기 시간
+        if i > 0:
+            print("⏳ OAuth 테스트 간 대기 중 (30초)...")
+            await asyncio.sleep(30)
+
+        # 개별 OAuth 로그인 시도
+        success = await test_oauth_login(url, oauth_entry.provider)
+
+        # 결과를 CSV에 업데이트 (간단하게 로그만 남김)
+        status = "success" if success else "failed"
+        print(f"📝 {oauth_entry.provider} 로그인 결과: {status}")
+
+
+async def main_loop(
+    filepath: str, start_line: int, end_line: int, skip_html_check: bool = False
+):
+    """지정된 URL 목록에 대해 스캔을 실행하는 메인 루프"""
+    target_list = read_lines_between(
+        filepath=filepath, start_line=start_line, end_line=end_line
+    )
+
+    current_progress["total"] = len(target_list)
+    current_progress["start_line"] = start_line
+    current_progress["current_index"] = 0
+
+    prev_progress = load_progress()
+    if prev_progress and prev_progress.get("start_line") == start_line:
+        print("📋 이전 진행 상황을 발견했습니다:")
+        print(f"   - 이전 완료: {prev_progress['current_index']}/{prev_progress['total']}")
+        print(f"   - 마지막 처리: {prev_progress.get('current_url', 'N/A')}")
+
+        resume = input("이어서 진행하시겠습니까? (y/n): ").lower().strip()
+        if resume == 'y':
+            start_index = prev_progress.get("current_index", 0)
+            current_progress["current_index"] = start_index
+            target_list = target_list[start_index:]
+            print(f"✅ {start_index}번째부터 재개합니다.")
+
+    for i, url in enumerate(target_list):
+        actual_index = current_progress["current_index"] + i
+        current_progress["current_url"] = url
+        
+        print(f"\n🔄 Processing {actual_index + 1}/{current_progress['total']}: {url}")
+        print(f"📍 {os.path.basename(filepath)}의 {start_line + actual_index}번째 줄")
+
+        if i > 0:
+            print("⏳ API 쿼터 보호를 위해 30초 대기 중...")
+            await asyncio.sleep(30)
+
+        await scan_one_url(url, skip_html_check=skip_html_check)
+
+        current_progress["current_index"] = actual_index + 1
+        save_progress()
+
+    print(f"\n🎉 모든 스캔이 완료되었습니다! ({current_progress['total']}개 URL)")