diff --git a/lib/llm/__init__.py b/lib/llm/__init__.py index 4463eff..9acc135 100644 --- a/lib/llm/__init__.py +++ b/lib/llm/__init__.py @@ -1,25 +1,3 @@ -from langchain.callbacks.base import BaseCallbackHandler -from langchain_google_genai import ChatGoogleGenerativeAI +from lib.llm.create import * -class QuotaExhaustedHandler(BaseCallbackHandler): - def on_llm_error(self, error, **kwargs): - if "ResourceExhausted" in str(error) or "429" in str(error): - print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...") - # backoff handled in scan_one_url - -def CreateChatGoogleGenerativeAI(model: str): - """재시도 로직이 포함된 LLM 생성""" - if model == "fallback": - print("⚠️ Fallback 모델을 사용합니다. Envorinment 변수를 확인하세요.") - print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.") - model = "gemini-2.0-flash-lite" - return ChatGoogleGenerativeAI( - model=model, - max_retries=10, # 최대 재시도 횟수 증가 - model_kwargs={ - "request_timeout": 120, # 타임아웃 시간 증가 (2분) - }, - callbacks=[QuotaExhaustedHandler()], - # API 호출 간격 조정 - temperature=0.0, - ) +from lib.llm.prompt import * \ No newline at end of file diff --git a/lib/llm/create.py b/lib/llm/create.py new file mode 100644 index 0000000..4463eff --- /dev/null +++ b/lib/llm/create.py @@ -0,0 +1,25 @@ +from langchain.callbacks.base import BaseCallbackHandler +from langchain_google_genai import ChatGoogleGenerativeAI + +class QuotaExhaustedHandler(BaseCallbackHandler): + def on_llm_error(self, error, **kwargs): + if "ResourceExhausted" in str(error) or "429" in str(error): + print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...") + # backoff handled in scan_one_url + +def CreateChatGoogleGenerativeAI(model: str): + """재시도 로직이 포함된 LLM 생성""" + if model == "fallback": + print("⚠️ Fallback 모델을 사용합니다. 
Envorinment 변수를 확인하세요.") + print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.") + model = "gemini-2.0-flash-lite" + return ChatGoogleGenerativeAI( + model=model, + max_retries=10, # 최대 재시도 횟수 증가 + model_kwargs={ + "request_timeout": 120, # 타임아웃 시간 증가 (2분) + }, + callbacks=[QuotaExhaustedHandler()], + # API 호출 간격 조정 + temperature=0.0, + ) diff --git a/lib/llm/prompt/__init__.py b/lib/llm/prompt/__init__.py index 1417b03..d0d690b 100644 --- a/lib/llm/prompt/__init__.py +++ b/lib/llm/prompt/__init__.py @@ -1,7 +1,6 @@ -import os -from dotenv import load_dotenv - -load_dotenv(override=True) +# Why this code lives in __init__.py instead of its own module: +# it is short, and splitting it into one more file was judged to +# hurt readability rather than help it. def get_prompt(type:str) -> str: """ diff --git a/lib/llm/prompt/auth_list.py b/lib/llm/prompt/auth_list.py index 55a4bfb..bea1212 100644 --- a/lib/llm/prompt/auth_list.py +++ b/lib/llm/prompt/auth_list.py @@ -11,6 +11,9 @@ extract_oauth_list_prompt = f""" 🛑 제한 사항: - ❌ 로그인 입력창이나 이메일/비밀번호 입력 방식은 제외합니다. - ❌ 검색 엔진, 사이트 외부 탐색은 금지합니다. +- ❌ URL 추측이나 직접 입력은 금지합니다. +- ❌ OAuth가 없는 경우 빈 배열 `[]`로 반환합니다. +- ❌ OAuth가 아닌 일반 로그인은 무시합니다. 🔍 탐색 방법: 1. 초기 URL에 접속하여 **클라이언트용 로그인 페이지**로 진입합니다. 
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py index d2f3a8a..200a273 100644 --- a/lib/utils/__init__.py +++ b/lib/utils/__init__.py @@ -1,40 +1,10 @@ -from lib.utils.config import ( - BACKEND_URL, - GOOGLE_API_KEY, - GOOGLE_MODEL, - GOOGLE_PLANNER_MODEL, -) +# Re-export the public names of each utils submodule +from lib.utils.agent_info import * +from lib.utils.backend_client import * +from lib.utils.config import * +from lib.utils.is_html import * +from lib.utils.logger import * +from lib.utils.read_txt import * -def show_info(): - print("🔧 환경 설정:") - print(browser_use_version()) - print(f"🔗 Backend URL: {BACKEND_URL}") - print( - f"🔑 Google API Key: {'*' * (len(GOOGLE_API_KEY) - 4) + GOOGLE_API_KEY[-4:] if GOOGLE_API_KEY else None}" - ) - print(f"🌐 Google Model: {GOOGLE_MODEL}") - print(f"🌐 Google Planner Model: {GOOGLE_PLANNER_MODEL}") - - -def browser_use_version(): - try: - # run uv pip show browser-use - import subprocess - - result = subprocess.run( - ["uv", "pip", "show", "browser-use"], - capture_output=True, - text=True, - check=True, - ) - - print("📦 Browser Use 패키지 정보:") - return result.stdout.strip() - except ImportError: - return None - - -def env_cheker(): - if GOOGLE_API_KEY is None: - raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.") +from lib.utils.browser_use import * \ No newline at end of file diff --git a/lib/utils/agent_info.py b/lib/utils/agent_info.py new file mode 100644 index 0000000..d2f3a8a --- /dev/null +++ b/lib/utils/agent_info.py @@ -0,0 +1,40 @@ +from lib.utils.config import ( + BACKEND_URL, + GOOGLE_API_KEY, + GOOGLE_MODEL, + GOOGLE_PLANNER_MODEL, +) + + +def show_info(): + print("🔧 환경 설정:") + print(browser_use_version()) + print(f"🔗 Backend URL: {BACKEND_URL}") + print( + f"🔑 Google API Key: {'*' * (len(GOOGLE_API_KEY) - 4) + GOOGLE_API_KEY[-4:] if GOOGLE_API_KEY else None}" + ) + print(f"🌐 Google Model: {GOOGLE_MODEL}") + print(f"🌐 Google Planner Model: {GOOGLE_PLANNER_MODEL}") + + +def browser_use_version(): + try: + # run uv pip show 
browser-use + import subprocess + + result = subprocess.run( + ["uv", "pip", "show", "browser-use"], + capture_output=True, + text=True, + check=True, + ) + + print("📦 Browser Use 패키지 정보:") + return result.stdout.strip() + except ImportError: + return None + + +def env_cheker(): + if GOOGLE_API_KEY is None: + raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.") diff --git a/lib/utils/browser_use/__init__.py b/lib/utils/browser_use/__init__.py index 8b85b50..2373090 100644 --- a/lib/utils/browser_use/__init__.py +++ b/lib/utils/browser_use/__init__.py @@ -1,46 +1,5 @@ -import os +from lib.utils.browser_use.clean_resources import * from lib.utils.browser_use.func import * - -# Initialize configuration -proxy_url = setup_proxy() - -async def GetProfile(): - storage_state_path = await setup_storage_state() - - # Handle potential encoding issues with storage state file - try: - if storage_state_path and os.path.exists(storage_state_path): - # Test if file can be read properly, if not, skip it - with open(storage_state_path, 'r', encoding='utf-8') as f: - f.read() - storage_state = storage_state_path - else: - print("⚠️ Storage state file not found or inaccessible, proceeding without it.") - storage_state = None - except (UnicodeDecodeError, FileNotFoundError): - # If there's an encoding error, don't use the storage state - storage_state = None - - profile = BrowserProfile( - # Security settings - disable_security=True, - stealth=True, - - # Display settings - headless=False, - device_scale_factor=1, - window_size={"width": 1600, "height": 900}, - viewport={"width": 1600, "height": 900}, - - # Data persistence - user_data_dir=None, - storage_state=storage_state, - - # Network settings - proxy={"server": proxy_url} if proxy_url else None, - - # Additional arguments - args=get_browser_args(), - ) - - return profile +from lib.utils.browser_use.model import * +from lib.utils.browser_use.init_profile import * +from lib.utils.browser_use.sensitive_data import * \ No newline at 
end of file diff --git a/lib/utils/browser_use/init_profile.py b/lib/utils/browser_use/init_profile.py new file mode 100644 index 0000000..8b85b50 --- /dev/null +++ b/lib/utils/browser_use/init_profile.py @@ -0,0 +1,46 @@ +import os +from lib.utils.browser_use.func import * + +# Initialize configuration +proxy_url = setup_proxy() + +async def GetProfile(): + storage_state_path = await setup_storage_state() + + # Handle potential encoding issues with storage state file + try: + if storage_state_path and os.path.exists(storage_state_path): + # Test if file can be read properly, if not, skip it + with open(storage_state_path, 'r', encoding='utf-8') as f: + f.read() + storage_state = storage_state_path + else: + print("⚠️ Storage state file not found or inaccessible, proceeding without it.") + storage_state = None + except (UnicodeDecodeError, FileNotFoundError): + # If there's an encoding error, don't use the storage state + storage_state = None + + profile = BrowserProfile( + # Security settings + disable_security=True, + stealth=True, + + # Display settings + headless=False, + device_scale_factor=1, + window_size={"width": 1600, "height": 900}, + viewport={"width": 1600, "height": 900}, + + # Data persistence + user_data_dir=None, + storage_state=storage_state, + + # Network settings + proxy={"server": proxy_url} if proxy_url else None, + + # Additional arguments + args=get_browser_args(), + ) + + return profile diff --git a/main.py b/main.py index e531479..79a7800 100644 --- a/main.py +++ b/main.py @@ -12,24 +12,33 @@ from browser_use import ( Agent, BrowserSession, Controller, - ActionResult, ) from patchright.async_api import async_playwright as async_patchright, Page from pydantic import BaseModel -from lib.utils import env_cheker -from lib.utils.backend_client import notify_backend -from lib.utils.browser_use import model -from lib.utils.browser_use.clean_resources import clean_resources -from lib.utils.browser_use.func import setup_storage_state -from 
lib.utils.browser_use.sensitive_data import GetSensitiveData -from lib.utils.config import BACKEND_URL, GOOGLE_MODEL, GOOGLE_PLANNER_MODEL -from lib.utils.is_html import is_html_url -from lib.utils.read_txt import read_lines_between -from lib.llm.prompt import get_prompt -from lib.utils.logger import logger -import lib.utils.browser_use as browser_use -from lib.llm import CreateChatGoogleGenerativeAI +from lib.utils import ( + notify_backend, + read_lines_between, + is_html_url, + env_cheker, + logger, + config, + GetProfile +) + +from lib.utils import ( + GetSensitiveData, + setup_storage_state, + clean_resources +) + +from lib.llm import ( + CreateChatGoogleGenerativeAI, + get_prompt +) + + +import lib.utils.browser_use.model as model load_dotenv(verbose=True, override=True) @@ -108,7 +117,7 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): while True: session = BrowserSession( playwright=(await async_patchright().start()), - browser_profile=await browser_use.GetProfile(), + browser_profile=await GetProfile(), ) initial_actions = [{"open_tab": {"url": target_url}}] @@ -130,10 +139,10 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False): "Just find and list all available OAuth providers with their button texts or provider names. " "Return a list of OAuth providers found on the login page." 
), - llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL), + llm=CreateChatGoogleGenerativeAI(config.GOOGLE_MODEL), planner_llm=( - CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL) - if GOOGLE_PLANNER_MODEL + CreateChatGoogleGenerativeAI(config.GOOGLE_PLANNER_MODEL) + if config.GOOGLE_PLANNER_MODEL else None ), controller=controller, @@ -193,7 +202,7 @@ async def test_oauth_login(url: str, oauth_provider: str): while True: session = BrowserSession( playwright=(await async_patchright().start()), - browser_profile=await browser_use.GetProfile(), + browser_profile=await GetProfile(), ) initial_actions = [{"open_tab": {"url": target_url}}] @@ -215,10 +224,10 @@ async def test_oauth_login(url: str, oauth_provider: str): f"If login fails or encounters errors, report the issue. " f"Focus only on {oauth_provider} - ignore other OAuth providers." ), - llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL), + llm=CreateChatGoogleGenerativeAI(config.GOOGLE_MODEL), planner_llm=( - CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL) - if GOOGLE_PLANNER_MODEL + CreateChatGoogleGenerativeAI(config.GOOGLE_PLANNER_MODEL) + if config.GOOGLE_PLANNER_MODEL else None ), controller=controller,