feat: LLM 및 유틸리티 모듈 구조 개선 및 불필요한 코드 제거

This commit is contained in:
암냥 2025-06-24 22:25:28 +09:00
commit 27192dab3a
9 changed files with 162 additions and 133 deletions

View file

@ -1,25 +1,3 @@
from langchain.callbacks.base import BaseCallbackHandler
from langchain_google_genai import ChatGoogleGenerativeAI
from lib.llm.create import *
class QuotaExhaustedHandler(BaseCallbackHandler):
    """Callback handler that announces quota-exhaustion errors from the LLM.

    The handler only prints a notice; it performs no retry itself.
    """

    def on_llm_error(self, error, **kwargs):
        """Print a warning when the error text mentions a quota problem.

        Args:
            error: The error reported for the LLM call; inspected through
                ``str(error)``.
            **kwargs: Additional callback arguments; accepted and ignored.
        """
        error_text = str(error)
        quota_markers = ("ResourceExhausted", "429")
        if any(marker in error_text for marker in quota_markers):
            print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...")
            # backoff handled in scan_one_url
def CreateChatGoogleGenerativeAI(model: str):
    """Create a ``ChatGoogleGenerativeAI`` client configured with retries.

    Args:
        model: Model name handed to the client. The special value
            ``"fallback"`` is replaced by ``"gemini-2.0-flash-lite"`` after
            two warnings are printed.

    Returns:
        A ``ChatGoogleGenerativeAI`` built with ``max_retries=10``, a
        ``request_timeout`` of 120 passed through ``model_kwargs``, a
        ``QuotaExhaustedHandler`` callback and ``temperature=0.0``.
    """
    is_fallback = model == "fallback"
    if is_fallback:
        print("⚠️ Fallback 모델을 사용합니다. Envorinment 변수를 확인하세요.")
        print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.")
    chosen_model = "gemini-2.0-flash-lite" if is_fallback else model

    llm_options = {
        "model": chosen_model,
        "max_retries": 10,  # raised maximum retry count
        "model_kwargs": {
            # Raised timeout (2 minutes per the original comment).
            "request_timeout": 120,
        },
        "callbacks": [QuotaExhaustedHandler()],
        # NOTE(review): the original comment here read "adjust API call
        # interval", but the only setting that follows is the temperature.
        "temperature": 0.0,
    }
    return ChatGoogleGenerativeAI(**llm_options)
from lib.llm.prompt import *

25
lib/llm/create.py Normal file
View file

@ -0,0 +1,25 @@
from langchain.callbacks.base import BaseCallbackHandler
from langchain_google_genai import ChatGoogleGenerativeAI
class QuotaExhaustedHandler(BaseCallbackHandler):
    """Callback handler that announces quota-exhaustion errors from the LLM.

    The handler only prints a notice; it performs no retry itself.
    """

    def on_llm_error(self, error, **kwargs):
        """Print a warning when the error text mentions a quota problem.

        Args:
            error: The error reported for the LLM call; inspected through
                ``str(error)``.
            **kwargs: Additional callback arguments; accepted and ignored.
        """
        error_text = str(error)
        quota_markers = ("ResourceExhausted", "429")
        if any(marker in error_text for marker in quota_markers):
            print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...")
            # backoff handled in scan_one_url
def CreateChatGoogleGenerativeAI(model: str):
    """Create a ``ChatGoogleGenerativeAI`` client configured with retries.

    Args:
        model: Model name handed to the client. The special value
            ``"fallback"`` is replaced by ``"gemini-2.0-flash-lite"`` after
            two warnings are printed.

    Returns:
        A ``ChatGoogleGenerativeAI`` built with ``max_retries=10``, a
        ``request_timeout`` of 120 passed through ``model_kwargs``, a
        ``QuotaExhaustedHandler`` callback and ``temperature=0.0``.
    """
    is_fallback = model == "fallback"
    if is_fallback:
        print("⚠️ Fallback 모델을 사용합니다. Envorinment 변수를 확인하세요.")
        print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.")
    chosen_model = "gemini-2.0-flash-lite" if is_fallback else model

    llm_options = {
        "model": chosen_model,
        "max_retries": 10,  # raised maximum retry count
        "model_kwargs": {
            # Raised timeout (2 minutes per the original comment).
            "request_timeout": 120,
        },
        "callbacks": [QuotaExhaustedHandler()],
        # NOTE(review): the original comment here read "adjust API call
        # interval", but the only setting that follows is the temperature.
        "temperature": 0.0,
    }
    return ChatGoogleGenerativeAI(**llm_options)

View file

@ -1,7 +1,6 @@
import os
from dotenv import load_dotenv
load_dotenv(override=True)
# Why this isn't a separate index file:
# This code lives in __init__.py because creating one more file for such a
# short piece of code was judged to hurt readability.
def get_prompt(type:str) -> str:
"""

View file

@ -11,6 +11,9 @@ extract_oauth_list_prompt = f"""
🛑 제한 사항:
- 로그인 입력창이나 이메일/비밀번호 입력 방식은 제외합니다.
- 검색 엔진, 사이트 외부 탐색은 금지합니다.
- URL 추측이나 직접 입력은 금지합니다.
- OAuth가 없는 경우 배열 `[]` 반환합니다.
- OAuth가 아닌 일반 로그인은 무시합니다.
🔍 탐색 방법:
1. 초기 URL에 접속하여 **클라이언트용 로그인 페이지** 진입합니다.

View file

@ -1,40 +1,10 @@
from lib.utils.config import (
BACKEND_URL,
GOOGLE_API_KEY,
GOOGLE_MODEL,
GOOGLE_PLANNER_MODEL,
)
# export from show_info
from lib.utils.agent_info import *
from lib.utils.backend_client import *
from lib.utils.config import *
from lib.utils.is_html import *
from lib.utils.logger import *
from lib.utils.read_txt import *
def show_info():
    """Print the current environment configuration to stdout.

    Prints the value returned by ``browser_use_version()``, the backend URL,
    a masked Google API key, and the configured Google model names.

    Only the last four characters of the API key are left visible. A key of
    four characters or fewer is masked completely, so a short key is never
    echoed in full. An unset or empty key is shown as ``None``.
    """
    if GOOGLE_API_KEY:
        # Reveal a tail only when something else remains hidden.
        visible_tail = GOOGLE_API_KEY[-4:] if len(GOOGLE_API_KEY) > 4 else ""
        masked_key = "*" * (len(GOOGLE_API_KEY) - len(visible_tail)) + visible_tail
    else:
        masked_key = None

    print("🔧 환경 설정:")
    print(browser_use_version())
    print(f"🔗 Backend URL: {BACKEND_URL}")
    print(f"🔑 Google API Key: {masked_key}")
    print(f"🌐 Google Model: {GOOGLE_MODEL}")
    print(f"🌐 Google Planner Model: {GOOGLE_PLANNER_MODEL}")
def browser_use_version():
    """Return the output of ``uv pip show browser-use``, or ``None`` on failure.

    Returns:
        The stripped standard output of the command when it succeeds, after
        printing a header line. ``None`` when the command cannot be started
        (for example because ``uv`` is not on ``PATH``) or when it exits with
        a non-zero status.
    """
    # Local import, as in the original; it must precede the ``try`` so the
    # ``except`` clause can reference ``subprocess.CalledProcessError``.
    import subprocess

    try:
        # run uv pip show browser-use
        result = subprocess.run(
            ["uv", "pip", "show", "browser-use"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or unlaunchable ``uv`` executable;
        # CalledProcessError is what ``check=True`` raises on a non-zero exit.
        return None

    print("📦 Browser Use 패키지 정보:")
    return result.stdout.strip()
def env_cheker():
    """Check that the required Google API key configuration is present.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is ``None``.
    """
    if GOOGLE_API_KEY is not None:
        return
    raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.")
from lib.utils.browser_use import *

40
lib/utils/agent_info.py Normal file
View file

@ -0,0 +1,40 @@
from lib.utils.config import (
BACKEND_URL,
GOOGLE_API_KEY,
GOOGLE_MODEL,
GOOGLE_PLANNER_MODEL,
)
def show_info():
    """Print the current environment configuration to stdout.

    Prints the value returned by ``browser_use_version()``, the backend URL,
    a masked Google API key, and the configured Google model names.

    Only the last four characters of the API key are left visible. A key of
    four characters or fewer is masked completely, so a short key is never
    echoed in full. An unset or empty key is shown as ``None``.
    """
    if GOOGLE_API_KEY:
        # Reveal a tail only when something else remains hidden.
        visible_tail = GOOGLE_API_KEY[-4:] if len(GOOGLE_API_KEY) > 4 else ""
        masked_key = "*" * (len(GOOGLE_API_KEY) - len(visible_tail)) + visible_tail
    else:
        masked_key = None

    print("🔧 환경 설정:")
    print(browser_use_version())
    print(f"🔗 Backend URL: {BACKEND_URL}")
    print(f"🔑 Google API Key: {masked_key}")
    print(f"🌐 Google Model: {GOOGLE_MODEL}")
    print(f"🌐 Google Planner Model: {GOOGLE_PLANNER_MODEL}")
def browser_use_version():
    """Return the output of ``uv pip show browser-use``, or ``None`` on failure.

    Returns:
        The stripped standard output of the command when it succeeds, after
        printing a header line. ``None`` when the command cannot be started
        (for example because ``uv`` is not on ``PATH``) or when it exits with
        a non-zero status.
    """
    # Local import, as in the original; it must precede the ``try`` so the
    # ``except`` clause can reference ``subprocess.CalledProcessError``.
    import subprocess

    try:
        # run uv pip show browser-use
        result = subprocess.run(
            ["uv", "pip", "show", "browser-use"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or unlaunchable ``uv`` executable;
        # CalledProcessError is what ``check=True`` raises on a non-zero exit.
        return None

    print("📦 Browser Use 패키지 정보:")
    return result.stdout.strip()
def env_cheker():
    """Check that the required Google API key configuration is present.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is ``None``.
    """
    if GOOGLE_API_KEY is not None:
        return
    raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.")

View file

@ -1,46 +1,5 @@
import os
from lib.utils.browser_use.clean_resources import *
from lib.utils.browser_use.func import *
# Initialize configuration.
# Evaluated once when this module is imported; GetProfile() reads this value
# to decide whether a proxy server entry is passed to BrowserProfile.
proxy_url = setup_proxy()
async def GetProfile():
    """Build the ``BrowserProfile`` used for browser sessions.

    The path returned by ``setup_storage_state()`` is used as the profile's
    storage state only when the file exists and can be read completely as
    UTF-8. In every other case a warning is printed and the profile is
    created without a storage state.

    Returns:
        A ``BrowserProfile`` configured with the security, display,
        persistence, network and argument settings listed below.
    """
    storage_state_path = await setup_storage_state()

    storage_state = None
    if storage_state_path and os.path.exists(storage_state_path):
        try:
            # Read the whole file once to prove it is accessible and valid
            # UTF-8; the content itself is not needed here.
            with open(storage_state_path, "r", encoding="utf-8") as f:
                f.read()
        except (UnicodeDecodeError, OSError) as exc:
            # OSError (not only FileNotFoundError) so that permission errors
            # or a directory at that path also fall back instead of crashing.
            print(f"⚠️ Storage state file could not be read ({exc}), proceeding without it.")
        else:
            storage_state = storage_state_path
    else:
        print("⚠️ Storage state file not found or inaccessible, proceeding without it.")

    profile = BrowserProfile(
        # Security settings
        disable_security=True,
        stealth=True,
        # Display settings
        headless=False,
        device_scale_factor=1,
        window_size={"width": 1600, "height": 900},
        viewport={"width": 1600, "height": 900},
        # Data persistence
        user_data_dir=None,
        storage_state=storage_state,
        # Network settings
        proxy={"server": proxy_url} if proxy_url else None,
        # Additional arguments
        args=get_browser_args(),
    )
    return profile
from lib.utils.browser_use.model import *
from lib.utils.browser_use.init_profile import *
from lib.utils.browser_use.sensitive_data import *

View file

@ -0,0 +1,46 @@
import os
from lib.utils.browser_use.func import *
# Initialize configuration.
# Evaluated once when this module is imported; GetProfile() reads this value
# to decide whether a proxy server entry is passed to BrowserProfile.
proxy_url = setup_proxy()
async def GetProfile():
    """Build the ``BrowserProfile`` used for browser sessions.

    The path returned by ``setup_storage_state()`` is used as the profile's
    storage state only when the file exists and can be read completely as
    UTF-8. In every other case a warning is printed and the profile is
    created without a storage state.

    Returns:
        A ``BrowserProfile`` configured with the security, display,
        persistence, network and argument settings listed below.
    """
    storage_state_path = await setup_storage_state()

    storage_state = None
    if storage_state_path and os.path.exists(storage_state_path):
        try:
            # Read the whole file once to prove it is accessible and valid
            # UTF-8; the content itself is not needed here.
            with open(storage_state_path, "r", encoding="utf-8") as f:
                f.read()
        except (UnicodeDecodeError, OSError) as exc:
            # OSError (not only FileNotFoundError) so that permission errors
            # or a directory at that path also fall back instead of crashing.
            print(f"⚠️ Storage state file could not be read ({exc}), proceeding without it.")
        else:
            storage_state = storage_state_path
    else:
        print("⚠️ Storage state file not found or inaccessible, proceeding without it.")

    profile = BrowserProfile(
        # Security settings
        disable_security=True,
        stealth=True,
        # Display settings
        headless=False,
        device_scale_factor=1,
        window_size={"width": 1600, "height": 900},
        viewport={"width": 1600, "height": 900},
        # Data persistence
        user_data_dir=None,
        storage_state=storage_state,
        # Network settings
        proxy={"server": proxy_url} if proxy_url else None,
        # Additional arguments
        args=get_browser_args(),
    )
    return profile

53
main.py
View file

@ -12,24 +12,33 @@ from browser_use import (
Agent,
BrowserSession,
Controller,
ActionResult,
)
from patchright.async_api import async_playwright as async_patchright, Page
from pydantic import BaseModel
from lib.utils import env_cheker
from lib.utils.backend_client import notify_backend
from lib.utils.browser_use import model
from lib.utils.browser_use.clean_resources import clean_resources
from lib.utils.browser_use.func import setup_storage_state
from lib.utils.browser_use.sensitive_data import GetSensitiveData
from lib.utils.config import BACKEND_URL, GOOGLE_MODEL, GOOGLE_PLANNER_MODEL
from lib.utils.is_html import is_html_url
from lib.utils.read_txt import read_lines_between
from lib.llm.prompt import get_prompt
from lib.utils.logger import logger
import lib.utils.browser_use as browser_use
from lib.llm import CreateChatGoogleGenerativeAI
from lib.utils import (
notify_backend,
read_lines_between,
is_html_url,
env_cheker,
logger,
config,
GetProfile
)
from lib.utils import (
GetSensitiveData,
setup_storage_state,
clean_resources
)
from lib.llm import (
CreateChatGoogleGenerativeAI,
get_prompt
)
import lib.utils.browser_use.model as model
load_dotenv(verbose=True, override=True)
@ -108,7 +117,7 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False):
while True:
session = BrowserSession(
playwright=(await async_patchright().start()),
browser_profile=await browser_use.GetProfile(),
browser_profile=await GetProfile(),
)
initial_actions = [{"open_tab": {"url": target_url}}]
@ -130,10 +139,10 @@ async def extract_oauth_list(url: str, skip_html_check: bool = False):
"Just find and list all available OAuth providers with their button texts or provider names. "
"Return a list of OAuth providers found on the login page."
),
llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL),
llm=CreateChatGoogleGenerativeAI(config.GOOGLE_MODEL),
planner_llm=(
CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL)
if GOOGLE_PLANNER_MODEL
CreateChatGoogleGenerativeAI(config.GOOGLE_PLANNER_MODEL)
if config.GOOGLE_PLANNER_MODEL
else None
),
controller=controller,
@ -193,7 +202,7 @@ async def test_oauth_login(url: str, oauth_provider: str):
while True:
session = BrowserSession(
playwright=(await async_patchright().start()),
browser_profile=await browser_use.GetProfile(),
browser_profile=await GetProfile(),
)
initial_actions = [{"open_tab": {"url": target_url}}]
@ -215,10 +224,10 @@ async def test_oauth_login(url: str, oauth_provider: str):
f"If login fails or encounters errors, report the issue. "
f"Focus only on {oauth_provider} - ignore other OAuth providers."
),
llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL),
llm=CreateChatGoogleGenerativeAI(config.GOOGLE_MODEL),
planner_llm=(
CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL)
if GOOGLE_PLANNER_MODEL
CreateChatGoogleGenerativeAI(config.GOOGLE_PLANNER_MODEL)
if config.GOOGLE_PLANNER_MODEL
else None
),
controller=controller,