Refactor authentication and session management

- Removed old llm_login and session scripts, replacing them with a new structure for handling SSO login and session management.
- Introduced a new prompt system for collecting SSO redirect URLs, ensuring compliance with security protocols.
- Implemented a robust backend notification system for tracking scan initiation.
- Enhanced browser profile configuration and resource management for improved session handling.
- Added utility functions for environment variable checks and logging.
- Updated the overall architecture to improve maintainability and readability.
This commit is contained in:
imnyang 2025-06-15 12:24:01 +09:00
commit b68425f523
16 changed files with 251 additions and 232 deletions

View file

@ -1,35 +0,0 @@
from browser_use.browser.context import BrowserContextConfig
from pathlib import Path
import os
from typing import Any
def browser_config_kwargs(lang: str = "en_US") -> dict[str, Any]:
    """Build the keyword arguments used to configure the browser.

    Args:
        lang: Locale handed to the browser through the ``--lang`` switch.

    Returns:
        A dict with the keys ``keep_alive``, ``browser_type``, ``headless``,
        ``disable_security`` and ``extra_browser_args``. When both the
        ``PROXY_HOST`` and ``PROXY_PORT`` environment variables are set, a
        ``--proxy-server`` switch is appended to ``extra_browser_args``.
    """
    extra_browser_args = [
        "--disable-web-security",
        # Only one value survives per switch name, so every feature that must
        # be disabled is listed in a single --disable-features switch instead
        # of two switches that would override each other.
        "--disable-features=VizDisplayCompositor,IsolateOrigins,site-per-process",
        "--disable-site-isolation-trials",
        "--disable-popup-blocking",
        "--disable-dev-shm-usage",
        f"--lang={lang}",
        "--ignore-certificate-errors",
        "--ignore-ssl-errors",
        "--allow-running-insecure-content",
    ]

    # The proxy is optional: it is only configured when host AND port are set.
    proxy_host = os.getenv("PROXY_HOST")
    proxy_port = os.getenv("PROXY_PORT")
    if proxy_host and proxy_port:
        extra_browser_args.append(
            f"--proxy-server=http={proxy_host}:{proxy_port};https={proxy_host}:{proxy_port}"
        )

    return {
        "keep_alive": True,
        "browser_type": "chromium",
        "headless": False,
        "disable_security": True,
        "extra_browser_args": extra_browser_args,
    }

25
lib/llm/__init__.py Normal file
View file

@ -0,0 +1,25 @@
from langchain.callbacks.base import BaseCallbackHandler
from langchain_google_genai import ChatGoogleGenerativeAI
class QuotaExhaustedHandler(BaseCallbackHandler):
    """Callback handler that reports API quota exhaustion on the console."""

    def on_llm_error(self, error, **kwargs):
        """Print a warning when the error text mentions quota exhaustion.

        The error is matched on the substrings ``ResourceExhausted`` and
        ``429``. Nothing is raised or retried here.
        """
        error_text = str(error)
        quota_markers = ("ResourceExhausted", "429")
        if any(marker in error_text for marker in quota_markers):
            print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...")
            # The backoff itself is handled in scan_one_url.
def CreateChatGoogleGenerativeAI(model: str):
    """Create a Gemini chat model configured with retries.

    Args:
        model: Gemini model name. The sentinel ``"fallback"`` or an empty
            value (for example an environment variable that is set but
            blank) selects ``gemini-2.0-flash-lite`` after printing a
            warning.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance with ``max_retries=10``, a
        ``request_timeout`` of 120 passed through ``model_kwargs``, a
        ``QuotaExhaustedHandler`` callback and ``temperature=0.1``.
    """
    # An empty model name would only fail later inside the API client, so it
    # is treated the same way as the explicit "fallback" sentinel.
    if not model or model == "fallback":
        print("⚠️ Fallback 모델을 사용합니다. Environment 변수를 확인하세요.")
        print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.")
        model = "gemini-2.0-flash-lite"
    return ChatGoogleGenerativeAI(
        model=model,
        max_retries=10,  # raised maximum number of retries
        model_kwargs={
            "request_timeout": 120,  # longer timeout (2 minutes)
        },
        callbacks=[QuotaExhaustedHandler()],
        # Sampling temperature.
        temperature=0.1,
    )

View file

@ -1,12 +1,12 @@
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
from lib.llm.prompt import llm_login, session
load_dotenv(override=True) load_dotenv(override=True)
def extend_planner_system_message(): def extend_planner_system_message():
if os.getenv("PROVIDOR_CREDENTIALS_IN_LLM", "False").lower() == "true": if os.getenv("PROVIDOR_CREDENTIALS_IN_LLM", "False").lower() == "true":
from lib.prompt import llm_login
return llm_login.extend_planner_system_message return llm_login.extend_planner_system_message
else: else:
from lib.prompt import session
return session.extend_planner_system_message return session.extend_planner_system_message

40
lib/utils/__init__.py Normal file
View file

@ -0,0 +1,40 @@
from lib.utils.config import (
BACKEND_URL,
GOOGLE_API_KEY,
GOOGLE_MODEL,
GOOGLE_PLANNER_MODEL,
)
def show_info():
    """Print the configuration values in use to the console.

    The output contains whatever ``browser_use_version()`` returns, the
    backend URL, the Google API key and the two model names. The API key is
    shown with every character except the last four replaced by ``*``, or as
    ``None`` when no key is set.
    """
    if GOOGLE_API_KEY:
        hidden_part = "*" * (len(GOOGLE_API_KEY) - 4)
        api_key_display = hidden_part + GOOGLE_API_KEY[-4:]
    else:
        api_key_display = None

    print("🔧 환경 설정:")
    print(browser_use_version())
    print(f"🔗 Backend URL: {BACKEND_URL}")
    print(f"🔑 Google API Key: {api_key_display}")
    print(f"🌐 Google Model: {GOOGLE_MODEL}")
    print(f"🌐 Google Planner Model: {GOOGLE_PLANNER_MODEL}")
def browser_use_version():
    """Return the output of ``uv pip show browser-use``, or ``None`` on failure.

    Returns:
        The stripped standard output of the command, or ``None`` when ``uv``
        cannot be started or exits with a non-zero status.
    """
    import subprocess

    try:
        # Equivalent to running: uv pip show browser-use
        result = subprocess.run(
            ["uv", "pip", "show", "browser-use"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (OSError, subprocess.CalledProcessError):
        # OSError covers a missing or non-executable `uv` binary;
        # CalledProcessError covers a non-zero exit (check=True), e.g. when
        # the package is not installed. The package information is purely
        # informational, so neither failure may abort the caller.
        return None
    print("📦 Browser Use 패키지 정보:")
    return result.stdout.strip()
def env_cheker():
    """Fail fast when the Google API key is not configured.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is ``None``.
    """
    if GOOGLE_API_KEY is not None:
        return
    raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.")

View file

@ -0,0 +1,21 @@
import requests
from config import BACKEND_URL
def notify_backend(target_url):
    """Tell the backend that a scan of ``target_url`` is starting.

    Sends ``POST {BACKEND_URL}/start`` with ``url`` as a query parameter and
    a 5 second timeout. Every outcome is reported on the console only;
    failures are swallowed so that the caller continues without the
    notification.
    """
    try:
        query = {"url": target_url}
        reply = requests.post(f"{BACKEND_URL}/start", params=query, timeout=5)
        if reply.status_code != 200:
            print(f"⚠️ Backend notification failed: {reply.status_code}")
        else:
            print(f"✅ Backend notified: {reply.text}")
    # ConnectionError is checked before Timeout on purpose: the order decides
    # which message an error matching both handlers produces.
    except requests.exceptions.ConnectionError:
        print(
            f"⚠️ Backend server not available at {BACKEND_URL}. Continuing without notification."
        )
    except requests.exceptions.Timeout:
        print("⚠️ Backend notification timed out. Continuing without notification.")
    except Exception as exc:
        print(f"⚠️ Failed to notify backend: {exc}")

View file

@ -0,0 +1,29 @@
from func import *
import clean_resources as clean_resources_func
# Resolve the environment-dependent inputs once, when this module is imported.
proxy_url = setup_proxy()
storage_state_path = setup_storage_state()

# Browser profile built from the values resolved above.
profile = BrowserProfile(
    # Launch arguments
    args=get_browser_args(),
    # Network: route through the proxy only when one was configured
    proxy={"server": proxy_url} if proxy_url else None,
    # Persistence: no user data directory; state comes from the storage file
    storage_state=storage_state_path,
    user_data_dir=None,
    # Display
    viewport={"width": 1600, "height": 900},
    window_size={"width": 1600, "height": 900},
    device_scale_factor=1,
    headless=False,
    # Security
    stealth=True,
    disable_security=True,
)

View file

@ -0,0 +1,25 @@
from pathlib import Path
async def clean_resources(agent=None, session=None):
    """Release the resources used by a scan.

    Deletes ``./data/storage_state_temp.json`` when it exists, then awaits
    ``close()`` on ``agent`` and on ``session`` when they are truthy. Each
    step is best effort: a failure is printed and the next step still runs.

    Args:
        agent: Optional object exposing an awaitable ``close()``.
        session: Optional object exposing an awaitable ``close()``.
    """
    temp_state_file = Path("./data/storage_state_temp.json").resolve()
    if temp_state_file.exists():
        try:
            print(f"🗑️ 임시 스토리지 상태 파일 삭제 중: {temp_state_file}")
            # unlink() removes the file.
            temp_state_file.unlink()
            print("🗑️ 임시 스토리지 상태 파일 삭제 완료.")
        except Exception as exc:
            print(f"⚠️ 임시 스토리지 상태 파일 삭제 실패: {exc}")

    # The agent is closed before the session; each has its own failure message.
    closables = (
        (agent, "⚠️ 에이전트 리소스 정리 실패"),
        (session, "⚠️ 세션 리소스 정리 실패"),
    )
    for resource, failure_prefix in closables:
        if not resource:
            continue
        try:
            await resource.close()
        except Exception as exc:
            print(f"{failure_prefix}: {exc}")

View file

@ -0,0 +1,69 @@
import os
from pathlib import Path
from dotenv import load_dotenv
from browser_use import BrowserProfile
# Load environment variables
load_dotenv(override=True)
def setup_proxy():
    """Build the proxy URL from the ``PROXY_HOST`` / ``PROXY_PORT`` variables.

    Returns:
        ``"http://<host>:<port>"`` when both variables are set and non-empty,
        otherwise ``None``. The decision is also printed to the console.
    """
    host = os.getenv("PROXY_HOST")
    port = os.getenv("PROXY_PORT")
    if not (host and port):
        print("🔗 No proxy configured, using direct connection.")
        return None
    print(f"🔗 Using proxy: {host}:{port}")
    return f"http://{host}:{port}"
def setup_storage_state():
    """Prepare a working copy of the saved browser storage state.

    When ``./data/storage_state.json`` exists, its text is copied to
    ``./data/storage_state_temp.json``, replacing any previous copy.

    Returns:
        The absolute path of the working copy as a string, or ``None`` when
        no saved storage state exists.
    """
    data_dir = Path("./data")
    saved_state = (data_dir / "storage_state.json").resolve()
    working_copy = (data_dir / "storage_state_temp.json").resolve()

    if not saved_state.exists():
        return None

    # Drop a stale copy first, then write the fresh one.
    if working_copy.exists():
        working_copy.unlink()
    contents = saved_state.read_text(encoding="utf-8")
    working_copy.write_text(contents, encoding="utf-8")
    print(f"🔄 Using existing storage state: {working_copy}")
    return str(working_copy)
def get_browser_args():
    """Return the command-line switches passed to the browser.

    Returns:
        A new list of switches. The last entry is ``--lang=<value>``, taken
        from the ``LANG`` environment variable with ``en_US`` as default.
    """
    return [
        # Security and isolation
        "--disable-web-security",
        "--disable-site-isolation-trials",
        "--ignore-certificate-errors",
        "--ignore-ssl-errors",
        "--allow-running-insecure-content",
        # Disabled features. Only one value survives per switch name, so all
        # features are listed in a single --disable-features switch instead
        # of separate switches that would override each other.
        "--disable-features=IsolateOrigins,site-per-process,VizDisplayCompositor",
        # Performance and rendering
        "--disable-dev-shm-usage",
        # Popup and automation
        "--disable-popup-blocking",
        "--disable-blink-features=AutomationControlled",
        # Browser behavior
        "--no-first-run",
        "--no-service-autorun",
        "--no-default-browser-check",
        "--password-store=basic",
        "--use-mock-keychain",
        # Extensions
        "--disable-extensions-file-access-check",
        "--disable-extensions-http-throttling",
        "--disable-component-extensions-with-background-pages",
        # Language
        f"--lang={os.getenv('LANG', 'en_US')}",
    ]

View file

@ -0,0 +1,11 @@
from typing import List
from pydantic import BaseModel
# Output models


# One entry of the scan result: a provider label and its OAuth URI.
# NOTE(review): the exact format of both strings is not visible here — confirm against the agent prompt.
class OAuth(BaseModel):
    provider: str
    oauth_uri: str


# Container for all entries, stored under the "oauth_providers" key.
class OAuthList(BaseModel):
    oauth_providers: List[OAuth]

8
lib/utils/config.py Normal file
View file

@ -0,0 +1,8 @@
import os
from dotenv import load_dotenv
load_dotenv(verbose=True, override=True)
BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:11081")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_MODEL = os.getenv("GOOGLE_MODEL", "gemini-2.5-flash-preview-05-20")
GOOGLE_PLANNER_MODEL = os.getenv("GOOGLE_PLANNER_MODEL", "gemini-2.5-pro-preview-06-05")

216
main.py
View file

@ -3,26 +3,27 @@ import json
import os import os
import csv import csv
import argparse import argparse
from pathlib import Path
from turtle import width
import requests
import time
from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.callbacks.base import BaseCallbackHandler
from browser_use import ( from browser_use import (
Agent, Agent,
BrowserSession, BrowserSession,
BrowserProfile,
Controller, Controller,
) )
from patchright.async_api import async_playwright as async_patchright from patchright.async_api import async_playwright as async_patchright
from lib.is_html import is_html_url
from lib.read_txt import read_lines_between from lib.utils import env_cheker
from lib.prompt import extend_planner_system_message from lib.utils.backend_client import notify_backend
from lib.logger import logger from lib.utils.browser_use import model
from lib.utils.browser_use.clean_resources import clean_resources
from lib.utils.config import BACKEND_URL, GOOGLE_MODEL, GOOGLE_PLANNER_MODEL
from lib.utils.is_html import is_html_url
from lib.utils.read_txt import read_lines_between
from lib.llm.prompt import extend_planner_system_message
from lib.utils.logger import logger
import lib.utils.browser_use as browser_use
from lib.llm import CreateChatGoogleGenerativeAI
load_dotenv(verbose=True, override=True) load_dotenv(verbose=True, override=True)
@ -30,100 +31,12 @@ load_dotenv(verbose=True, override=True)
INITIAL_BACKOFF = int(os.getenv("INITIAL_BACKOFF", "60")) # seconds INITIAL_BACKOFF = int(os.getenv("INITIAL_BACKOFF", "60")) # seconds
MAX_BACKOFF = int(os.getenv("MAX_BACKOFF", "600")) # seconds MAX_BACKOFF = int(os.getenv("MAX_BACKOFF", "600")) # seconds
if os.getenv("GOOGLE_API_KEY") is None: env_cheker()
raise ValueError("GOOGLE_API_KEY 환경변수가 설정되지 않았습니다.")
if os.getenv("GOOGLE_MODEL") is None:
raise ValueError("GOOGLE_MODEL 환경변수가 설정되지 않았습니다.")
if os.getenv("GOOGLE_PLANNER_MODEL") is None:
raise ValueError("GOOGLE_PLANNER_MODEL 환경변수가 설정되지 않았습니다.")
backend_url = os.getenv("BACKEND_URL", "http://localhost:11081")
if os.getenv("LMNR_PROJECT_API_KEY"): if os.getenv("LMNR_PROJECT_API_KEY"):
from lmnr import Laminar from lmnr import Laminar
Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY")) Laminar.initialize(project_api_key=os.getenv("LMNR_PROJECT_API_KEY"))
print("🔧 환경 설정:")
try:
# run uv pip show browser-use
import subprocess
result = subprocess.run(
["uv", "pip", "show", "browser-use"], capture_output=True, text=True, check=True
)
print("📦 Browser Use 패키지 정보:")
print(result.stdout.strip())
except ImportError:
browser_use_version = "unknown"
print(f"🔗 Backend URL: {backend_url}")
api_key = os.getenv("GOOGLE_API_KEY")
print(f"🔑 Google API Key: {api_key[-4:] if api_key else None}")
print(f"🌐 Google Model: {os.getenv('GOOGLE_MODEL')}")
print(f"🌐 Google Planner Model: {os.getenv('GOOGLE_PLANNER_MODEL')}")
# API 쿼터 처리를 위한 콜백 핸들러
class QuotaExhaustedHandler(BaseCallbackHandler):
def on_llm_error(self, error, **kwargs):
if "ResourceExhausted" in str(error) or "429" in str(error):
print("⚠️ API 쿼터가 소진되었습니다. 재시도 로직에 위임합니다...")
# backoff handled in scan_one_url
def CreateChatGoogleGenerativeAI(model: str):
"""재시도 로직이 포함된 LLM 생성"""
if model == "fallback":
print("⚠️ Fallback 모델을 사용합니다. Envorinment 변수를 확인하세요.")
print("⚠️ Model Gemini-2.0-flash-lite를 사용합니다.")
model = "gemini-2.0-flash-lite"
return ChatGoogleGenerativeAI(
model=model,
max_retries=10, # 최대 재시도 횟수 증가
model_kwargs={
"request_timeout": 120, # 타임아웃 시간 증가 (2분)
},
callbacks=[QuotaExhaustedHandler()],
# API 호출 간격 조정
temperature=0.1,
)
# 출력 모델
class OAuth(BaseModel):
provider: str
oauth_uri: str
class OAuthList(BaseModel):
oauth_providers: List[OAuth]
async def clean_resources(agent=None, session=None):
"""리소스를 정리하는 함수"""
storage_state_temp_path = Path("./data/storage_state_temp.json").resolve()
if storage_state_temp_path.exists():
try:
# remove file
print(f"🗑️ 임시 스토리지 상태 파일 삭제 중: {storage_state_temp_path}")
# unlink removes the file
storage_state_temp_path.unlink()
print("🗑️ 임시 스토리지 상태 파일 삭제 완료.")
except Exception as e:
print(f"⚠️ 임시 스토리지 상태 파일 삭제 실패: {e}")
if agent:
try:
await agent.close()
except Exception as e:
print(f"⚠️ 에이전트 리소스 정리 실패: {e}")
if session:
try:
await session.close()
except Exception as e:
print(f"⚠️ 세션 리소스 정리 실패: {e}")
# ── URL별로 Browser를 새로 띄우는 함수 ── # ── URL별로 Browser를 새로 띄우는 함수 ──
async def scan_one_url(url: str, skip_html_check: bool = False): async def scan_one_url(url: str, skip_html_check: bool = False):
@ -136,106 +49,21 @@ async def scan_one_url(url: str, skip_html_check: bool = False):
return return
# Backend에 스캔 시작을 알림 # Backend에 스캔 시작을 알림
try: notify_backend(target_url)
response = requests.post(
f"{backend_url}/start", params={"url": target_url}, timeout=5
)
if response.status_code == 200:
print(f"✅ Backend notified: {response.text}")
else:
print(f"⚠️ Backend notification failed: {response.status_code}")
except requests.exceptions.ConnectionError:
print(
f"⚠️ Backend server not available at {backend_url}. Continuing without notification."
)
except requests.exceptions.Timeout:
print(f"⚠️ Backend notification timed out. Continuing without notification.")
except Exception as e:
print(f"⚠️ Failed to notify backend: {e}")
agent = None agent = None
session = None session = None
try_cnt = 0 try_cnt = 0
while True: while True:
proxy_host = os.getenv("PROXY_HOST")
proxy_port = os.getenv("PROXY_PORT")
proxy_url = None
if proxy_host and proxy_port:
proxy_url = f"http://{proxy_host}:{proxy_port}"
print(f"🔗 Using proxy: {proxy_host}:{proxy_port}")
else:
print("🔗 No proxy configured, using direct connection.")
# user_data_dir 설정
# user_data_path = Path("./data/user_data").resolve()
# user_data_path.mkdir(parents=True, exist_ok=True)
storage_state_path = Path("./data/storage_state.json").resolve()
storage_state_temp_path = Path("./data/storage_state_temp.json").resolve()
# copy storage_state.json to storage_state_temp.json
if storage_state_path.exists():
if storage_state_temp_path.exists():
storage_state_temp_path.unlink()
storage_state_temp_path.write_text(
storage_state_path.read_text(encoding="utf-8"), encoding="utf-8"
)
print(f"🔄 Using existing storage state: {storage_state_temp_path}")
else:
storage_state_temp_path = None
# BrowserProfile에 모든 설정 포함
profile = BrowserProfile(
disable_security=True,
#deterministic_rendering=True,
stealth=True,
headless=False,
# user_data_dir=str(user_data_path),
user_data_dir=None,
storage_state=(
str(storage_state_temp_path)
if storage_state_temp_path and storage_state_temp_path.exists()
else None
),
device_scale_factor=1,
window_size={"width": 1600, "height": 900},
viewport={"width": 1600, "height": 900},
proxy={"server": proxy_url} if proxy_url else None,
# 추가 args
args=[
"--disable-web-security",
"--disable-features=VizDisplayCompositor",
"--disable-site-isolation-trials",
"--disable-features=IsolateOrigins,site-per-process",
"--disable-popup-blocking",
"--disable-dev-shm-usage",
f"--lang={os.getenv('LANG', 'en_US')}",
"--ignore-certificate-errors",
"--ignore-ssl-errors",
"--allow-running-insecure-content",
"--disable-web-security",
"--disable-features=VizDisplayCompositor",
"--disable-blink-features=AutomationControlled",
"--no-first-run",
"--no-service-autorun",
"--password-store=basic",
"--use-mock-keychain",
"--no-default-browser-check",
"--disable-extensions-file-access-check",
"--disable-extensions-http-throttling",
"--disable-component-extensions-with-background-pages",
],
)
# BrowserSession에 profile 전달 # BrowserSession에 profile 전달
session = BrowserSession( session = BrowserSession(
playwright=(await async_patchright().start()), playwright=(await async_patchright().start()),
browser_profile=profile, browser_profile=browser_use.profile,
) )
# Agent 생성 및 실행 (단일 try-except with 백오프) # Agent 생성 및 실행 (단일 try-except with 백오프)
initial_actions = [{"open_tab": {"url": target_url}}] initial_actions = [{"open_tab": {"url": target_url}}]
controller = Controller(output_model=OAuthList) controller = Controller(output_model=model.BaseModel)
print("🤖 LLM 모델 초기화 및 스캔 시작...") print("🤖 LLM 모델 초기화 및 스캔 시작...")
try: try:
agent = Agent( agent = Agent(
@ -250,10 +78,8 @@ async def scan_one_url(url: str, skip_html_check: bool = False):
"If the OAuth buttons do not appear immediately, wait briefly to allow the page to load completely before proceeding. " "If the OAuth buttons do not appear immediately, wait briefly to allow the page to load completely before proceeding. "
"Always log out before starting the login process, and make sure to attempt the login again from a clean state." "Always log out before starting the login process, and make sure to attempt the login again from a clean state."
), ),
llm=CreateChatGoogleGenerativeAI( llm=CreateChatGoogleGenerativeAI(GOOGLE_MODEL),
os.getenv("GOOGLE_MODEL") or "fallback" planner_llm=CreateChatGoogleGenerativeAI(GOOGLE_PLANNER_MODEL),
),
planner_llm=CreateChatGoogleGenerativeAI(os.getenv("GOOGLE_PLANNER_MODEL") or "fallback"),
controller=controller, controller=controller,
extend_planner_system_message=extend_planner_system_message(), extend_planner_system_message=extend_planner_system_message(),
) )
@ -287,7 +113,7 @@ async def scan_one_url(url: str, skip_html_check: bool = False):
# 스캔 결과 처리 # 스캔 결과 처리
data = json.loads(final_result) data = json.loads(final_result)
try: try:
oauth_entries = [OAuth(**entry) for entry in data["oauth_providers"]] oauth_entries = [model.OAuth(**entry) for entry in data["oauth_providers"]]
except Exception as e: except Exception as e:
raise ValueError(f"결과 파싱 실패: {e}\n원본 결과: {final_result}") raise ValueError(f"결과 파싱 실패: {e}\n원본 결과: {final_result}")