[Update] agent 호출 구조 변경

This commit is contained in:
tv0924@icloud.com 2025-06-26 23:33:59 +09:00
commit b7e6afb227
12 changed files with 267 additions and 343 deletions

View file

@ -1,92 +0,0 @@
import json
from pydantic import BaseModel
from browser_use import (
Agent,
Controller,
)
from lib.agents.run_agent import run_agent
from lib.utils.logger import logger
from lib.browser_use_utils.create_google_ai import create_google_ai
from lib.config import GOOGLE_MODEL, GOOGLE_PLANNER_MODEL
# Values used for FindLoginPageResponse.status.
NOT_FOUND_LOGIN_PAGE = 0
FOUND_LOGIN_PAGE = 1
class FindLoginPageResponse(BaseModel):
    """Structured output the login-page agent is asked to return."""

    # NOT_FOUND_LOGIN_PAGE (0) or FOUND_LOGIN_PAGE (1); defaults to not found.
    status: int = NOT_FOUND_LOGIN_PAGE
    # Optional free-text message accompanying the status.
    msg: str | None = None
    # Full URL of the login page when one was found.
    url: str | None = None
async def find_login_page(target_url, session) -> tuple[bool, str | None]:
    """Drive a browser agent to the login page reachable from ``target_url``.

    Args:
        target_url: URL opened in a new tab as the agent's initial action.
        session: Browser session handed to the agent as ``browser_session``.

    Returns:
        ``(True, login_url)`` when the agent reports a login page with a
        non-empty URL. ``(False, msg)`` when it reports no login page, where
        ``msg`` is the agent's message. ``(False, None)`` when the agent run
        failed, or when the result could not be parsed and carried no message.
    """
    initial_actions = [{"open_tab": {"url": target_url}}]
    task = """
You are an expert in finding login pages.
Your task is to navigate to the login page of the given URL. Follow the steps below strictly and return results only in the specified format.
You are NOT allowed to navigate to URLs that are not directly discoverable within the initial domain. Do NOT use search engines or guess external login URLs.
0. INITIAL BLOCK CHECK
- If the browser is blocked when trying to access the page due to firewall, CAPTCHA, regional restrictions, or other access denials immediately terminate the process and return the following JSON:
```json
{
"status": 0,
"msg": "Blocked",
"url": ""
}
```
- Do NOT proceed to further steps in this case.
1. LOGIN PAGE NAVIGATION
- Navigate only to a **client-side (non-enterprise)** login page within the provided domain.
- Do NOT rely on external tools, search engines, or links not directly found on the site.
- If a consent popup (e.g. for privacy/cookies) appears, you MUST dismiss or close it before proceeding.
- Since step 0 confirmed access, assume the page now loads properly.
2. RETURN FORMAT
- Once the login page is reached, return a JSON object matching the following schema:
```json
{
"status": 1, // 1 if login page is found, 0 otherwise
"msg": "Login page found", // Optional message
"url": "https://example.com/login" // Full URL of the login page if found
}
```
- If the login page cannot be found, return:
```json
{
"status": 0,
"msg": "Login page not found",
"url": ""
}
```
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
"""
    controller = Controller(output_model=FindLoginPageResponse, exclude_actions=['search_google'])
    agent = Agent(
        browser_session=session,
        initial_actions=initial_actions,
        task=task,
        llm=create_google_ai(GOOGLE_MODEL),
        controller=controller,
    )
    is_failed, final_result = await run_agent(agent)
    if is_failed:
        logger(f"⚠️ 스캔 실패: {target_url} | {final_result}")
        print(f"⚠️ 스캔 실패: {target_url} | {final_result}")
        return False, None

    # Decode inside the try block so malformed JSON is reported, not raised.
    data = None
    try:
        data = json.loads(final_result)
        resp = FindLoginPageResponse(**data)
    except Exception as e:
        # Log the raw agent output: `data` is either unset or a plain dict
        # here, so attribute access on it would raise inside the handler.
        logger(f"⚠️ 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
        print(f"⚠️ 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
        return False, (data.get("msg") if isinstance(data, dict) else None)

    # `url` is optional, so test truthiness instead of calling len() on None.
    if resp.status == FOUND_LOGIN_PAGE and resp.url:
        return True, resp.url
    return False, resp.msg

View file

@ -1,66 +0,0 @@
import json
from pydantic import BaseModel
from browser_use import (
Agent,
Controller,
)
from lib.agents.run_agent import run_agent
from lib.utils.logger import logger
from lib.browser_use_utils.create_google_ai import create_google_ai
from lib.config import GOOGLE_MODEL, GOOGLE_PLANNER_MODEL
# Values used for the `status` field of the SSO-list response model.
NOT_FOUND_SSO_LIST = 0
FOUND_SSO_LIST = 1
class EachSSOProvider(BaseModel):
    """One SSO provider entry inside the agent's structured output."""

    # Required provider label.
    provider: str
    # Optional OAuth URI for the provider; defaults to None.
    oauth_uri: str | None = None
class FindLoginPageResponse(BaseModel):
    """Structured output model passed to the Controller by get_sso_list."""

    # Provider entries reported by the agent, if any.
    EachSSOProviders: list[EachSSOProvider] | None = None
    # Defaults to NOT_FOUND_SSO_LIST (0); get_sso_list treats FOUND_SSO_LIST (1) as success.
    status: int = NOT_FOUND_SSO_LIST
    # Optional free-text message.
    msg: str | None = None
async def get_sso_list(target_url, session) -> tuple[bool, "FindLoginPageResponse | str | None"]:
    """Run a planner-assisted browser agent and return its parsed response.

    Args:
        target_url: URL opened in a new tab as the agent's initial action.
        session: Browser session handed to the agent as ``browser_session``.

    Returns:
        ``(True, response)`` when the parsed response has status
        ``FOUND_SSO_LIST``. ``(False, None)`` when the agent run failed or the
        status is anything else. ``(False, msg)`` when the result could not be
        parsed, where ``msg`` is the raw ``"msg"`` value if one was decoded,
        otherwise ``None``.
    """
    initial_actions = [{"open_tab": {"url": target_url}}]
    task = "Navigate to the login page, and return the result in the specified format."
    # NOTE(review): this prompt asks for a login-page status/url object, while
    # the output model above carries an SSO provider list, and its ```json
    # fence is never closed. Left unchanged; confirm the intended prompt.
    extend_planner_system_message = """
You are an expert in finding login pages.
Your task is to navigate to the login page of the given URL.
Once you reach the login page, stop and return a JSON object that matches the following schema:
```json
{
"status": 1, # 1 if login page found, 0 otherwise
"url": "https://example.com/login" # Full URL of the login page if found
}
Return only this JSON object. Do not include any explanation or additional text.
"""
    controller = Controller(output_model=FindLoginPageResponse, exclude_actions=['search_google'])
    agent = Agent(
        browser_session=session,
        initial_actions=initial_actions,
        task=task,
        llm=create_google_ai(GOOGLE_MODEL),
        planner_llm=create_google_ai(GOOGLE_PLANNER_MODEL),
        controller=controller,
        extend_planner_system_message=extend_planner_system_message,
    )
    is_failed, final_result = await run_agent(agent)
    if is_failed:
        logger(f"⚠️ 스캔 실패: {target_url} | {final_result}")
        print(f"⚠️ 스캔 실패: {target_url} | {final_result}")
        return False, None

    # Decode inside the try block so malformed JSON is reported, not raised.
    data = None
    try:
        data = json.loads(final_result)
        resp = FindLoginPageResponse(**data)
    except Exception as e:
        # Log the raw agent output: `data` is either unset or a plain dict
        # here, so attribute access on it would raise inside the handler.
        logger(f"⚠️ 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
        print(f"⚠️ 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
        return False, (data.get("msg") if isinstance(data, dict) else None)

    if resp.status == FOUND_SSO_LIST:
        return True, resp
    return False, None

View file

@ -1,19 +0,0 @@
from lib.browser_use_utils.clean_resources import clean_agent_resources
async def run_agent(agent) -> tuple[int, str]:
    """Run ``agent`` once and summarise the outcome as ``(code, payload)``.

    Codes: ``0`` with the agent's final result on success, ``-1`` when the run
    produced no final result, ``1`` when the raised error text mentions
    ``ResourceExhausted`` or ``429``, and ``2`` for any other exception.
    Agent resources are cleaned up on every path.
    """
    try:
        history = await agent.run()
        outcome = history.final_result()
    except Exception as exc:
        detail = str(exc)
        # Quota problems get their own code; everything else is generic.
        quota_hit = any(marker in detail for marker in ("ResourceExhausted", "429"))
        if not quota_hit:
            return 2, "일반 에러로 인한 실패"
        return 1, "API 쿼터 에러로 인한 실패"
    else:
        if outcome is not None:
            return 0, outcome
        return -1, "최종 결과가 없습니다. 에이전트 실행 실패"
    finally:
        await clean_agent_resources(agent)

67
lib/agents/run_task.py Normal file
View file

@ -0,0 +1,67 @@
import json
from typing import Any
from pydantic import BaseModel
from browser_use import (
Agent,
Controller,
BrowserSession
)
from patchright.async_api import async_playwright as async_patchright
from lib.utils.logger import logger
from lib.prompt.get_sso_list import get_sso_list_task
from lib.browser_use_utils.create_google_ai import create_google_ai
from lib.browser_use_utils.get_profile import get_profile
from lib.browser_use_utils.clean_resources import clean_session_resources, clean_agent_resources
from lib.config import GOOGLE_MODEL
async def run_task(target_url: str, ReturnModel: type[BaseModel], task: str) -> tuple[bool, str | Any | None]:
    """Run one browser-agent task against ``target_url`` and parse its result.

    A fresh browser session is created for the call and released on every
    exit path, including the failure returns.

    Args:
        target_url: URL opened in a new tab as the agent's initial action.
        ReturnModel: Pydantic model used both as the controller's output model
            and to validate the agent's final JSON result.
        task: Prompt text given to the agent.

    Returns:
        ``(True, model_instance)`` on success, otherwise ``(False, reason)``
        where ``reason`` is a short failure description string.
    """
    session = BrowserSession(
        playwright=(await async_patchright().start()),
        browser_profile=await get_profile(),
    )
    # Outer try/finally: without it the early failure returns below would
    # skip session cleanup and leak the browser.
    try:
        initial_actions = [{"open_tab": {"url": target_url}}]
        controller = Controller(output_model=ReturnModel, exclude_actions=['search_google'])
        agent = Agent(
            browser_session=session,
            initial_actions=initial_actions,
            task=task,
            llm=create_google_ai(GOOGLE_MODEL),
            controller=controller,
        )
        try:
            response = await agent.run()
            final_result = response.final_result()
        except Exception as e:
            # Distinguish API quota exhaustion from any other failure.
            if "ResourceExhausted" in str(e) or "429" in str(e):
                logger(f"⚠️ API 쿼터 에러로 인한 실패: {target_url} | {e}")
                print(f"⚠️ API 쿼터 에러로 인한 실패: {target_url} | {e}")
                return False, "API 쿼터 에러로 인한 실패"
            logger(f"⚠️ 일반 에러로 인한 실패: {target_url} | {e}")
            print(f"⚠️ 일반 에러로 인한 실패: {target_url} | {e}")
            return False, "일반 에러로 인한 실패"
        finally:
            await clean_agent_resources(agent)

        if final_result is None:
            logger(f"⚠️ 최종 결과가 없습니다. 에이전트 실행 실패: {target_url}")
            print(f"⚠️ 최종 결과가 없습니다. 에이전트 실행 실패: {target_url}")
            return False, "최종 결과가 없습니다. 에이전트 실행 실패"

        try:
            resp = ReturnModel(**json.loads(final_result))
        except Exception as e:
            # Log the raw agent output; the decoded value may not exist when
            # json.loads itself is what failed.
            logger(f"⚠️ LLM 응답 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
            print(f"⚠️ LLM 응답 결과 파싱 실패: {target_url} | {e}\n원본 결과: {final_result}")
            return False, "LLM 응답 결과 파싱 실패"
        return True, resp
    finally:
        await clean_session_resources(session)

View file

@ -1,65 +0,0 @@
import asyncio
from browser_use import Agent, BrowserSession
from patchright.async_api import async_playwright as async_patchright
from lib.agents.find_login_page import find_login_page
from lib.browser_use_utils.clean_resources import clean_session_resources
from lib.browser_use_utils.get_profile import get_profile
from lib.utils.save_oauth_providers import save_oauth_providers
async def find_sso_list(target_url):
    """Step through a login-page -> SSO-list -> save pipeline for ``target_url``.

    A browser session is created up front and a small state machine then
    runs each stage in turn. Several transitions look unintended; they are
    marked with NOTE(review) below and should be confirmed before reuse.
    """
    session = BrowserSession(
        playwright=(await async_patchright().start()),
        browser_profile=await get_profile(),
    )
    # State identifiers for the loop below.
    FIND_LOGIN_PAGE = 1
    FIND_SSO_LIST = 2
    SAVE_DATA = 3
    WHEN_ERROR = -1
    FINISH = 0
    final_result = None
    login_url = target_url
    state = FIND_LOGIN_PAGE
    while True:
        if state == FIND_LOGIN_PAGE:
            is_success, resp = await find_login_page(
                target_url=target_url,
                session=session,
            )
            if not is_success:
                print(f"⚠️ 로그인 페이지 탐지 실패: {target_url} | {resp}")
                # NOTE(review): this error state is overwritten two statements
                # later, so a failure here still proceeds to FIND_SSO_LIST.
                state = WHEN_ERROR
            # NOTE(review): on failure `resp` may hold a message rather than a
            # URL, and would then be used as login_url; confirm.
            login_url = resp if resp else target_url
            state = FIND_SSO_LIST
        if state == FIND_SSO_LIST:
            print(f"🔎 SSO 목록 찾는 중: {target_url}")
            # NOTE(review): this calls find_sso_list itself with a `session`
            # keyword that its signature does not accept; presumably a separate
            # SSO-listing agent was intended. Verify the intended callee.
            is_success, resp = await find_sso_list(
                target_url=login_url,
                session=session,
            )
            if not is_success:
                print(f"⚠️ SSO 목록 탐지 실패: {target_url} | {resp}")
                # NOTE(review): overwritten by `state = SAVE_DATA` below.
                state = WHEN_ERROR
            # NOTE(review): the agent response is discarded; an empty string
            # is what reaches the save step.
            final_result = ""
            state = SAVE_DATA
        if state == SAVE_DATA:
            print(f"💾 데이터 저장 중: {target_url}")
            if not final_result:
                print(f"⚠️ SSO 목록이 전달되지 않았습니다: {target_url}")
                # NOTE(review): overwritten by `state = FINISH` below, so the
                # save still runs with the empty result.
                state = WHEN_ERROR
            save_oauth_providers(target_url, final_result)
            state = FINISH
        if state == WHEN_ERROR:
            print(f"⚠️ 에러 발생: {target_url} | 스캔을 중단합니다.")
            # NOTE(review): returning here skips the session cleanup after the loop.
            return
        if state == FINISH:
            print(f"✅ 스캔 완료: {target_url}")
            break
    await clean_session_resources(session)

41
lib/get_sso_list.py Normal file
View file

@ -0,0 +1,41 @@
import json
from pydantic import BaseModel
from lib.prompt.get_sso_list import get_sso_list_task
from lib.agents.run_task import run_task
# Values used for FindLoginPageResponse.status.
NOT_FOUND_LOGIN_PAGE = 0
FOUND_LOGIN_PAGE = 1
class FindLoginPageResponse(BaseModel):
    """Response model passed to run_task as the expected agent output."""

    # NOT_FOUND_LOGIN_PAGE (0) or FOUND_LOGIN_PAGE (1); defaults to not found.
    status: int = NOT_FOUND_LOGIN_PAGE
    # Free-text message; get_sso_list overwrites it with its own summary.
    msg: str | None = None
    # URL of the login page, when reported.
    url: str | None = None
    # SSO providers found on the login page; empty when none were reported.
    sso_list: list[str] = []
async def get_sso_list(target_url) -> tuple[bool, str | FindLoginPageResponse | None]:
    """Run the SSO-discovery task for ``target_url`` and label the outcome.

    Returns ``(False, reason)`` when the task fails or yields something other
    than a ``FindLoginPageResponse``. Otherwise returns ``(True, response)``
    with ``response.msg`` replaced by a summary of what was found.
    """
    ok, result = await run_task(target_url, FindLoginPageResponse, get_sso_list_task)

    # Failures and plain-string results are handed back untouched.
    if not ok or isinstance(result, str):
        return False, result
    if not isinstance(result, FindLoginPageResponse):
        return False, "응답 형식이 올바르지 않습니다. FindLoginPageResponse가 아닙니다."

    if result.status != FOUND_LOGIN_PAGE:
        result.msg = "로그인 페이지를 찾지 못했습니다."
    elif result.sso_list:
        result.msg = "로그인 페이지와 SSO 제공자를 찾았습니다."
    else:
        result.msg = "로그인 페이지는 찾았지만 SSO 제공자가 없습니다."
    return True, result

View file

@ -0,0 +1,65 @@
# Agent prompt: check for blocking, navigate to the site's login page, identify
# SSO buttons, and answer with a single JSON object carrying the keys
# status / msg / url / sso_list.
get_sso_list_task = """
You are an expert in finding login pages.
Your task is to navigate to the login page of the given URL. Follow the steps below strictly and return results only in the specified format.
You are NOT allowed to navigate to URLs that are not directly discoverable within the initial domain. Do NOT use search engines or guess external login URLs.
0. INITIAL BLOCK CHECK
- If the browser is blocked when trying to access the page due to firewall, CAPTCHA, regional restrictions, or other access denials immediately terminate the process and return the following JSON:
```json
{
"status": 0,
"msg": "Blocked",
"url": "",
"sso_list": []
}
```
- Do NOT proceed to further steps in this case.
1. LOGIN PAGE NAVIGATION
- Navigate only to a **client-side (non-enterprise)** login page within the provided domain.
- Do NOT rely on external tools, search engines, or links not directly found on the site.
- If a consent popup (e.g. for privacy/cookies) appears, you MUST dismiss or close it before proceeding.
- Since step 0 confirmed access, assume the page now loads properly.
2. SSO BUTTON IDENTIFICATION
- On the login page, look for the following social login (SSO) buttons:
- Google, GitHub, Facebook, LinkedIn, Microsoft, Naver, Slack, Etc.
- Proceed only if it is clearly an **actual SSO button**.
- Exclude the following:
- Passkey-related buttons
- Username/password fields
- Email-based login
- Non-OAuth methods such as certificate or phone verification
3. RETURN FORMAT
- If the login page is successfully found, return:
```json
{
"status": 1,
"msg": "Login page found",
"url": "https://example.com/login",
"sso_list": ["Google", "GitHub"]
}
```
- If the login page cannot be found, return:
```json
{
"status": 0,
"msg": "Login page not found",
"url": "",
"sso_list": []
}
```
- If blocked (as in step 0), return:
```json
{
"status": 0,
"msg": "Blocked",
"url": "",
"sso_list": []
}
```
- Return ONLY the JSON object. Do NOT include any explanation, logging, or extra output.
"""

View file

@ -8,7 +8,6 @@ def check_env_variables():
"BACKEND_URL",
"GOOGLE_API_KEY",
"GOOGLE_MODEL",
"GOOGLE_PLANNER_MODEL"
]
for var in required_vars:

View file

@ -4,6 +4,26 @@ from pathlib import Path
# Path of the scan-progress JSON file, relative to the working directory.
progress_file = Path("data/scan_progress.json")
class ProgressChecker:
    """Load and persist scan progress as JSON at a fixed file path.

    Attributes:
        filepath: Location of the progress file, as given to the constructor.
        progress: Data read from the file at construction time, or ``None``
            when the file is missing or unreadable. ``save`` writes it back.
    """

    def __init__(self, filepath):
        self.filepath = filepath
        # load() is the reader defined below; it yields None when nothing
        # usable is stored yet.
        self.progress = self.load()

    def save(self):
        """Write the current progress to the file as UTF-8 JSON."""
        with open(self.filepath, 'w', encoding='utf-8') as f:
            json.dump(self.progress, f, ensure_ascii=False, indent=2)

    def load(self):
        """Return the progress stored in the file, or ``None`` if unavailable.

        A missing or unreadable file (``OSError``) and undecodable or
        malformed content (``ValueError``, which covers JSON and Unicode
        decode errors) both yield ``None``.
        """
        try:
            with open(self.filepath, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, ValueError):
            return None
def save_progress(current_progress):
"""현재 진행 상황을 파일에 저장"""