This commit is contained in:
imnyang 2025-05-18 21:36:43 +09:00
commit ce5135717c
5 changed files with 76 additions and 59 deletions

1
.env
View file

@ -1 +0,0 @@
GOOGLE_API_KEY=AIzaSyAaWhcuxKMr1mBAmC6iObksvkaRVvAPXyA

5
.env.example Normal file
View file

@ -0,0 +1,5 @@
GOOGLE_API_KEY=your-google-api-key-here
UNGOOGLED_CHROMIUM_PATH=/path/to/ungoogled-chromium
PROXY_HOST=127.0.0.1
PROXY_PORT=8080

3
.gitignore vendored
View file

@ -9,4 +9,5 @@ wheels/
# Virtual environments # Virtual environments
.venv .venv
browser/ browser/
.env

55
lib/ublock_init.py Normal file
View file

@ -0,0 +1,55 @@
import zipfile
import requests
from pathlib import Path
import shutil
TEMP_EXTRACT_DIR = Path("./browser/temp_ublock_extract")
def ensure_ublock_origin(UBLOCK_DIR: Path) -> None:
    """Download the latest uBlock Origin Chromium build and unpack it into UBLOCK_DIR.

    The download is skipped when ``UBLOCK_DIR / "manifest.json"`` already
    exists. Otherwise the latest GitHub release of gorhill/uBlock is queried,
    the ``*.chromium.zip`` asset is downloaded next to UBLOCK_DIR, extracted
    into TEMP_EXTRACT_DIR, and its ``uBlock0.chromium`` folder is moved to
    UBLOCK_DIR. The zip and the temporary extraction directory are removed
    afterwards, whether or not the installation succeeded.

    Args:
        UBLOCK_DIR: Target directory for the unpacked extension.

    Raises:
        requests.RequestException: If a GitHub request fails or times out.
        RuntimeError: If the release has no Chromium zip asset, or the zip
            does not contain a ``uBlock0.chromium`` directory.
    """
    # The manifest check alone is enough: it cannot exist unless UBLOCK_DIR does.
    if (UBLOCK_DIR / "manifest.json").exists():
        print("✅ uBlock Origin already present.")
        return

    UBLOCK_DIR.parent.mkdir(parents=True, exist_ok=True)
    print("⬇️ Downloading uBlock Origin from GitHub API...")

    # 1. Fetch the latest release metadata from the GitHub API.
    #    A timeout keeps a stalled connection from hanging the program forever.
    api_url = "https://api.github.com/repos/gorhill/uBlock/releases/latest"
    res = requests.get(api_url, timeout=30)
    res.raise_for_status()
    data = res.json()

    # 2. Find the Chromium zip among the release assets.
    asset = next((a for a in data["assets"] if ".chromium.zip" in a["name"]), None)
    if not asset:
        raise RuntimeError("❌ Could not find uBlock0.chromium.zip in GitHub release.")
    zip_url = asset["browser_download_url"]

    # Keep the zip next to the target so the directory created above is the
    # one that is written to (same path as before for ./browser/ublock-origin).
    zip_path = UBLOCK_DIR.parent / "ublock.zip"
    try:
        # 3. Stream the archive to disk.
        with requests.get(zip_url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        # 4. Extract, then move the inner directory into place.
        #    Clear leftovers from an earlier failed run first so they cannot
        #    be mistaken for the archive's contents.
        shutil.rmtree(TEMP_EXTRACT_DIR, ignore_errors=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(TEMP_EXTRACT_DIR)

        # The zip is expected to contain a top-level uBlock0.chromium/ folder.
        extracted_root = TEMP_EXTRACT_DIR / "uBlock0.chromium"
        if not extracted_root.is_dir():
            found = sorted(p.name for p in TEMP_EXTRACT_DIR.iterdir())
            raise RuntimeError(f"❌ Unexpected directory inside zip: {found}")

        # A pre-existing UBLOCK_DIR without a manifest is an incomplete install;
        # shutil.move would nest the extension inside it instead of replacing it.
        if UBLOCK_DIR.is_dir():
            shutil.rmtree(UBLOCK_DIR)
        shutil.move(str(extracted_root), str(UBLOCK_DIR))
    finally:
        # Always remove the temporary artifacts, even when a step above failed.
        shutil.rmtree(TEMP_EXTRACT_DIR, ignore_errors=True)
        zip_path.unlink(missing_ok=True)

    print("✅ uBlock Origin downloaded and extracted.")

71
main.py
View file

@ -1,74 +1,26 @@
import asyncio import asyncio
from gc import disable from locale import locale_alias
from logging import config
import zipfile
import requests
from dotenv import load_dotenv from dotenv import load_dotenv
from pathlib import Path
import shutil
from browser_use import Agent, Browser, BrowserConfig from browser_use import Agent, Browser, BrowserConfig
from browser_use.browser.context import BrowserContextConfig from browser_use.browser.context import BrowserContextConfig
from langchain_google_genai import ChatGoogleGenerativeAI from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import Language
from lib.ublock_init import ensure_ublock_origin
from pathlib import Path
import os
load_dotenv() load_dotenv()
UBLOCK_DIR = Path("./browser/ublock-origin") UBLOCK_DIR = Path("./browser/ublock-origin")
TEMP_EXTRACT_DIR = Path("./browser/temp_ublock_extract")
def ensure_ublock_origin(): ensure_ublock_origin(UBLOCK_DIR)
if UBLOCK_DIR.exists() and (UBLOCK_DIR / "manifest.json").exists():
print("✅ uBlock Origin already present.")
return
if not UBLOCK_DIR.parent.exists():
UBLOCK_DIR.parent.mkdir(parents=True, exist_ok=True)
print("⬇️ Downloading uBlock Origin from GitHub API...")
# 1. GitHub API로 최신 릴리스 정보 가져오기
api_url = "https://api.github.com/repos/gorhill/uBlock/releases/latest"
res = requests.get(api_url)
res.raise_for_status()
data = res.json()
# 2. assets 중 'uBlock0.chromium.zip' 찾기
asset = next((a for a in data["assets"] if ".chromium.zip" in a["name"]), None)
if not asset:
raise Exception("❌ Could not find uBlock0.chromium.zip in GitHub release.")
zip_url = asset["browser_download_url"]
# 3. 다운로드
zip_path = Path("./browser/ublock.zip")
with requests.get(zip_url, stream=True) as r:
r.raise_for_status()
with open(zip_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
# 4. 압축 해제 후 내부 디렉터리 이동
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
zip_ref.extractall(TEMP_EXTRACT_DIR)
# zip 안에 uBlock0.chromium/ 폴더가 있다고 가정
extracted_root = next(TEMP_EXTRACT_DIR.iterdir())
if extracted_root.name != "uBlock0.chromium":
raise Exception("❌ Unexpected directory inside zip:", extracted_root)
shutil.move(str(extracted_root), UBLOCK_DIR)
shutil.rmtree(TEMP_EXTRACT_DIR, ignore_errors=True)
zip_path.unlink() # zip 삭제
print("✅ uBlock Origin downloaded and extracted.")
ensure_ublock_origin()
browser = Browser( browser = Browser(
config=BrowserConfig( config=BrowserConfig(
browser_type="chromium", browser_type="chromium",
headless=False, headless=False,
disable_security=True, disable_security=True,
proxy={"server": f"http://{os.getenv('PROXY_HOST')}:{os.getenv('PROXY_PORT')}"},
extra_browser_args=[ extra_browser_args=[
f"--load-extension={UBLOCK_DIR}", f"--load-extension={UBLOCK_DIR}",
@ -76,15 +28,20 @@ browser = Browser(
"--disable-web-security", "--disable-web-security",
"--disable-features=IsolateOrigins,site-per-process", "--disable-features=IsolateOrigins,site-per-process",
"--disable-popup-blocking", "--disable-popup-blocking",
"--lang=en-US",
], ],
context=BrowserContextConfig(
locale="en-US",
# You can also set 'accept_language' if supported:
accept_language="en-US,en"
),
) )
) )
async def main(): async def main():
agent = Agent( agent = Agent(
browser=browser, browser=browser,
task="https://naver.com의 로그인 페이지를 찾아줘", task="http://naver.com의 로그인 페이지를 찾아줘",
llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash"), llm=ChatGoogleGenerativeAI(model="gemini-2.0-flash"),
) )