깃헙 모델 탑재 완료 : 사용시 0$ 소모

This commit is contained in:
imnyang 2025-05-18 23:58:16 +09:00
commit 44cdab2b40
6 changed files with 147 additions and 89 deletions

View file

@ -1,5 +1,12 @@
GOOGLE_API_KEY=AIzaSyAaWhcuxKMr1mBAmC6iObksvkaRVvAPXyA ANONYMIZED_TELEMETRY=false
UNGOOGLED_CHROMIUM_PATH=/path/to/ungoogled-chromium
OPENAI_API_KEY=your_openai_api_key_here
OPENAI_BASE_URL=https://models.github.ai/inference # 선택
OPENAI_MODEL=openai/o4-mini # Github Models가 아닐시 gpt-4.1
# 선택
UNGOOGLED_CHROMIUM_PATH=/Applications/Ungoogled-Chromium.app
# 선택
PROXY_HOST=127.0.0.1 PROXY_HOST=127.0.0.1
PROXY_PORT=8080 PROXY_PORT=8080

View file

@ -0,0 +1,6 @@
Ungoogled Chromium에서 테스트되었습니다.
일반적인 Chromium도 괜찮습니다.
## 참고하면 좋을만한 것
- [ ] 일부 웹사이트는 사용자의 언어에 따라 OAuth 옵션을 바꾸기도 합니다.

27
lib/browser_config.py Normal file
View file

@ -0,0 +1,27 @@
from browser_use.browser.context import BrowserContextConfig
from pathlib import Path
import os
def browser_config_kwargs(lang: str = "en_US") -> dict:
    """Build the keyword arguments used to construct a ``BrowserConfig``.

    Args:
        lang: Browser UI language. Both POSIX-style (``"en_US"``) and
            BCP 47-style (``"en-US"``) spellings are accepted; underscores
            are converted to hyphens because that is the form Chromium's
            ``--lang`` switch understands.

    Returns:
        A dict with the fixed Chromium settings plus, when configured through
        the environment, ``browser_binary_path`` and ``proxy``.

    Environment:
        BROWSER_BINARY_PATH / UNGOOGLED_CHROMIUM_PATH: optional path to the
            browser executable. ``BROWSER_BINARY_PATH`` wins when both are set.
        PROXY_HOST, PROXY_PORT: optional HTTP proxy; both must be set for the
            proxy to be applied.
    """
    chromium_lang = lang.replace("_", "-")

    config = {
        "browser_type": "chromium",
        "headless": False,
        "disable_security": True,
        "extra_browser_args": [
            "--disable-web-security",
            "--disable-features=IsolateOrigins,site-per-process",
            "--disable-popup-blocking",
            f"--lang={chromium_lang}",
        ],
    }

    # The sample .env documents UNGOOGLED_CHROMIUM_PATH, so honour it as a
    # fallback instead of silently ignoring what the user configured.
    binary_path = os.getenv("BROWSER_BINARY_PATH") or os.getenv(
        "UNGOOGLED_CHROMIUM_PATH"
    )
    if binary_path:
        config["browser_binary_path"] = binary_path

    proxy_host = os.getenv("PROXY_HOST")
    proxy_port = os.getenv("PROXY_PORT")
    if proxy_host and proxy_port:
        config["proxy"] = {"server": f"http://{proxy_host}:{proxy_port}"}

    return config

View file

@ -1,55 +0,0 @@
import zipfile
import requests
from pathlib import Path
import shutil
TEMP_EXTRACT_DIR = Path("./browser/temp_ublock_extract")

# Seconds to wait on GitHub before giving up; requests has no default timeout
# and would otherwise block forever on a stalled connection.
_HTTP_TIMEOUT_SECONDS = 30


def ensure_ublock_origin(UBLOCK_DIR: Path):
    """Download and unpack uBlock Origin into ``UBLOCK_DIR`` if it is missing.

    The download is skipped when ``UBLOCK_DIR`` already contains a
    ``manifest.json``.

    Args:
        UBLOCK_DIR: Directory that should hold the unpacked extension.

    Raises:
        requests.HTTPError: If the GitHub API or the asset download fails.
        Exception: If the latest release has no ``*.chromium.zip`` asset or the
            archive does not contain a ``uBlock0.chromium`` directory.
    """
    if (UBLOCK_DIR / "manifest.json").exists():
        print("✅ uBlock Origin already present.")
        return

    UBLOCK_DIR.parent.mkdir(parents=True, exist_ok=True)

    print("⬇️ Downloading uBlock Origin from GitHub API...")

    # 1. Fetch the latest release metadata from the GitHub API.
    api_url = "https://api.github.com/repos/gorhill/uBlock/releases/latest"
    res = requests.get(api_url, timeout=_HTTP_TIMEOUT_SECONDS)
    res.raise_for_status()
    data = res.json()

    # 2. Locate the Chromium build among the release assets.
    asset = next((a for a in data["assets"] if ".chromium.zip" in a["name"]), None)
    if not asset:
        raise Exception("❌ Could not find uBlock0.chromium.zip in GitHub release.")

    zip_url = asset["browser_download_url"]

    # 3. Download the archive. The target folder is created explicitly because
    #    UBLOCK_DIR is not required to live under ./browser.
    zip_path = Path("./browser/ublock.zip")
    zip_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with requests.get(zip_url, stream=True, timeout=_HTTP_TIMEOUT_SECONDS) as r:
            r.raise_for_status()
            with open(zip_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        # 4. Unpack into a clean scratch directory, then move the extension
        #    folder into place. Leftovers from an earlier failed run are
        #    removed first so they cannot be mistaken for fresh content.
        shutil.rmtree(TEMP_EXTRACT_DIR, ignore_errors=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(TEMP_EXTRACT_DIR)

        # The archive is expected to contain a top-level uBlock0.chromium/ folder.
        extracted_root = TEMP_EXTRACT_DIR / "uBlock0.chromium"
        if not extracted_root.is_dir():
            found = sorted(p.name for p in TEMP_EXTRACT_DIR.iterdir())
            raise Exception(f"❌ Unexpected directory inside zip: {found}")

        # NOTE(review): if UBLOCK_DIR already exists without a manifest.json,
        # shutil.move nests the folder inside it — confirm whether that case
        # should wipe the incomplete directory first.
        shutil.move(str(extracted_root), str(UBLOCK_DIR))
    finally:
        # Always clean up scratch files, including on failure.
        shutil.rmtree(TEMP_EXTRACT_DIR, ignore_errors=True)
        if zip_path.exists():
            zip_path.unlink()

    print("✅ uBlock Origin downloaded and extracted.")

85
main.py
View file

@ -1,50 +1,71 @@
import asyncio import asyncio
from locale import locale_alias import json
from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from browser_use import Agent, Browser, BrowserConfig from browser_use import Agent, Browser, BrowserConfig, Controller
from browser_use.browser.context import BrowserContextConfig from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI from pydantic import BaseModel
from langchain_text_splitters import Language from lib.browser_config import browser_config_kwargs
from lib.ublock_init import ensure_ublock_origin
from pathlib import Path
import os import os
import csv
# Load settings from .env before anything reads the environment.
load_dotenv()

# The agent talks to an OpenAI-compatible endpoint through ChatOpenAI, so the
# credential that must be present is OPENAI_API_KEY (the sample .env no longer
# ships a GOOGLE_API_KEY entry).
if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY environment variable is not set.")

# Call the helper inline so the imported function name is not rebound to its
# own return value.
browser = Browser(config=BrowserConfig(**browser_config_kwargs()))
# Record describing a single OAuth login flow; every field is a plain string.
# Not referenced by the other code visible in this file.
# NOTE(review): the field names match the JSON example in temp.md except that
# the example uses "issue" where this model uses "source" — confirm which name
# is intended.
class OAuth(BaseModel):
source: str
provider: str
client_id: str
redirect_uri: str
response_type: str
scope: str
oauth_uri: str
# Output model handed to the Controller: a list of provider names as strings.
# main() reads the "oauth_providers" key from the parsed agent result.
class OAuthExists(BaseModel):
oauth_providers: List[str]
# Controller configured with OAuthExists as its output model.
controller = Controller(output_model=OAuthExists)
# Extra text passed to the Agent as extend_planner_system_message: an example
# of the expected JSON shape followed by instructions to exclude Passkey.
extend_planner_system_message = """
{"oauth_providers": ["GitHub", "Google", "Facebook", "Twitter", "LinkedIn", "GitLab", "Bitbucket", "Discord", "Reddit", "Spotify", "Twitch", "Yahoo", "Amazon", "Microsoft", "Apple", ...]}
The OAuth providers are available on the login page of the website.
The user is interested in the OAuth providers available on the login page of the website.
Passkey isn't a Oauth provider. Remove it from the list of OAuth providers.
"""
async def main():
    """Run the agent against one site and print the OAuth providers it finds.

    The agent is asked to open the site's login page and report the available
    OAuth providers as JSON of the form ``{"oauth_providers": [...]}``.

    Raises:
        RuntimeError: If the agent finishes without producing a final result.
    """
    url = "https://auth0.com"

    agent = Agent(
        browser=browser,
        task=f"Go to {url}, navigate to the login page, and identify the available OAuth providers. Passkey is not a valid OAuth provider.",
        llm=ChatOpenAI(
            # The sample .env names gpt-4.1 as the model to use outside GitHub
            # Models; fall back to it rather than passing model=None.
            model=os.getenv("OPENAI_MODEL") or "gpt-4.1",
            temperature=0.0,
        ),
        controller=controller,
        extend_planner_system_message=extend_planner_system_message,
    )

    try:
        response = await agent.run()
    finally:
        # Release the browser process even when the run fails.
        await browser.close()

    final = response.final_result()
    if final is None:
        # json.loads(None) would raise an unhelpful TypeError.
        raise RuntimeError(f"Agent returned no final result for {url}.")
    result = json.loads(final)

    # Reset the terminal so only the summary remains on screen.
    print("\033c", end="")
    print(f"🔗: {url}")
    for provider in result["oauth_providers"]:
        print(provider)


if __name__ == "__main__":
    asyncio.run(main())

52
temp.md Normal file
View file

@ -0,0 +1,52 @@
You are an AI model specialized in web crawling and analysis. Given a URI, perform the following tasks:
1. Navigate to the provided URI and locate the login page. If it's not found, explore common auth-related pages like /login or /auth.
2. On the login page, identify all available social login buttons (OAuth-based) such as Google, GitHub, Facebook, etc.
3. Simulate clicking each social login button and follow the redirect to capture the full redirect URL (including query parameters).
4. From the redirect URL and parameters, extract:
- `client_id`
- `redirect_uri`
- `response_type`
- `scope`
5. Based on URL patterns, infer the OAuth method: Authorization Code, Implicit, PKCE, etc.
6. Return data in the following JSON format only:
```json
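{
  "oauths": [
    {
      "issue": "<site being tested, e.g., git.imnya.ng>",
      "provider": "<OAuth provider name, e.g., GitHub>",
      "client_id": "<client_id query parameter>",
      "redirect_uri": "<redirect_uri query parameter>",
      "response_type": "<response_type query parameter>",
      "scope": "<scope query parameter>",
      "oauth_uri": "<final redirect URL reached after clicking the button>"
    }
  ]
}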
```
7. If the login button says something like "Login with GitHub" or "Login with Google", follow the flow and use the **final redirect URL after clicking** as the value of `oauth_uri`.
**Examples:**
```json
{
"oauths": [
{
"issue": "git.imnya.ng",
"provider": "GitHub",
"client_id": "Iv1.xxxxx",
"redirect_uri": "https://git.imnya.ng/user/oauth2/callback",
"response_type": "code",
"scope": "read:user",
"oauth_uri": "https://github.com/login/oauth/authorize?client_id=Iv1.xxxxx&redirect_uri=https%3A%2F%2Fgit.imnya.ng%2Fuser%2Foauth2%2Fcallback&response_type=code&scope=read%3Auser"
}
]
}
```
**Constraints:**
* Simulate realistic interaction with buttons (e.g., clicking them to follow redirects).
* Ensure the output is strictly in the specified JSON format.
* Avoid any additional text or explanations outside the JSON response.
* If no OAuth logins are found, return an empty array.
* WebAuthn and passkeys are not OAuth, so do not include them in the results.