mirror of
https://github.com/j93es/browser-use-oauth.git
synced 2026-06-04 21:11:28 +09:00
Remove is-html-fast project files and update browser profile initialization to ignore HTTPS errors and enable specific command-line arguments.
This commit is contained in:
parent
2d3677fe4a
commit
319c5bb72f
6 changed files with 47 additions and 1800 deletions
71
is-html-fast/.gitignore
vendored
71
is-html-fast/.gitignore
vendored
|
|
@ -1,71 +0,0 @@
|
|||
# Generated by Cargo
|
||||
# will have compiled files and executables
|
||||
debug/
|
||||
target/
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# MSVC Windows builds of rustc generate these, which store debugging information
|
||||
*.pdb
|
||||
|
||||
# Generated by cargo mutants
|
||||
# Contains mutation testing data
|
||||
**/mutants.out*/
|
||||
|
||||
# RustRover
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
Icon[]
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
1581
is-html-fast/Cargo.lock
generated
1581
is-html-fast/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +0,0 @@
|
|||
[package]
|
||||
name = "is-html-fast"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
rayon = "1.10.0"
|
||||
reqwest = { version = "0.12.19", features = ["blocking", "json"]}
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
실제로 사용되진 않습니다.
|
||||
일회용 코드입니다.
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Duration;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use rayon::prelude::*;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, USER_AGENT, ACCEPT, ACCEPT_LANGUAGE, ACCEPT_ENCODING, CONNECTION, UPGRADE_INSECURE_REQUESTS};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let input_file = File::open("domains.txt")?;
|
||||
let reader = BufReader::new(input_file);
|
||||
let domains: Vec<String> = reader.lines().filter_map(Result::ok).collect();
|
||||
|
||||
let total_count = domains.len();
|
||||
let counter = Arc::new(AtomicUsize::new(0));
|
||||
let html_count = Arc::new(AtomicUsize::new(0));
|
||||
let failed_count = Arc::new(AtomicUsize::new(0));
|
||||
let non_html_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
let output_file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open("domains-filtered.txt")?;
|
||||
|
||||
let output = Arc::new(Mutex::new(output_file));
|
||||
|
||||
// 브라우저 헤더 세팅
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(USER_AGENT, HeaderValue::from_static("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0"));
|
||||
headers.insert(ACCEPT, HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"));
|
||||
headers.insert(ACCEPT_LANGUAGE, HeaderValue::from_static("ko,en-US;q=0.7,en;q=0.3"));
|
||||
headers.insert(ACCEPT_ENCODING, HeaderValue::from_static("gzip, deflate, br"));
|
||||
headers.insert(CONNECTION, HeaderValue::from_static("keep-alive"));
|
||||
headers.insert(UPGRADE_INSECURE_REQUESTS, HeaderValue::from_static("1"));
|
||||
|
||||
let client = Arc::new(
|
||||
Client::builder()
|
||||
.timeout(Duration::from_secs(5))
|
||||
.default_headers(headers)
|
||||
.build()?,
|
||||
);
|
||||
|
||||
domains.par_iter().for_each(|domain| {
|
||||
let current = counter.fetch_add(1, Ordering::SeqCst) + 1;
|
||||
let url = format!("https://{}", domain);
|
||||
|
||||
let response = client.get(&url).send();
|
||||
|
||||
match response {
|
||||
Ok(resp) => {
|
||||
if let Some(content_type) = resp.headers().get(CONTENT_TYPE) {
|
||||
if let Ok(content_type_str) = content_type.to_str() {
|
||||
if content_type_str.starts_with("text/html") {
|
||||
if let Ok(mut file) = output.lock() {
|
||||
writeln!(file, "{}", domain).ok();
|
||||
}
|
||||
html_count.fetch_add(1, Ordering::SeqCst);
|
||||
println!("[{}/{}] ✅ HTML: {}", current, total_count, domain);
|
||||
} else {
|
||||
non_html_count.fetch_add(1, Ordering::SeqCst);
|
||||
println!("[{}/{}] ❌ Not HTML: {} ({})", current, total_count, domain, content_type_str);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
non_html_count.fetch_add(1, Ordering::SeqCst);
|
||||
println!("[{}/{}] ❌ No Content-Type: {}", current, total_count, domain);
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
failed_count.fetch_add(1, Ordering::SeqCst);
|
||||
println!("[{}/{}] ⚠️ Failed to connect: {}", current, total_count, domain);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Final results
|
||||
let html_final = html_count.load(Ordering::SeqCst);
|
||||
let failed_final = failed_count.load(Ordering::SeqCst);
|
||||
let non_html_final = non_html_count.load(Ordering::SeqCst);
|
||||
|
||||
println!("\n=== Final Results ===");
|
||||
println!("📊 Total domains: {}", total_count);
|
||||
println!("✅ HTML domains: {} ({:.1}%)", html_final, (html_final as f64 / total_count as f64) * 100.0);
|
||||
println!("❌ Non-HTML domains: {} ({:.1}%)", non_html_final, (non_html_final as f64 / total_count as f64) * 100.0);
|
||||
println!("⚠️ Failed connections: {} ({:.1}%)", failed_final, (failed_final as f64 / total_count as f64) * 100.0);
|
||||
println!("💾 HTML domains saved to: domains-filtered.txt");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -66,6 +66,7 @@ async def GetProfile(headless=False):
|
|||
profile = BrowserProfile(
|
||||
# Security settings
|
||||
# disable_security=True,
|
||||
ignore_https_errors=True,
|
||||
# Display settings
|
||||
headless=headless,
|
||||
# Data persistence
|
||||
|
|
@ -78,52 +79,52 @@ async def GetProfile(headless=False):
|
|||
# "--disable-features=Translate,PasswordManagerDefaultEnabled",
|
||||
],
|
||||
ignore_default_args=[
|
||||
# "--disable-datasaver-prompt",
|
||||
# "--disable-component-extensions-with-background-pages",
|
||||
# "--disable-prompt-on-repost",
|
||||
# "--safeBrowse-disable-auto-update",
|
||||
# "--install-autogenerated-theme=0,0,0",
|
||||
# "--disable-speech-synthesis-api",
|
||||
# "--ash-no-nudges",
|
||||
# "--test-type=gpu",
|
||||
# "--noerrdialogs",
|
||||
# "--disable-external-intent-requests",
|
||||
# "--disable-breakpad",
|
||||
# "--disable-backgrounding-occluded-windows",
|
||||
# "--export-tagged-pdf",
|
||||
# "--disable-focus-on-load",
|
||||
# "--suppress-message-center-popups",
|
||||
# "--disable-renderer-backgrounding",
|
||||
# "--hide-crash-restore-bubble",
|
||||
# "--disable-back-forward-cache",
|
||||
# "--allow-legacy-extension-manifests",
|
||||
# # "--disable-field-trial-config", # 왜 이걸 끄면 웹사이트가 압축된 형태로 보이는지는 모르겠음
|
||||
# "--disable-popup-blocking",
|
||||
# "--disable-background-networking",
|
||||
# "--no-first-run",
|
||||
# "--disable-blink-features=AutomationControlled",
|
||||
# "--password-store=basic",
|
||||
# "--enable-network-information-downlink-max",
|
||||
# "--allow-pre-commit-input",
|
||||
# "--enable-features=NetworkService,NetworkServiceInProcess",
|
||||
# "--metrics-recording-only",
|
||||
# "--silent-debugger-extension-api",
|
||||
# "--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate,AutomationControlled,BackForwardCache,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,HeavyAdPrivacyMitigations,PrivacySandboxSettings4,AutofillServerCommunication,CrashReporting,OverscrollHistoryNavigation,InfiniteSessionRestore,ExtensionDisableUnsupportedDeveloper",
|
||||
# "--disable-ipc-flooding-protection",
|
||||
# "--disable-hang-monitor",
|
||||
# "--disable-dev-shm-usage",
|
||||
# "--disable-client-side-phishing-detection",
|
||||
# "--log-level=2",
|
||||
# "--generate-pdf-document-outline",
|
||||
# "--disable-speech-api",
|
||||
# "--disable-search-engine-choice-screen",
|
||||
# "--no-service-autorun",
|
||||
# "--no-pings",
|
||||
# "--disable-component-update",
|
||||
# '--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
|
||||
# "--disable-background-timer-throttling",
|
||||
# "--use-mock-keychain",
|
||||
# "--disable-features=IsolateOrigins,site-per-process",
|
||||
"--disable-datasaver-prompt",
|
||||
"--disable-component-extensions-with-background-pages",
|
||||
"--disable-prompt-on-repost",
|
||||
"--safeBrowse-disable-auto-update",
|
||||
"--install-autogenerated-theme=0,0,0",
|
||||
"--disable-speech-synthesis-api",
|
||||
"--ash-no-nudges",
|
||||
"--test-type=gpu",
|
||||
"--noerrdialogs",
|
||||
"--disable-external-intent-requests",
|
||||
"--disable-breakpad",
|
||||
"--disable-backgrounding-occluded-windows",
|
||||
"--export-tagged-pdf",
|
||||
"--disable-focus-on-load",
|
||||
"--suppress-message-center-popups",
|
||||
"--disable-renderer-backgrounding",
|
||||
"--hide-crash-restore-bubble",
|
||||
"--disable-back-forward-cache",
|
||||
"--allow-legacy-extension-manifests",
|
||||
# "--disable-field-trial-config", # 왜 이걸 끄면 웹사이트가 압축된 형태로 보이는지는 모르겠음
|
||||
"--disable-popup-blocking",
|
||||
"--disable-background-networking",
|
||||
"--no-first-run",
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
"--password-store=basic",
|
||||
"--enable-network-information-downlink-max",
|
||||
"--allow-pre-commit-input",
|
||||
"--enable-features=NetworkService,NetworkServiceInProcess",
|
||||
"--metrics-recording-only",
|
||||
"--silent-debugger-extension-api",
|
||||
"--disable-features=AcceptCHFrame,AutoExpandDetailsElement,AvoidUnnecessaryBeforeUnloadCheckSync,CertificateTransparencyComponentUpdater,DestroyProfileOnBrowserClose,DialMediaRouteProvider,ExtensionManifestV2Disabled,GlobalMediaControls,HttpsUpgrades,ImprovedCookieControls,LazyFrameLoading,LensOverlay,MediaRouter,PaintHolding,ThirdPartyStoragePartitioning,Translate,AutomationControlled,BackForwardCache,OptimizationHints,ProcessPerSiteUpToMainFrameThreshold,InterestFeedContentSuggestions,CalculateNativeWinOcclusion,HeavyAdPrivacyMitigations,PrivacySandboxSettings4,AutofillServerCommunication,CrashReporting,OverscrollHistoryNavigation,InfiniteSessionRestore,ExtensionDisableUnsupportedDeveloper",
|
||||
"--disable-ipc-flooding-protection",
|
||||
"--disable-hang-monitor",
|
||||
"--disable-dev-shm-usage",
|
||||
"--disable-client-side-phishing-detection",
|
||||
"--log-level=2",
|
||||
"--generate-pdf-document-outline",
|
||||
"--disable-speech-api",
|
||||
"--disable-search-engine-choice-screen",
|
||||
"--no-service-autorun",
|
||||
"--no-pings",
|
||||
"--disable-component-update",
|
||||
'--simulate-outdated-no-au="Tue, 31 Dec 2099 23:59:59 GMT"',
|
||||
"--disable-background-timer-throttling",
|
||||
"--use-mock-keychain",
|
||||
"--disable-features=IsolateOrigins,site-per-process",
|
||||
# 아래는 기존 예시에 있던 인자들입니다. 필요에 따라 유지하거나 제거하세요.
|
||||
"--enable-automation",
|
||||
"--disable-extensions",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue