From 603531a0c009684c8cc9de8f2699d66914a14b64 Mon Sep 17 00:00:00 2001 From: imnyang Date: Fri, 6 Jun 2025 23:57:19 +0900 Subject: [PATCH] =?UTF-8?q?=EB=84=88=EB=AC=B4=20=EB=B9=A0=EB=A5=B4?= =?UTF-8?q?=EB=8B=88=EA=B9=8C=20=EB=A0=88=EC=9D=B4=ED=8A=B8=EB=A6=AC?= =?UTF-8?q?=EB=B0=8B=20=EA=B1=B8=EB=A6=AC=EB=8A=94=EA=B1=B0=20=EA=B0=99?= =?UTF-8?q?=EC=95=84=EC=9A=94=EC=9A=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- is-html-fast/Cargo.lock | 154 ++++++++++++--------------------------- is-html-fast/Cargo.toml | 3 +- is-html-fast/src/main.rs | 99 ++++++++++++------------- 3 files changed, 96 insertions(+), 160 deletions(-) diff --git a/is-html-fast/Cargo.lock b/is-html-fast/Cargo.lock index 5b8c4bc..11ee252 100644 --- a/is-html-fast/Cargo.lock +++ b/is-html-fast/Cargo.lock @@ -99,6 +99,31 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "displaydoc" version = "0.2.5" @@ -110,6 +135,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -171,21 +202,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "futures" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - [[package]] name = "futures-channel" version = "0.3.31" @@ -202,34 +218,12 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" -[[package]] -name = "futures-executor" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - [[package]] name = "futures-io" version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-macro" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "futures-sink" version = "0.3.31" @@ -248,10 +242,8 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" dependencies = [ - "futures-channel", "futures-core", "futures-io", - "futures-macro", "futures-sink", "futures-task", "memchr", @@ -569,9 +561,8 @@ dependencies = [ name = "is-html-fast" version = "0.1.0" dependencies = [ - "futures", + "rayon", 
"reqwest", - "tokio", ] [[package]] @@ -608,16 +599,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.27" @@ -732,29 +713,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -813,12 +771,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] -name = "redox_syscall" -version = "0.5.12" +name = "rayon" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ - "bitflags", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", ] [[package]] @@ 
-952,12 +921,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "security-framework" version = "2.11.1" @@ -1031,15 +994,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" -[[package]] -name = "signal-hook-registry" -version = "1.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410" -dependencies = [ - "libc", -] - [[package]] name = "slab" version = "0.4.9" @@ -1162,25 +1116,11 @@ dependencies = [ "bytes", "libc", "mio", - "parking_lot", "pin-project-lite", - "signal-hook-registry", "socket2", - "tokio-macros", "windows-sys 0.52.0", ] -[[package]] -name = "tokio-macros" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tokio-native-tls" version = "0.3.1" diff --git a/is-html-fast/Cargo.toml b/is-html-fast/Cargo.toml index 44d60d1..7ce4703 100644 --- a/is-html-fast/Cargo.toml +++ b/is-html-fast/Cargo.toml @@ -4,6 +4,5 @@ version = "0.1.0" edition = "2024" [dependencies] -futures = "0.3.31" +rayon = "1.10.0" reqwest = { version = "0.12.19", features = ["blocking", "json"]} -tokio = { version = "1", features = ["full"] } diff --git a/is-html-fast/src/main.rs b/is-html-fast/src/main.rs index 68cb390..24e53d0 100644 --- a/is-html-fast/src/main.rs +++ b/is-html-fast/src/main.rs @@ -1,67 +1,64 @@ -use std::fs::File; -use std::io::Write; +use std::fs::{File, OpenOptions}; +use std::io::{BufRead, BufReader, Write}; +use std::sync::{Arc, Mutex}; use 
std::time::Duration; +use std::sync::atomic::{AtomicUsize, Ordering}; -use futures::stream::{FuturesUnordered, StreamExt}; -use reqwest::Client; -use tokio::fs; -use tokio::io::AsyncBufReadExt; +use rayon::prelude::*; +use reqwest::blocking::Client; +use reqwest::header::CONTENT_TYPE; -#[tokio::main] -async fn main() -> Result<(), Box<dyn std::error::Error>> { - let file = fs::File::open("domains.txt").await?; - let reader = tokio::io::BufReader::new(file); - let mut lines = reader.lines(); +fn main() -> Result<(), Box<dyn std::error::Error>> { + let input_file = File::open("domains.txt")?; + let reader = BufReader::new(input_file); + let domains: Vec<String> = reader.lines().filter_map(Result::ok).collect(); - let client = Client::builder() - .timeout(Duration::from_secs(5)) - .build()?; + let total_count = domains.len(); + let counter = Arc::new(AtomicUsize::new(0)); - let mut tasks = FuturesUnordered::new(); + let output_file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open("domains-filtered.txt")?; - while let Some(line) = lines.next_line().await? 
{ - let client = client.clone(); - let domain = line.clone(); - tasks.push(tokio::spawn(async move { - let url = format!("https://{}", domain); - let resp = client.get(&url).send().await; + let output = Arc::new(Mutex::new(output_file)); - match resp { - Ok(resp) => { - if let Some(content_type) = resp.headers().get(reqwest::header::CONTENT_TYPE) { - if let Ok(content_type_str) = content_type.to_str() { - if content_type_str.starts_with("text/html") { - println!("✅ HTML: {}", domain); - return Some(domain); - } else { - println!("❌ Not HTML: {} ({})", domain, content_type_str); + let client = Arc::new( + Client::builder() + .timeout(Duration::from_secs(5)) + .build()?, + ); + + domains.par_iter().for_each(|domain| { + let current = counter.fetch_add(1, Ordering::SeqCst) + 1; + let url = format!("https://{}", domain); + println!("[{}/{}] Checking {}", current, total_count, url); + + let response = client.get(&url).send(); + + match response { + Ok(resp) => { + if let Some(content_type) = resp.headers().get(CONTENT_TYPE) { + if let Ok(content_type_str) = content_type.to_str() { + if content_type_str.starts_with("text/html") { + if let Ok(mut file) = output.lock() { + writeln!(file, "{}", domain).ok(); } + println!("[{}/{}] ✅ HTML: {}", current, total_count, domain); + } else { + println!("[{}/{}] ❌ Not HTML: {} ({})", current, total_count, domain, content_type_str); } - } else { - println!("❌ No Content-Type: {}", domain); } - } - Err(_) => { - println!("⚠️ Failed to connect: {}", domain); + } else { + println!("[{}/{}] ❌ No Content-Type: {}", current, total_count, domain); } } - - None - })); - } - - let mut results = Vec::new(); - while let Some(res) = tasks.next().await { - if let Ok(Some(domain)) = res { - results.push(domain); + Err(_) => { + println!("[{}/{}] ⚠️ Failed to connect: {}", current, total_count, domain); + } } - } - - // 📝 한 번에 출력 - let mut output = File::create("domains-filtered.txt")?; - for domain in results { - writeln!(output, "{}", domain)?; - 
} + }); Ok(()) }