diff --git a/Cargo.lock b/Cargo.lock index 9360351..ef04a98 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,28 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "html2text" +version = "0.16.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea4ba4c0f993633569337a4fadfbd4f27448c81adb1af0c2407065f52809662" +dependencies = [ + "html5ever", + "tendril", + "thiserror", + "unicode-width", +] + +[[package]] +name = "html5ever" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5935f02fdc02823ff15fec27c2b3d7ca19d629e996f7a0ae4d7d500e62e54c76" +dependencies = [ + "log", + "markup5ever", +] + [[package]] name = "http" version = "0.2.12" @@ -596,6 +618,15 @@ version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -613,6 +644,17 @@ dependencies = [ "quoted_printable", ] +[[package]] +name = "markup5ever" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cfb33ea12d5d83b1ba9a55ae7d05faec4f2189d47b79c04d4cea6bbe9f5b083" +dependencies = [ + "log", + "tendril", + "web_atoms", +] + [[package]] name = "memchr" version = "2.7.6" @@ -653,14 +695,22 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "newsletter" version = "0.1.0" dependencies = [ "chrono", + "html2text", "imap", "mailparse", "native-tls", + "regex", "reqwest", "serde", "serde_json", @@ -747,12 +797,74 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + [[package]] name = "percent-encoding" version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_shared" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -780,6 +892,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.105" @@ -810,6 +928,15 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.10.0", +] + [[package]] name = "regex" version = "1.12.2" @@ -922,6 +1049,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "security-framework" version = "2.11.1" @@ -1015,6 +1148,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" @@ -1059,6 +1198,30 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string_cache" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", +] + +[[package]] +name = "string_cache_codegen" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "syn" version = "2.0.114" @@ -1121,6 +1284,36 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" +dependencies = [ + "new_debug_unreachable", + "utf-8", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -1246,6 +1439,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + [[package]] name = "url" version = "2.5.8" @@ -1258,6 +1457,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -1369,6 +1574,18 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web_atoms" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30e588f10c7bc3465f5fc1ab087fc97877ec1064a7ec89fb685ac4ee998dac4a" +dependencies = [ + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", +] + [[package]] name = "windows-core" version = "0.62.2" diff --git a/Cargo.toml b/Cargo.toml index 33eea67..c4e11ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,8 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" toml = "0.8" chrono = { version = "0.4", features = ["serde"] } +html2text = "0.16.6" +regex = "1.12.2" diff --git a/src/main.rs b/src/main.rs index cc3eb7d..e2d42e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use mailparse::MailHeaderMap; use native_tls::TlsConnector; +use regex::Regex; use serde::Deserialize; use std::fs; use std::thread; @@ -139,9 +140,21 @@ fn run_monitor(config: &Config) -> Result<(), Box> { } } +fn clean_body(body: &str) -> String { + // Replace multiple newlines with double newline (max) + let re_newlines = Regex::new(r"\n{3,}").unwrap(); + let body = re_newlines.replace_all(body, "\n\n"); + + // Trim trailing spaces from each line + let re_trailing_spaces = Regex::new(r"(?m)[ \t]+$").unwrap(); + let body = re_trailing_spaces.replace_all(&body, ""); + + body.trim().to_string() +} + fn extract_body(parsed: &mailparse::ParsedMail) -> Option { if parsed.ctype.mimetype == "text/plain" { - return parsed.get_body().ok(); + return parsed.get_body().ok().map(|s| clean_body(&s)); } // If multipart, search for text/plain @@ -153,8 +166,11 @@ fn extract_body(parsed: &mailparse::ParsedMail) -> Option { // Fallback to text/html if no plain text found (or first part if nothing else) if parsed.ctype.mimetype == "text/html" { - // Naive stripping of HTML tags could be done here, or just return as is. - return parsed.get_body().ok(); + if let Ok(html_content) = parsed.get_body() { + if let Ok(md) = html2text::from_read(html_content.as_bytes(), 80) { + return Some(clean_body(&md)); + } + } } None