Skip to content

Commit

Permalink
Support underscores in Markdown URLs (#1555)
Browse files Browse the repository at this point in the history
  • Loading branch information
mre authored Nov 7, 2024
1 parent 6e3219e commit 6b53695
Showing 1 changed file with 26 additions and 2 deletions.
28 changes: 26 additions & 2 deletions lychee-lib/src/extract/markdown.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Extract links and fragments from markdown documents
use std::collections::{HashMap, HashSet};

-use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd};
+use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeStream};

use crate::{extract::plaintext::extract_raw_uri_from_plaintext, types::uri::raw::RawUri};

Expand All @@ -19,7 +19,7 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
// which is why we keep track of entries and exits while traversing the input.
let mut inside_code_block = false;

-let parser = Parser::new_ext(input, md_extensions());
+let parser = TextMergeStream::new(Parser::new_ext(input, md_extensions()));
parser
.filter_map(|event| match event {
// A link.
Expand Down Expand Up @@ -349,4 +349,28 @@ $$
let uris = extract_markdown(markdown, true);
assert_eq!(uris, expected);
}

/// A bare URL with a lone `_` path segment in the middle must be extracted
/// as one complete URI, with no element or attribute attached.
#[test]
fn test_underscore_in_urls_middle() {
    let input = r"https://example.com/_/foo";

    // Run the extractor first, then compare against the single expected entry.
    let actual = extract_markdown(input, true);
    let wanted = vec![RawUri {
        text: String::from("https://example.com/_/foo"),
        element: None,
        attribute: None,
    }];

    assert_eq!(actual, wanted);
}

/// A bare URL that ends in a lone `_` path segment must be extracted as one
/// complete URI, keeping the trailing underscore.
#[test]
fn test_underscore_in_urls_end() {
    let input = r"https://example.com/_";

    // Run the extractor first, then compare against the single expected entry.
    let actual = extract_markdown(input, true);
    let wanted = vec![RawUri {
        text: String::from("https://example.com/_"),
        element: None,
        attribute: None,
    }];

    assert_eq!(actual, wanted);
}
}

0 comments on commit 6b53695

Please sign in to comment.