Skip to content

Commit

Permalink
Fix: Windows drive paths misidentified as URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
mre committed Aug 6, 2024
1 parent f386113 commit 5762b18
Showing 1 changed file with 168 additions and 29 deletions.
197 changes: 168 additions & 29 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,38 +132,62 @@ impl Input {
) -> Result<Self> {
let source = if value == STDIN {
InputSource::Stdin
} else if let Ok(url) = Url::parse(value) {
InputSource::RemoteUrl(Box::new(url))
// We use [`reqwest::Url::parse`] because it catches some other edge cases that [`http::Request:builder`] does not
// This could be improved with further refinement.
} else {
// this seems to be the only way to determine if this is a glob pattern
let is_glob = glob::Pattern::escape(value) != value;

if is_glob {
InputSource::FsGlob {
pattern: value.to_owned(),
ignore_case: glob_ignore_case,
}
} else {
let path = PathBuf::from(value);
if path.exists() {
InputSource::FsPath(path)
} else if value.starts_with('~') || value.starts_with('.') {
// The path is not valid, but it might be a valid URL
// Check if the path starts with a tilde or a dot
// and exit early if it does
// This check might not be sufficient to cover all cases
// but it catches the most common ones
return Err(ErrorKind::InvalidFile(path));
} else {
// Invalid path; check if a valid URL can be constructed from the input
// by prefixing it with a `http://` scheme.
// Curl also uses http (i.e. not https), see
// https://github.com/curl/curl/blob/70ac27604a2abfa809a7b2736506af0da8c3c8a9/lib/urlapi.c#L1104-L1124
let url = Url::parse(&format!("http://{value}")).map_err(|e| {
ErrorKind::ParseUrl(e, "Input is not a valid URL".to_string())
})?;
match Url::parse(value) {
// Weed out non-http schemes, including Windows drive specifiers, which will be successfully parsed by the Url crate
Ok(url) if url.scheme() == "http" || url.scheme() == "https" => {
InputSource::RemoteUrl(Box::new(url))
}
Ok(_) => {
// URL parsed successfully, but it's not http or https
return Err(ErrorKind::InvalidFile(PathBuf::from(value)));
}
_ => {
// this seems to be the only way to determine if this is a glob pattern
let is_glob = glob::Pattern::escape(value) != value;

if is_glob {
InputSource::FsGlob {
pattern: value.to_owned(),
ignore_case: glob_ignore_case,
}
} else {
let path = PathBuf::from(value);

// On Windows, a filepath can never be mistaken for a url because Windows filepaths use \ and urls use /
#[cfg(windows)]
if path.exists() {
// The file exists, so we return the path
InputSource::FsPath(path)
} else {
// We had a valid filepath, but the file didn't exist so we return an error
return Err(ErrorKind::InvalidFile(path));
}

#[cfg(unix)]
if path.exists() {
InputSource::FsPath(path)
} else if value.starts_with('~') || value.starts_with('.') {
// The path is not valid, but it might be a valid URL
// Check if the path starts with a tilde or a dot
// and exit early if it does
// This check might not be sufficient to cover all cases
// but it catches the most common ones
return Err(ErrorKind::InvalidFile(path));
} else {
// Invalid path; check if a valid URL can be constructed from the input
// by prefixing it with a `http://` scheme.
// Curl also uses http (i.e. not https), see
// https://github.com/curl/curl/blob/70ac27604a2abfa809a7b2736506af0da8c3c8a9/lib/urlapi.c#L1104-L1124
let url = Url::parse(&format!("http://{value}")).map_err(|e| {
ErrorKind::ParseUrl(e, "Input is not a valid URL".to_string())
})?;
InputSource::RemoteUrl(Box::new(url))
}
}
}
}
};
Ok(Self {
Expand Down Expand Up @@ -486,4 +510,119 @@ mod tests {
String::from("http://example.com/")
);
}

// Ensure that a Windows file path is not mistaken for a URL.
#[cfg(windows)]
#[test]
fn test_windows_style_filepath_not_existing() {
let input = Input::new("C:\\example\\project\\here", None, false, None);
assert!(input.is_err());
let input = input.unwrap_err();

match input {
ErrorKind::InvalidFile(_) => (),
_ => panic!("Should have received InvalidFile error"),
}
}

// Ensure that a Windows-style file path to an existing file is recognized
#[cfg(windows)]
#[test]
fn test_windows_style_filepath_existing() {
use std::env::temp_dir;
use tempfile::NamedTempFile;

let dir = temp_dir();
let file = NamedTempFile::new_in(dir).unwrap();
let path = file.path();
let input = Input::new(path.to_str().unwrap(), None, false, None).unwrap();

match input.source {
InputSource::FsPath(_) => (),
_ => panic!("Input source should be FsPath but was not"),
}
}

#[test]
fn test_url_scheme_check_succeeding() {
// Valid http and https URLs
assert!(matches!(
Input::new("http://example.com", None, false, None),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new("https://example.com", None, false, None),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new(
"http://subdomain.example.com/path?query=value",
None,
false,
None
),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
assert!(matches!(
Input::new("https://example.com:8080", None, false, None),
Ok(Input {
source: InputSource::RemoteUrl(_),
..
})
));
}

#[test]
fn test_url_scheme_check_failing() {
// Invalid schemes
assert!(matches!(
Input::new("ftp://example.com", None, false, None),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("httpx://example.com", None, false, None),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("file:///path/to/file", None, false, None),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("mailto:[email protected]", None, false, None),
Err(ErrorKind::InvalidFile(_))
));
}

#[test]
fn test_non_url_inputs() {
// Non-URL inputs
assert!(matches!(
Input::new("./local/path", None, false, None),
Err(ErrorKind::InvalidFile(_))
));
assert!(matches!(
Input::new("*.md", None, false, None),
Ok(Input {
source: InputSource::FsGlob { .. },
..
})
));
// Assuming the current directory exists
assert!(matches!(
Input::new(".", None, false, None),
Ok(Input {
source: InputSource::FsPath(_),
..
})
));
}
}

0 comments on commit 5762b18

Please sign in to comment.