Skip to content

Commit

Permalink
fix: symlinks to dirs are not handled
Browse files Browse the repository at this point in the history
  • Loading branch information
PhotonQuantum committed Feb 10, 2023
1 parent 5edf51e commit f534290
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 48 deletions.
183 changes: 157 additions & 26 deletions rsync-core/src/redis_.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::collections::HashSet;
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::time::Duration;

use clean_path::Clean;
Expand All @@ -12,7 +12,7 @@ use redis::{aio, AsyncCommands, AsyncIter, Client, Commands, FromRedisValue, Scr
use scan_fmt::scan_fmt;
use tokio::task::JoinHandle;
use tokio::time;
use tracing::{error, instrument, warn};
use tracing::{debug, error, instrument, warn};

use crate::metadata::{MetaExtra, Metadata};

Expand Down Expand Up @@ -325,7 +325,22 @@ pub async fn get_index(
Ok(filenames)
}

/// Upper bound on the number of lookup steps performed while resolving one symlink chain.
///
/// Our algorithm takes slightly more steps to resolve a symlink than Linux, so > 40.
pub const MAX_SYMLINK_LOOKUP: usize = 100;

/// Increments the step counter `$depth` and, once it exceeds `MAX_SYMLINK_LOOKUP`, logs a
/// warning naming `$key` and returns `Ok(None)` from the *enclosing* function.
///
/// `$depth` must be an assignable expression (it is mutated with `+=`), `$key` must expose
/// a `display()` method, and the macro must be expanded inside a function whose return
/// type accepts `Ok(None)`.
macro_rules! guard_depth {
($depth: expr, $key: expr) => {
// Count the step first so the limit applies to the lookup that is about to happen.
$depth += 1;
if $depth > MAX_SYMLINK_LOOKUP {
warn!(filename=%$key.display(), "symlink depth limit exceeded");
// This `return` leaves the caller's function, not just the macro expansion.
return Ok(None);
}
};
}

#[allow(unreachable_code)]
/// Follow a symlink and return the hash of the target.
/// Only works on files.
/// Also works if the given metadata is already a regular file.
///
/// Returns None if it's a dead or circular symlink.
Expand All @@ -336,43 +351,159 @@ pub async fn get_index(
pub async fn follow_symlink(
conn: &mut (impl aio::ConnectionLike + Send),
redis_index: &str,
key: &[u8],
mut meta_extra: MetaExtra,
key: &Path,
mut maybe_meta_extra: Option<MetaExtra>,
) -> Result<Option<[u8; 20]>> {
let filename_display = Path::new(OsStr::from_bytes(key)).display();
let mut depth = 0;

let mut pwd = Path::new(OsStr::from_bytes(key))
.parent()
.unwrap_or_else(|| Path::new(""))
.to_path_buf();
if key.is_absolute() {
warn!(filename=%key.display(), "absolute path is not supported, refusing to follow");
return Ok(None);
}
if key.starts_with("..") {
warn!(filename=%key.display(), "path starts with .., refusing to follow");
return Ok(None);
}
debug!("following symlink: {}", key.display());
let original_key = key;

let mut visited = HashSet::new();
let hash = loop {
break match meta_extra {
MetaExtra::Symlink { ref target } => {
let new_loc = pwd.join(Path::new(OsStr::from_bytes(target))).clean();
if visited.insert(new_loc.clone()) {
if let Some(new_meta) = conn
.hget(redis_index, new_loc.as_os_str().as_bytes())
.await?
{
let mut key = key.to_path_buf();
let hash = 'outer: loop {
let mut pwd = key.parent().unwrap_or_else(|| Path::new("")).to_path_buf();

// This loop only resolves the symlink if there's no directory symlink in the path.
if let Some(mut meta_extra) = maybe_meta_extra.take() {
loop {
match meta_extra {
MetaExtra::Symlink { ref target } => {
let new_loc = pwd.join(Path::new(OsStr::from_bytes(target))).clean();
if !visited.insert(new_loc.clone()) {
warn!(filename = %original_key.display(), "symlink loop detected");
break 'outer None;
}

guard_depth!(depth, original_key);
let Some(new_meta) = conn
.hget(redis_index, new_loc.as_os_str().as_bytes())
.await? else {
key = new_loc;
break;
};

let new_meta: Metadata = new_meta;
meta_extra = new_meta.extra;
pwd = new_loc
.parent()
.unwrap_or_else(|| Path::new(""))
.to_path_buf();
continue;
}
warn!(filename=%filename_display, target=%new_loc.display(), "symlink target not found");
} else {
warn!(filename=%filename_display, "symlink loop detected");
}
None
MetaExtra::Regular { blake2b_hash } => break 'outer Some(blake2b_hash),
};
}
MetaExtra::Regular { blake2b_hash } => Some(blake2b_hash),
};
};

let ancestors = key
.ancestors()
.filter(|ancestor| !ancestor.as_os_str().is_empty())
.map(|ancestor| (ancestor, key.strip_prefix(ancestor).expect("ancestor")));
for (prefix, remaining) in ancestors {
let target_dir = try_resolve_dir_symlink(conn, redis_index, prefix).await?;
if let Some(target_dir) = target_dir {
// Only possible if prefix is a symlink that points to a directory.
let new_loc = target_dir.join(remaining).clean();
if !visited.insert(new_loc.clone()) {
warn!(filename = %original_key.display(), "symlink loop detected");
break 'outer None;
}
// new_loc can not be absolute because prefix is not absolute (filtered by
// `follow_symlink_dir`).
if new_loc.starts_with("..") {
warn!(
prefix=%prefix.display(),
remaining=%remaining.display(),
"path starts with .., refusing to follow"
);
continue;
}

guard_depth!(depth, original_key);
let meta: Option<Metadata> = conn
.hget(redis_index, new_loc.as_os_str().as_bytes())
.await?;
key = new_loc;
maybe_meta_extra = meta.map(|meta| meta.extra);

continue 'outer;
}
}
break None;
};
Ok(hash)
}

/// Repeatedly resolves `key` as a directory symlink until it no longer resolves.
///
/// Each round asks `try_resolve_dir_symlink` for the target of the current path. The walk
/// stops, returning the current path, when:
/// - the current path does not resolve any further,
/// - the current path has already been visited (a symlink loop), or
/// - more than `MAX_SYMLINK_LOOKUP` rounds have been attempted.
///
/// The two abnormal stops log a warning but still return `Ok`; only errors from the
/// underlying lookup are propagated.
pub async fn recursive_resolve_dir_symlink(
    conn: &mut (impl aio::ConnectionLike + Send),
    redis_index: &str,
    key: &Path,
) -> Result<PathBuf> {
    let mut seen = HashSet::new();
    let mut current = key.to_path_buf();
    let mut hops = 0;

    loop {
        // A path we have already stood on means the chain is circular.
        if !seen.insert(current.clone()) {
            warn!(filename = %current.display(), "symlink loop detected");
            return Ok(current);
        }

        hops += 1;
        if hops > MAX_SYMLINK_LOOKUP {
            warn!("symlink lookup depth exceeded");
            return Ok(current);
        }

        match try_resolve_dir_symlink(conn, redis_index, &current).await? {
            Some(next) => current = next,
            None => return Ok(current),
        }
    }
}

/// Performs a single resolution step for `key`, treating it as a possible directory symlink.
///
/// Returns `Ok(Some(target))` when the index entry for `key` is a symlink; `target` is the
/// link target joined onto `key`'s parent directory and lexically cleaned.
///
/// Returns `Ok(None)` (logging a warning where noted) when:
/// - `key` is absolute (warned),
/// - `key` starts with `..` (warned),
/// - the index has no entry for `key`, or
/// - the entry is a regular file (warned).
async fn try_resolve_dir_symlink(
    conn: &mut (impl aio::ConnectionLike + Send),
    redis_index: &str,
    key: &Path,
) -> Result<Option<PathBuf>> {
    if key.is_absolute() {
        warn!(filename=%key.display(), "absolute path is not supported, refusing to follow");
        return Ok(None);
    }
    if key.starts_with("..") {
        warn!(filename=%key.display(), "path starts with .., refusing to follow");
        return Ok(None);
    }

    let entry: Option<Metadata> = conn
        .hget(redis_index, key.as_os_str().as_bytes())
        .await?;
    let Some(entry) = entry else {
        return Ok(None);
    };

    Ok(match entry.extra {
        MetaExtra::Symlink { ref target } => {
            // Relative link targets are interpreted against the directory holding the link.
            let base = key.parent().unwrap_or_else(|| Path::new(""));
            let link = Path::new(OsStr::from_bytes(target));
            Some(base.join(link).clean())
        }
        MetaExtra::Regular { .. } => {
            warn!(filename=%key.display(), "expected dir, got file");
            None
        }
    })
}
10 changes: 10 additions & 0 deletions rsync-core/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::fmt::LowerHex;

#[cfg(feature = "percent-encoding")]
use percent_encoding::{AsciiSet, CONTROLS};
use tracing::level_filters::LevelFilter;
use tracing_error::ErrorLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
Expand All @@ -27,6 +28,15 @@ pub fn init_logger() {
.init();
}

/// Installs a global tracing subscriber for tests: DEBUG-level filter, an error layer,
/// and formatted output written to stderr.
#[cfg(feature = "tests")]
pub fn test_init_logger() {
    let stderr_layer = tracing_subscriber::fmt::layer().with_writer(std::io::stderr);

    tracing_subscriber::Registry::default()
        .with(LevelFilter::DEBUG)
        .with(ErrorLayer::default())
        .with(stderr_layer)
        .init();
}

/// Conversion of a value into a borrowed `HexWrapper` view.
// NOTE(review): presumably `HexWrapper` formats the value as hexadecimal — confirm against
// its definition, which is not visible here.
pub trait ToHex {
/// Returns a `HexWrapper` that borrows from `self`.
fn as_hex(&self) -> HexWrapper<'_>;
}
Expand Down
8 changes: 7 additions & 1 deletion rsync-fetcher/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
//! Adopted from [mirror-clone](https://github.com/sjtug/mirror-clone).
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use std::time::{Duration, UNIX_EPOCH};

use aws_sdk_s3::types::ByteStream;
Expand Down Expand Up @@ -180,7 +183,10 @@ async fn generate_index(
while let Some((key, meta)) = files.next_item().await? {
let filename = String::from_utf8_lossy(&key);

let hash = follow_symlink(&mut hget_conn, redis_index, &key, meta.extra).await?;
let key = Path::new(OsStr::from_bytes(&key));
// TODO if key points to a directory, it's ignored.
// i.e. symlinks to dirs are not present in the generated listing.
let hash = follow_symlink(&mut hget_conn, redis_index, key, Some(meta.extra)).await?;

if let Some(hash) = hash {
index.insert(&filename, hash, max_depth);
Expand Down
2 changes: 0 additions & 2 deletions rsync-fetcher/src/rsync/file_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ const PATH_MAX: u32 = 4096;

#[derive(Clone)]
pub struct FileEntry {
// maybe PathBuf?
pub name: Vec<u8>,
pub len: u64,
pub modify_time: SystemTime,
pub mode: u32,
// maybe PathBuf?
pub link_target: Option<Vec<u8>>,
// int32 in rsync, but it couldn't be negative yes?
pub idx: u32,
Expand Down
11 changes: 6 additions & 5 deletions rsync-gateway/src/handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,25 @@ pub async fn handler(opts: Data<Opts>, state: Data<State>, req: HttpRequest) ->
let listing = path.ends_with(b"/");
let path = path.trim_start_with(|c| c == '/');
if listing {
Either::Left(listing_handler(&opts, &state, path))
Either::Left(listing_handler(&opts, &state, path).await)
} else {
Either::Right(file_handler(&opts, &state, path).await)
}
}

/// Handler for listing requests.
fn listing_handler(opts: &Opts, state: &State, path: &[u8]) -> impl Responder {
state.latest_index().map_or_else(
async fn listing_handler(opts: &Opts, state: &State, path: &[u8]) -> impl Responder {
let path = state.resolve_dir(path).await.into_resp_err()?;
Ok::<_, ReportWrapper>(state.latest_index().map_or_else(
|| Either::Right(HttpResponse::NotFound()),
|latest| {
let path = percent_encoding::percent_encode(path, PATH_ASCII_SET);
let path = percent_encoding::percent_encode(&path, PATH_ASCII_SET);
let s3_base = opts.s3_base.trim_end_matches('/');
Either::Left(Redirect::to(format!(
"{s3_base}/listing-{latest}/{path}index.html"
)))
},
)
))
}

/// Handler for file requests.
Expand Down
26 changes: 20 additions & 6 deletions rsync-gateway/src/state.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use std::ffi::OsStr;
use std::future::ready;
use std::num::NonZeroU64;
use std::os::unix::ffi::{OsStrExt, OsStringExt};
use std::path::Path;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
Expand All @@ -14,7 +17,7 @@ use tokio::time::interval;
use tracing::{info, warn};

use rsync_core::metadata::Metadata;
use rsync_core::redis_::{follow_symlink, get_latest_index};
use rsync_core::redis_::{follow_symlink, get_latest_index, recursive_resolve_dir_symlink};

use crate::utils::AbortJoinHandle;

Expand Down Expand Up @@ -55,12 +58,23 @@ impl State {
};
let meta: Option<Metadata> = conn.hget(&index, key).await?;

if let Some(meta) = meta {
// We follow the symlink instead of redirecting the client to avoid circular redirection.
Ok(follow_symlink(&mut conn, &index, key, meta.extra).await?)
// We follow the symlink instead of redirecting the client to avoid circular redirection.
let key = Path::new(OsStr::from_bytes(key));
follow_symlink(&mut conn, &index, key, meta.map(|meta| meta.extra)).await
}
pub async fn resolve_dir(&self, key: &[u8]) -> Result<Vec<u8>> {
let mut conn = self.conn.clone();
let namespace = &self.namespace;
let index = if let Some(index) = self.latest_index() {
format!("{namespace}:index:{index}")
} else {
Ok(None)
}
return Ok(key.to_vec());
};

let key = Path::new(OsStr::from_bytes(key));
recursive_resolve_dir_symlink(&mut conn, &index, key)
.await
.map(|p| p.into_os_string().into_vec())
}
}

Expand Down
Loading

0 comments on commit f534290

Please sign in to comment.