Skip to content

Commit

Permalink
PVF: remove audit log access (#2461)
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcnski authored Nov 25, 2023
1 parent 73ff1c3 commit cfa19c3
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 205 deletions.
2 changes: 1 addition & 1 deletion .gitlab/pipeline/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -522,4 +522,4 @@ test-syscalls:
- if [[ "$CI_JOB_STATUS" == "failed" ]]; then
printf "The x86_64 syscalls used by the worker binaries have changed. Please review if this is expected and update polkadot/scripts/list-syscalls/*-worker-syscalls as needed.\n";
fi
allow_failure: true # TODO: remove this once we have an idea how often the syscall lists will change
allow_failure: false # this rarely triggers in practice
31 changes: 1 addition & 30 deletions polkadot/node/core/pvf/src/execute/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
use crate::{
artifacts::ArtifactPathId,
security,
worker_intf::{
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
Expand All @@ -33,7 +32,7 @@ use polkadot_node_core_pvf_common::{
execute::{Handshake, WorkerResponse},
worker_dir, SecurityStatus,
};
use polkadot_parachain_primitives::primitives::{ValidationCodeHash, ValidationResult};
use polkadot_parachain_primitives::primitives::ValidationResult;
use polkadot_primitives::ExecutorParams;
use std::{path::Path, time::Duration};
use tokio::{io, net::UnixStream};
Expand Down Expand Up @@ -132,10 +131,7 @@ pub async fn start_work(
artifact.path.display(),
);

let artifact_path = artifact.path.clone();
with_worker_dir_setup(worker_dir, pid, &artifact.path, |worker_dir| async move {
let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;

if let Err(error) = send_request(&mut stream, &validation_params, execution_timeout).await {
gum::warn!(
target: LOG_TARGET,
Expand All @@ -160,10 +156,7 @@ pub async fn start_work(
handle_response(
response,
pid,
&artifact.id.code_hash,
&artifact_path,
execution_timeout,
audit_log_file
)
.await,
Err(error) => {
Expand Down Expand Up @@ -217,10 +210,7 @@ pub async fn start_work(
async fn handle_response(
response: WorkerResponse,
worker_pid: u32,
validation_code_hash: &ValidationCodeHash,
artifact_path: &Path,
execution_timeout: Duration,
audit_log_file: Option<security::AuditLogFile>,
) -> WorkerResponse {
if let WorkerResponse::Ok { duration, .. } = response {
if duration > execution_timeout {
Expand All @@ -238,25 +228,6 @@ async fn handle_response(
}
}

if let WorkerResponse::JobDied { err: _, job_pid } = response {
// The job died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in security::check_seccomp_violations_for_job(audit_log_file, job_pid).await {
gum::error!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
%syscall,
?validation_code_hash,
?artifact_path,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}
}

response
}

Expand Down
26 changes: 1 addition & 25 deletions polkadot/node/core/pvf/src/prepare/worker_intf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
use crate::{
artifacts::ArtifactId,
metrics::Metrics,
security,
worker_intf::{
clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker,
SpawnErr, WorkerDir, WorkerHandle, JOB_TIMEOUT_WALL_CLOCK_FACTOR,
Expand Down Expand Up @@ -134,7 +133,6 @@ pub async fn start_work(
pid,
|tmp_artifact_file, mut stream, worker_dir| async move {
let preparation_timeout = pvf.prep_timeout();
let audit_log_file = security::AuditLogFile::try_open_and_seek_to_end().await;

if let Err(err) = send_request(&mut stream, &pvf).await {
gum::warn!(
Expand Down Expand Up @@ -170,7 +168,6 @@ pub async fn start_work(
&pvf,
&cache_path,
preparation_timeout,
audit_log_file,
)
.await,
Ok(Err(err)) => {
Expand Down Expand Up @@ -211,34 +208,13 @@ async fn handle_response(
pvf: &PvfPrepData,
cache_path: &Path,
preparation_timeout: Duration,
audit_log_file: Option<security::AuditLogFile>,
) -> Outcome {
let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } =
match result.clone() {
Ok(result) => result,
// Timed out on the child. This should already be logged by the child.
Err(PrepareError::TimedOut) => return Outcome::TimedOut,
Err(PrepareError::JobDied { err, job_pid }) => {
// The job died. Check if it was due to a seccomp violation.
//
// NOTE: Log, but don't change the outcome. Not all validators may have
// auditing enabled, so we don't want attackers to abuse a non-deterministic
// outcome.
for syscall in
security::check_seccomp_violations_for_job(audit_log_file, job_pid).await
{
gum::error!(
target: LOG_TARGET,
%worker_pid,
%job_pid,
%syscall,
?pvf,
"A forbidden syscall was attempted! This is a violation of our seccomp security policy. Report an issue ASAP!"
);
}

return Outcome::JobDied { err, job_pid }
},
Err(PrepareError::JobDied { err, job_pid }) => return Outcome::JobDied { err, job_pid },
Err(PrepareError::OutOfMemory) => return Outcome::OutOfMemory,
Err(err) => return Outcome::Concluded { worker, result: Err(err) },
};
Expand Down
149 changes: 0 additions & 149 deletions polkadot/node/core/pvf/src/security.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@
use crate::{Config, SecurityStatus, LOG_TARGET};
use futures::join;
use std::{fmt, path::Path};
use tokio::{
fs::{File, OpenOptions},
io::{AsyncReadExt, AsyncSeekExt, SeekFrom},
};

const SECURE_MODE_ANNOUNCEMENT: &'static str =
"In the next release this will be a hard error by default.
Expand All @@ -35,7 +31,6 @@ const SECURE_MODE_ANNOUNCEMENT: &'static str =
pub async fn check_security_status(config: &Config) -> SecurityStatus {
let Config { prepare_worker_program_path, cache_path, .. } = config;

// TODO: add check that syslog is available and that seccomp violations are logged?
let (landlock, seccomp, change_root) = join!(
check_landlock(prepare_worker_program_path),
check_seccomp(prepare_worker_program_path),
Expand Down Expand Up @@ -303,147 +298,3 @@ async fn check_seccomp(
}
}
}

const AUDIT_LOG_PATH: &'static str = "/var/log/audit/audit.log";
const SYSLOG_PATH: &'static str = "/var/log/syslog";

/// System audit log.
pub struct AuditLogFile {
file: File,
path: &'static str,
}

impl AuditLogFile {
/// Looks for an audit log file on the system and opens it, seeking to the end to skip any
/// events from before this was called.
///
/// A bit of a verbose name, but it should clue future refactorers not to move calls closer to
/// where the `AuditLogFile` is used.
pub async fn try_open_and_seek_to_end() -> Option<Self> {
let mut path = AUDIT_LOG_PATH;
let mut file = match OpenOptions::new().read(true).open(AUDIT_LOG_PATH).await {
Ok(file) => Ok(file),
Err(_) => {
path = SYSLOG_PATH;
OpenOptions::new().read(true).open(SYSLOG_PATH).await
},
}
.ok()?;

let _pos = file.seek(SeekFrom::End(0)).await;

Some(Self { file, path })
}

async fn read_new_since_open(mut self) -> String {
let mut buf = String::new();
let _len = self.file.read_to_string(&mut buf).await;
buf
}
}

/// Check if a seccomp violation occurred for the given job process. As the syslog may be in a
/// different location, or seccomp auditing may be disabled, this function provides a best-effort
/// attempt only.
///
/// The `audit_log_file` must have been obtained before the job started. It only allows reading
/// entries that were written since it was obtained, so that we do not consider events from previous
/// processes with the same pid. This can still be racy, but it's unlikely and fine for a
/// best-effort attempt.
pub async fn check_seccomp_violations_for_job(
audit_log_file: Option<AuditLogFile>,
job_pid: i32,
) -> Vec<u32> {
let audit_event_pid_field = format!("pid={job_pid}");

let audit_log_file = match audit_log_file {
Some(file) => {
gum::trace!(
target: LOG_TARGET,
%job_pid,
audit_log_path = ?file.path,
"checking audit log for seccomp violations",
);
file
},
None => {
gum::warn!(
target: LOG_TARGET,
%job_pid,
"could not open either {AUDIT_LOG_PATH} or {SYSLOG_PATH} for reading audit logs"
);
return vec![]
},
};
let events = audit_log_file.read_new_since_open().await;

let mut violations = vec![];
for event in events.lines() {
if let Some(syscall) = parse_audit_log_for_seccomp_event(event, &audit_event_pid_field) {
violations.push(syscall);
}
}

violations
}

fn parse_audit_log_for_seccomp_event(event: &str, audit_event_pid_field: &str) -> Option<u32> {
const SECCOMP_AUDIT_EVENT_TYPE: &'static str = "type=1326";

// Do a series of simple .contains instead of a regex, because I'm not sure if the fields are
// guaranteed to always be in the same order.
if !event.contains(SECCOMP_AUDIT_EVENT_TYPE) || !event.contains(&audit_event_pid_field) {
return None
}

// Get the syscall. Let's avoid a dependency on regex just for this.
for field in event.split(" ") {
if let Some(syscall) = field.strip_prefix("syscall=") {
return syscall.parse::<u32>().ok()
}
}

None
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_parse_audit_log_for_seccomp_event() {
let audit_event_pid_field = "pid=2559058";

assert_eq!(
parse_audit_log_for_seccomp_event(
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559058 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
audit_event_pid_field
),
Some(53)
);
// pid is wrong
assert_eq!(
parse_audit_log_for_seccomp_event(
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1326 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
audit_event_pid_field
),
None
);
// type is wrong
assert_eq!(
parse_audit_log_for_seccomp_event(
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e syscall=53 compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
audit_event_pid_field
),
None
);
// no syscall field
assert_eq!(
parse_audit_log_for_seccomp_event(
r#"Oct 24 13:15:24 build kernel: [5883980.283910] audit: type=1327 audit(1698153324.786:23): auid=0 uid=0 gid=0 ses=2162 subj=unconfined pid=2559057 comm="polkadot-prepar" exe="/root/paritytech/polkadot-sdk-2/target/debug/polkadot-prepare-worker" sig=31 arch=c000003e compat=0 ip=0x7f7542c80d5e code=0x80000000"#,
audit_event_pid_field
),
None
);
}
}

0 comments on commit cfa19c3

Please sign in to comment.