Skip to content

Commit

Permalink
Replace BinaryStringHelper with the bstr crate to clean up string handling
Browse files Browse the repository at this point in the history
  • Loading branch information
chipturner committed Feb 12, 2023
1 parent dbc4ec9 commit 031fe16
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 140 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ license = "MIT"
path = "src/lib.rs"

[dependencies]
bstr = { version = "1.2.0", features = ["serde"] }
clap = { version = "4.1.4", features = ["derive", "env", "wrap_help"] }
log = "0.4.17"
env_logger = "0.10.0"
Expand Down
156 changes: 35 additions & 121 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::{
collections::HashMap,
env,
ffi::{OsStr, OsString},
fmt::Write,
fs::File,
io,
Expand All @@ -16,6 +15,7 @@ use std::{
time::Duration,
};

use bstr::{BString, ByteSlice};
use chrono::prelude::{Local, TimeZone};
use itertools::Itertools;
use regex::bytes::Regex;
Expand All @@ -26,86 +26,8 @@ type BoxError = Box<dyn std::error::Error + Send + Sync + 'static>;

const TIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S";

pub fn get_hostname() -> OsString {
hostname::get().unwrap_or_else(|_| OsString::new())
}

#[derive(Debug, Eq, PartialEq, Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum BinaryStringHelper {
Readable(String),
Encoded(Vec<u8>),
}

impl BinaryStringHelper {
pub fn to_bytes(&self) -> Vec<u8> {
match self {
Self::Encoded(b) => b.clone(),
Self::Readable(s) => s.as_bytes().to_vec(),
}
}

pub fn to_string_lossy(&self) -> String {
match self {
Self::Encoded(b) => String::from_utf8_lossy(b).to_string(),
Self::Readable(s) => s.clone(),
}
}

pub fn to_os_str(&self) -> OsString {
match self {
Self::Encoded(b) => OsString::from_vec(b.to_vec()),
Self::Readable(s) => OsString::from(s),
}
}
}

impl From<&[u8]> for BinaryStringHelper {
fn from(bytes: &[u8]) -> Self {
match str::from_utf8(bytes) {
Ok(v) => Self::Readable(v.to_string()),
_ => Self::Encoded(bytes.to_vec()),
}
}
}

impl From<&Vec<u8>> for BinaryStringHelper {
fn from(v: &Vec<u8>) -> Self {
Self::from(v.as_slice())
}
}

impl From<&OsString> for BinaryStringHelper {
fn from(osstr: &OsString) -> Self {
Self::from(osstr.as_bytes())
}
}

impl From<&OsStr> for BinaryStringHelper {
fn from(osstr: &OsStr) -> Self {
Self::from(osstr.as_bytes())
}
}

impl From<&PathBuf> for BinaryStringHelper {
fn from(pb: &PathBuf) -> Self {
Self::from(pb.as_path().as_os_str())
}
}

impl<T: From<T>> From<Option<T>> for BinaryStringHelper
where
BinaryStringHelper: From<T>,
{
fn from(t: Option<T>) -> Self {
t.map_or_else(Self::default, Self::from)
}
}

impl Default for BinaryStringHelper {
fn default() -> Self {
Self::Readable("".to_string())
}
pub fn get_hostname() -> BString {
hostname::get().unwrap_or_default().as_bytes().into()
}

pub fn sqlite_connection(path: &Option<PathBuf>) -> Result<Connection, Box<dyn std::error::Error>> {
Expand Down Expand Up @@ -139,11 +61,11 @@ pub fn sqlite_connection(path: &Option<PathBuf>) -> Result<Connection, Box<dyn s

#[derive(Debug, Default, Serialize, Deserialize)]
pub struct Invocation {
pub command: BinaryStringHelper,
pub command: BString,
pub shellname: String,
pub working_directory: Option<BinaryStringHelper>,
pub hostname: Option<BinaryStringHelper>,
pub username: Option<BinaryStringHelper>,
pub working_directory: Option<BString>,
pub hostname: Option<BString>,
pub username: Option<BString>,
pub exit_status: Option<i64>,
pub start_unix_timestamp: Option<i64>,
pub end_unix_timestamp: Option<i64>,
Expand All @@ -156,12 +78,6 @@ impl Invocation {
}

pub fn insert(&self, tx: &Transaction) -> Result<(), Box<dyn std::error::Error>> {
let command_bytes: Vec<u8> = self.command.to_bytes();
let username_bytes = self.username.as_ref().map_or_else(Vec::new, |v| v.to_bytes());
let hostname_bytes = self.hostname.as_ref().map_or_else(Vec::new, |v| v.to_bytes());
let working_directory_bytes =
self.working_directory.as_ref().map_or_else(Vec::new, |v| v.to_bytes());

tx.execute(
r#"
INSERT INTO command_history (
Expand All @@ -178,11 +94,11 @@ INSERT INTO command_history (
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"#,
(
self.session_id,
command_bytes.as_slice(),
&self.shellname,
hostname_bytes,
username_bytes,
working_directory_bytes,
self.command.as_slice(),
self.shellname.clone(),
self.hostname.as_ref().map(|v| v.to_vec()),
self.username.as_ref().map(|v| v.to_vec()),
self.working_directory.as_ref().map(|v| v.to_vec()),
self.exit_status,
self.start_unix_timestamp,
self.end_unix_timestamp,
Expand All @@ -205,17 +121,16 @@ fn generate_import_session_id(histfile: &Path) -> i64 {

pub fn import_zsh_history(
histfile: &Path,
hostname: Option<&OsString>,
username: Option<&OsString>,
hostname: Option<BString>,
username: Option<BString>,
) -> Result<Vec<Invocation>, Box<dyn std::error::Error>> {
let mut f = File::open(histfile)?;
let mut buf = Vec::new();
let _ = f.read_to_end(&mut buf)?;
let username = username
.cloned()
.or_else(users::get_current_username)
.unwrap_or_else(|| OsString::from("unknown"));
let hostname = hostname.cloned().unwrap_or_else(get_hostname);
.or_else(|| users::get_current_username().map(|v| BString::from(v.into_vec())))
.unwrap_or_else(|| BString::from("unknown"));
let hostname = hostname.unwrap_or_else(get_hostname);
let buf_iter = buf.split(|&ch| ch == b'\n');

let mut ret = vec![];
Expand All @@ -227,10 +142,10 @@ pub fn import_zsh_history(
{
let start_unix_timestamp = str::from_utf8(&start_time[1..])?.parse::<i64>()?; // 1.. is to skip the leading space!
let invocation = Invocation {
command: BinaryStringHelper::from(command),
command: BString::from(command),
shellname: "zsh".into(),
hostname: Some(BinaryStringHelper::from(&hostname)),
username: Some(BinaryStringHelper::from(&username)),
hostname: Some(BString::from(hostname.as_bytes())),
username: Some(BString::from(username.as_bytes())),
start_unix_timestamp: Some(start_unix_timestamp),
end_unix_timestamp: Some(
start_unix_timestamp + str::from_utf8(duration_seconds)?.parse::<i64>()?,
Expand All @@ -249,17 +164,16 @@ pub fn import_zsh_history(

pub fn import_bash_history(
histfile: &Path,
hostname: Option<&OsString>,
username: Option<&OsString>,
hostname: Option<BString>,
username: Option<BString>,
) -> Result<Vec<Invocation>, Box<dyn std::error::Error>> {
let mut f = File::open(histfile)?;
let mut buf = Vec::new();
let _ = f.read_to_end(&mut buf)?;
let username = username
.cloned()
.or_else(users::get_current_username)
.unwrap_or_else(|| OsString::from("unknown"));
let hostname = hostname.cloned().unwrap_or_else(get_hostname);
.or_else(|| users::get_current_username().map(|v| BString::from(v.as_bytes())))
.unwrap_or_else(|| BString::from("unknown"));
let hostname = hostname.unwrap_or_else(get_hostname);
let buf_iter = buf.split(|&ch| ch == b'\n').filter(|l| !l.is_empty());

let mut ret = vec![];
Expand All @@ -275,10 +189,10 @@ pub fn import_bash_history(
}
}
let invocation = Invocation {
command: BinaryStringHelper::from(line),
command: BString::from(line),
shellname: "bash".into(),
hostname: Some(BinaryStringHelper::from(&hostname)),
username: Some(BinaryStringHelper::from(&username)),
hostname: Some(BString::from(hostname.as_bytes())),
username: Some(BString::from(username.as_bytes())),
start_unix_timestamp: last_ts,
session_id,
..Default::default()
Expand Down Expand Up @@ -328,11 +242,11 @@ pub fn json_export(rows: &[InvocationExport]) -> Result<(), Box<dyn std::error::
let invocations: Vec<Invocation> = rows
.iter()
.map(|row| Invocation {
command: BinaryStringHelper::from(&row.full_command),
command: BString::from(row.full_command.as_slice()),
shellname: row.shellname.clone(),
hostname: row.hostname.as_ref().map(BinaryStringHelper::from),
username: row.username.as_ref().map(BinaryStringHelper::from),
working_directory: row.working_directory.as_ref().map(BinaryStringHelper::from),
hostname: row.hostname.clone().map(|v| BString::from(v.as_slice())),
username: row.username.clone().map(|v| BString::from(v.as_slice())),
working_directory: row.working_directory.clone().map(|v| BString::from(v.as_slice())),
exit_status: row.exit_status,
start_unix_timestamp: row.start_unix_timestamp,
end_unix_timestamp: row.end_unix_timestamp,
Expand Down Expand Up @@ -421,10 +335,10 @@ fn displayers() -> HashMap<&'static str, QueryResultColumnDisplayer> {
header: "Context",
displayer: Box::new(|row| {
let current_hostname = get_hostname();
let row_hostname = BinaryStringHelper::from(row.hostname.as_ref());
let row_hostname = row.hostname.clone().map(BString::from).unwrap_or_default();
let mut ret = String::new();
if current_hostname != row_hostname.to_os_str() {
write!(ret, "{}:", row_hostname.to_string_lossy()).unwrap_or_default();
if current_hostname != row_hostname {
write!(ret, "{row_hostname}:").unwrap_or_default();
}
let current_directory = env::current_dir().unwrap_or_default();
ret.push_str(&row.working_directory.as_ref().map_or_else(String::new, |v| {
Expand Down
29 changes: 14 additions & 15 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ use std::{
fs::{File, OpenOptions},
io,
io::{BufRead, BufReader, Write},
os::unix::ffi::{OsStrExt, OsStringExt},
path::PathBuf,
str,
};

use bstr::{BString, ByteSlice};
use clap::{Parser, Subcommand};
use rusqlite::{Connection, Result};

Expand Down Expand Up @@ -140,13 +142,13 @@ impl ImportCommand {
let invocations = match self.shellname.as_ref() {
"zsh" => pxh::import_zsh_history(
&self.histfile,
self.hostname.as_ref(),
self.username.as_ref(),
self.hostname.as_ref().map(|v| v.as_bytes().into()),
self.username.as_ref().map(|v| v.as_bytes().into()),
),
"bash" => pxh::import_bash_history(
&self.histfile,
self.hostname.as_ref(),
self.username.as_ref(),
self.hostname.as_ref().map(|v| v.as_bytes().into()),
self.username.as_ref().map(|v| v.as_bytes().into()),
),
"json" => pxh::import_json_history(&self.histfile),
_ => Err(Box::from(format!("Unsupported shell: {} (PRs welcome!)", self.shellname))),
Expand Down Expand Up @@ -280,11 +282,11 @@ impl SyncCommand {
fs::create_dir(&self.dirname)?;
}
let mut output_path = self.dirname.clone();
output_path.push(pxh::get_hostname());
output_path.push(pxh::get_hostname().to_path_lossy());
output_path.set_extension("db");
// TODO: vacuum seems to want a plain text path, unlike ATTACH
// above, so we can't use BinaryStringHelper to get a vec<u8>.
// Look into why this is and if there is a workaround.
// above, so we can't use BString to get a vec<u8>. Look into
// why this is and if there is a workaround.
let output_path_str =
output_path.to_str().ok_or("Unable to represent output filename as a string")?;

Expand Down Expand Up @@ -316,10 +318,7 @@ impl SyncCommand {
let tx = conn.transaction()?;
let before_count: u64 =
tx.prepare("SELECT COUNT(*) FROM main.command_history")?.query_row((), |r| r.get(0))?;
tx.execute(
"ATTACH DATABASE ? AS other",
(pxh::BinaryStringHelper::from(&path).to_bytes(),),
)?;
tx.execute("ATTACH DATABASE ? AS other", (path.as_os_str().as_bytes(),))?;
let other_count: u64 = tx
.prepare("SELECT COUNT(*) FROM other.command_history")?
.query_row((), |r| r.get(0))?;
Expand Down Expand Up @@ -497,14 +496,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut conn = pxh::sqlite_connection(&args.db)?;
let tx = conn.transaction()?;
let invocation = pxh::Invocation {
command: pxh::BinaryStringHelper::from(&cmd.command.join(OsStr::new(" "))),
command: cmd.command.join(OsStr::new(" ")).as_bytes().into(),
shellname: cmd.shellname.clone(),
working_directory: cmd
.working_directory
.as_ref()
.map(pxh::BinaryStringHelper::from),
hostname: Some(pxh::BinaryStringHelper::from(&cmd.hostname)),
username: Some(pxh::BinaryStringHelper::from(&cmd.username)),
.map(|v| BString::from(v.as_path().as_os_str().as_bytes())),
hostname: Some(BString::from(cmd.hostname.clone().into_vec())),
username: Some(BString::from(cmd.username.clone().into_vec())),
exit_status: cmd.exit_status,
start_unix_timestamp: cmd.start_unix_timestamp,
end_unix_timestamp: cmd.end_unix_timestamp,
Expand Down
9 changes: 5 additions & 4 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{env, path::PathBuf};

use assert_cmd::Command;
use bstr::BString;
use tempfile::TempDir;

// Simple struct and helpers for invoking pxh with a given testdb.
Expand Down Expand Up @@ -161,17 +162,17 @@ fn test_insert_seal_roundtrip() {
serde_json::from_slice(json_output.stdout.as_slice()).unwrap();
assert_eq!(invocations.len(), commands.len());
for (idx, val) in invocations.iter().enumerate() {
assert_eq!(val.command.to_string_lossy(), commands[idx]);
assert_eq!(val.command, commands[idx]);
}
}

// Verify a given invocation list matches what we expect. The data is
// a bit of a torture test of non-utf8 data, spaces, etc.
fn matches_expected_history(invocations: &[pxh::Invocation]) {
let expected = vec![
pxh::BinaryStringHelper::Readable(r#"echo $'this "is" \'a\' \\n test\n\nboo'"#.to_string()),
pxh::BinaryStringHelper::Readable("fd zsh".to_string()),
pxh::BinaryStringHelper::Encoded(
BString::from(r#"echo $'this "is" \'a\' \\n test\n\nboo'"#.to_string()),
BString::from("fd zsh".to_string()),
BString::from(
[101, 99, 104, 111, 32, 0xf0, 0xce, 0xb1, 0xce, 0xa5, 0xef, 0xbd, 0xa9].to_vec(),
),
];
Expand Down

0 comments on commit 031fe16

Please sign in to comment.