From 1a76bbc5c0f09f051a8d50f5cb02a7c132a8d8bb Mon Sep 17 00:00:00 2001 From: hulloanson Date: Sat, 3 Apr 2021 23:16:40 +0800 Subject: [PATCH 01/30] testing out ttstub_diag_printf --- crates/engine_xetex/xetex/xetex-synctex.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index e09ca40a9e..f8bbebd4ec 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -771,7 +771,18 @@ synctex_record_preamble(void) static inline int synctex_record_input(int32_t tag, char *name) { - int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s\n", tag, name); + char cwd[4096]; + ttbc_diagnostic_t *errmsg = error_here_with_diagnostic("Failed to generate synctex info: "); + ttstub_diag_printf(errmsg, "?_?"); + capture_to_diagnostic(errmsg); + if (getcwd(cwd, 4096) == NULL) { + fprintf(stderr, "Error during "); + fprintf(stderr, "Failed to get absolute path to current directory.\n"); + fprintf(stderr, "Reason: %s\n", errno == ERANGE ? 
"directory path too long" : "unknown"); + return -1; + } + + int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s/%s\n", tag, cwd, name); if (len > 0) { synctex_ctxt.total_length += len; From fc31554db356ca61ad5485cd1cabdaff3d521f76 Mon Sep 17 00:00:00 2001 From: hulloanson Date: Sun, 4 Apr 2021 02:16:41 +0800 Subject: [PATCH 02/30] fatal error if path length exceeds PATH_MAX --- crates/engine_xetex/xetex/xetex-synctex.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index f8bbebd4ec..f783ee9c3f 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -6,9 +6,10 @@ #include "xetex-core.h" #include "xetex-xetexd.h" #include "xetex-synctex.h" -#include "tectonic_bridge_core.h" +#include "core-bridge.h" #include +#include #include #define SYNCTEX_VERSION 1 @@ -771,14 +772,14 @@ synctex_record_preamble(void) static inline int synctex_record_input(int32_t tag, char *name) { - char cwd[4096]; - ttbc_diagnostic_t *errmsg = error_here_with_diagnostic("Failed to generate synctex info: "); - ttstub_diag_printf(errmsg, "?_?"); - capture_to_diagnostic(errmsg); - if (getcwd(cwd, 4096) == NULL) { - fprintf(stderr, "Error during "); - fprintf(stderr, "Failed to get absolute path to current directory.\n"); - fprintf(stderr, "Reason: %s\n", errno == ERANGE ? "directory path too long" : "unknown"); + char cwd[PATH_MAX + 1]; + if (getcwd(cwd, PATH_MAX + 1) == NULL) { + char errmsg[100]; + sprintf(errmsg, + "Failed to generate synctex info: Failed to get absolute path to current directory: %s" + , (errno == ERANGE || errno == ENAMETOOLONG) ? 
"path too long" : strerror(errno) + ); + fatal_error(errmsg); return -1; } From 10a1c2720939c1d3f5d6b843ae1baaf89d946312 Mon Sep 17 00:00:00 2001 From: hulloanson Date: Sun, 4 Apr 2021 02:21:06 +0800 Subject: [PATCH 03/30] fixed wrong include --- crates/engine_xetex/xetex/xetex-synctex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index f783ee9c3f..b05db4c0b9 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -6,7 +6,7 @@ #include "xetex-core.h" #include "xetex-xetexd.h" #include "xetex-synctex.h" -#include "core-bridge.h" +#include "tectonic_bridge_core.h" #include #include From 5f65778d70d2bd52b0bdc7d430887caade440ffa Mon Sep 17 00:00:00 2001 From: hulloanson Date: Sun, 4 Apr 2021 02:41:28 +0800 Subject: [PATCH 04/30] proper separators for *nix and windows --- crates/engine_xetex/xetex/xetex-synctex.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index b05db4c0b9..a8d3b8990a 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -782,8 +782,12 @@ synctex_record_input(int32_t tag, char *name) fatal_error(errmsg); return -1; } - - int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s/%s\n", tag, cwd, name); + #ifdef _WIN32 + char *sep = "\\"; + #else + char *sep = "/"; + #endif + int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s%s%s\n", tag, cwd, sep, name); if (len > 0) { synctex_ctxt.total_length += len; From fe34dfdd023d7c89bceb4b93aa9d8dd025cab6c8 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 10:51:20 -0400 Subject: [PATCH 05/30] tectonic: properly reexport FORMAT_SERIAL from engine_xetex --- src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
0afe9bf1c1..db9d34cde7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -101,13 +101,9 @@ pub use crate::engines::xdvipdfmx::XdvipdfmxEngine; pub use crate::errors::{Error, ErrorKind, Result}; // Convenienece re-exports for migration into our multi-crate setup +pub use tectonic_engine_xetex::FORMAT_SERIAL; pub use tectonic_status_base::{tt_error, tt_note, tt_warning}; -// Increase this whenever the engine internals change such that the contents -// of the "format" files must be regenerated. - -pub const FORMAT_SERIAL: u32 = 29; - /// Compile LaTeX text to a PDF. /// /// This function is an all-in-one interface to the main Tectonic workflow. Given From 7730a77f511ec450ae40e7bc875e2c6d7d81a285 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 10:51:33 -0400 Subject: [PATCH 06/30] engine_xetex: fix #714 Due to a mis-transcription, \XeTeXfeaturename and \XeTeXselectorname were not properly implemented. Thanks @burrbull! Furthermore, even if they were properly implemented, they would not have executed currectly due to what appears to be a bug in upstream XeTeX relating to magic constants getting out of sync: https://sourceforge.net/p/xetex/bugs/174/ Fixing this bug requires a bump in the format file serial. --- crates/engine_xetex/src/lib.rs | 2 +- crates/engine_xetex/xetex/xetex-ext.c | 4 ++-- crates/engine_xetex/xetex/xetex-ext.h | 3 --- crates/engine_xetex/xetex/xetex-ini.c | 4 ++-- tests/formats.rs | 2 +- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/engine_xetex/src/lib.rs b/crates/engine_xetex/src/lib.rs index 159209883d..4df939cc8f 100644 --- a/crates/engine_xetex/src/lib.rs +++ b/crates/engine_xetex/src/lib.rs @@ -32,7 +32,7 @@ use tectonic_errors::prelude::*; /// should munge this serial number in the filename, or something along those /// lines, to make sure that when the engine is updated you don’t attempt to /// reuse old files. 
-pub const FORMAT_SERIAL: u32 = 29; +pub const FORMAT_SERIAL: u32 = 30; /// A possible outcome from a (Xe)TeX engine invocation. /// diff --git a/crates/engine_xetex/xetex/xetex-ext.c b/crates/engine_xetex/xetex/xetex-ext.c index d214716e49..4f3f14ff24 100644 --- a/crates/engine_xetex/xetex/xetex-ext.c +++ b/crates/engine_xetex/xetex/xetex-ext.c @@ -1021,10 +1021,10 @@ gr_print_font_name(int32_t what, void* pEngine, int32_t param1, int32_t param2) char* name = NULL; XeTeXLayoutEngine engine = (XeTeXLayoutEngine)pEngine; switch (what) { - case XeTeX_feature_name: + case XETEX_FEATURE_NAME_CODE: name = getGraphiteFeatureLabel(engine, param1); break; - case XeTeX_selector_name: + case XETEX_SELECTOR_NAME_CODE: name = getGraphiteFeatureSettingLabel(engine, param1, param2); break; } diff --git a/crates/engine_xetex/xetex/xetex-ext.h b/crates/engine_xetex/xetex/xetex-ext.h index b0872b3f9a..1110a7a6c7 100644 --- a/crates/engine_xetex/xetex/xetex-ext.h +++ b/crates/engine_xetex/xetex/xetex-ext.h @@ -75,9 +75,6 @@ typedef void* CFDictionaryRef; /* dummy declaration just so the stubs can compil #define XeTeX_OT_feature_code 21 #define XeTeX_map_char_to_glyph_code 22 -#define XeTeX_feature_name 8 -#define XeTeX_selector_name 9 - /* accessing info in a native_word_node */ #define width_offset 1 #define depth_offset 2 diff --git a/crates/engine_xetex/xetex/xetex-ini.c b/crates/engine_xetex/xetex/xetex-ini.c index b58904e6bf..3039e27965 100644 --- a/crates/engine_xetex/xetex/xetex-ini.c +++ b/crates/engine_xetex/xetex/xetex-ini.c @@ -4199,8 +4199,8 @@ tt_run_engine(const char *dump_name, const char *input_file_name, time_t build_d primitive("XeTeXisdefaultselector", LAST_ITEM, XETEX_IS_DEFAULT_SELECTOR_CODE); primitive("XeTeXvariationname", CONVERT, XETEX_VARIATION_NAME_CODE); - primitive("XeTeXfeaturename", CONVERT, XeTeX_feature_name); - primitive("XeTeXselectorname", CONVERT, XeTeX_selector_name); + primitive("XeTeXfeaturename", CONVERT, XETEX_FEATURE_NAME_CODE); + 
primitive("XeTeXselectorname", CONVERT, XETEX_SELECTOR_NAME_CODE); primitive("XeTeXOTcountscripts", LAST_ITEM, XETEX_OT_COUNT_SCRIPTS_CODE); primitive("XeTeXOTcountlanguages", LAST_ITEM, XETEX_OT_COUNT_LANGUAGES_CODE); diff --git a/tests/formats.rs b/tests/formats.rs index f625a0d4f2..f3dffa5c73 100644 --- a/tests/formats.rs +++ b/tests/formats.rs @@ -196,6 +196,6 @@ fn plain_format() { test_format_generation( "plain.tex", "plain.fmt", - "8e33c4c9af66ddb064a36749db1e0ba681bbebd1a896d2886745a0efa9a745a1", + "7012eeebbbcec81f6ce2c4d232013e306898f211fa252685434a8624ac7323d4", ) } From e849a4c21a87f1b90f6154cbc40fe66f62dc21da Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 11:03:23 -0400 Subject: [PATCH 07/30] engine_xetex: symbolic-ify some XeTeX code constants in xetex-ext.h --- crates/engine_xetex/xetex/xetex-ext.h | 32 +++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/engine_xetex/xetex/xetex-ext.h b/crates/engine_xetex/xetex/xetex-ext.h index 1110a7a6c7..78f15bb1cc 100644 --- a/crates/engine_xetex/xetex/xetex-ext.h +++ b/crates/engine_xetex/xetex/xetex-ext.h @@ -58,22 +58,22 @@ typedef void* CFDictionaryRef; /* dummy declaration just so the stubs can compil #define pdfbox_none 6 /* command codes for XeTeX extension commands */ -#define XeTeX_count_glyphs 1 -#define XeTeX_count_features 8 -#define XeTeX_feature_code 9 -#define XeTeX_find_feature_by_name 10 -#define XeTeX_is_exclusive_feature 11 -#define XeTeX_count_selectors 12 -#define XeTeX_selector_code 13 -#define XeTeX_find_selector_by_name 14 -#define XeTeX_is_default_selector 15 -#define XeTeX_OT_count_scripts 16 -#define XeTeX_OT_count_languages 17 -#define XeTeX_OT_count_features 18 -#define XeTeX_OT_script_code 19 -#define XeTeX_OT_language_code 20 -#define XeTeX_OT_feature_code 21 -#define XeTeX_map_char_to_glyph_code 22 +#define XeTeX_count_glyphs (XETEX_COUNT_GLYPHS_CODE - XETEX_INT) +#define XeTeX_count_features 
(XETEX_COUNT_FEATURES_CODE - XETEX_INT) +#define XeTeX_feature_code (XETEX_FEATURE_CODE_CODE - XETEX_INT) +#define XeTeX_find_feature_by_name (XETEX_FIND_FEATURE_BY_NAME_CODE - XETEX_INT) +#define XeTeX_is_exclusive_feature (XETEX_IS_EXCLUSIVE_FEATURE_CODE - XETEX_INT) +#define XeTeX_count_selectors (XETEX_COUNT_SELECTORS_CODE - XETEX_INT) +#define XeTeX_selector_code (XETEX_SELECTOR_CODE_CODE - XETEX_INT) +#define XeTeX_find_selector_by_name (XETEX_FIND_SELECTOR_BY_NAME_CODE - XETEX_INT) +#define XeTeX_is_default_selector (XETEX_IS_DEFAULT_SELECTOR_CODE - XETEX_INT) +#define XeTeX_OT_count_scripts (XETEX_OT_COUNT_SCRIPTS_CODE - XETEX_INT) +#define XeTeX_OT_count_languages (XETEX_OT_COUNT_LANGUAGES_CODE - XETEX_INT) +#define XeTeX_OT_count_features (XETEX_OT_COUNT_FEATURES_CODE - XETEX_INT) +#define XeTeX_OT_script_code (XETEX_OT_SCRIPT_CODE - XETEX_INT) +#define XeTeX_OT_language_code (XETEX_OT_LANGUAGE_CODE - XETEX_INT) +#define XeTeX_OT_feature_code (XETEX_OT_FEATURE_CODE - XETEX_INT) +#define XeTeX_map_char_to_glyph_code (XETEX_MAP_CHAR_TO_GLYPH_CODE - XETEX_INT) /* accessing info in a native_word_node */ #define width_offset 1 From 8bb44ec9c5256a29e7bc4ca794be498ba0b40f84 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 11:52:57 -0400 Subject: [PATCH 08/30] engine_xetex: missed an instance in the macOS code --- crates/engine_xetex/xetex/xetex-ext.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/engine_xetex/xetex/xetex-ext.c b/crates/engine_xetex/xetex/xetex-ext.c index 4f3f14ff24..93f039325b 100644 --- a/crates/engine_xetex/xetex/xetex-ext.c +++ b/crates/engine_xetex/xetex/xetex-ext.c @@ -2069,9 +2069,10 @@ aat_font_get_named_1(int what, CFDictionaryRef attributes, int param) void aat_print_font_name(int what, CFDictionaryRef attributes, int param1, int param2) { + /* Tectonic: this function is called for XETEX_VARIATION_NAME_CODE but doesn't handle it */ #ifdef XETEX_MAC CFStringRef name = NULL; - if (what == 
XeTeX_feature_name || what == XeTeX_selector_name) { + if (what == XETEX_FEATURE_NAME_CODE || what == XETEX_SELECTOR_NAME_CODE) { CTFontRef font = fontFromAttributes(attributes); CFArrayRef features = CTFontCopyFeatures(font); if (features) { @@ -2079,7 +2080,7 @@ aat_print_font_name(int what, CFDictionaryRef attributes, int param1, int param2 kCTFontFeatureTypeIdentifierKey, param1); if (feature) { - if (what == XeTeX_feature_name) + if (what == XETEX_FEATURE_NAME_CODE) name = CFDictionaryGetValue(feature, kCTFontFeatureTypeNameKey); else { CFArrayRef selectors = CFDictionaryGetValue(feature, kCTFontFeatureTypeSelectorsKey); From f7eeff461778f7082db7ed5097d93aa63119eb12 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 21:37:52 -0400 Subject: [PATCH 09/30] io_base: add new "abspath" methods to IoProvider These are needed for us to implement proper SyncTeX output, with absolute paths to source files (#720) and accounting for the virtualized I/O system (e.g., #744). I don't love the new API here, but I think this is the best approach that will give us what we need without significant API breakage. At first I was thinking of adding an API to get the filesystem path for an open input handle, but that would require us to plumb that path information through the filesystem by wrapping open std::fs::Files, or something equally invasive. 
--- crates/io_base/src/filesystem.rs | 60 +++++++++++++++++++++++++------- crates/io_base/src/lib.rs | 57 ++++++++++++++++++++++++++++++ crates/io_base/src/stack.rs | 36 ++++++++++++++++++- 3 files changed, 139 insertions(+), 14 deletions(-) diff --git a/crates/io_base/src/filesystem.rs b/crates/io_base/src/filesystem.rs index f867fc3832..e5b6025855 100644 --- a/crates/io_base/src/filesystem.rs +++ b/crates/io_base/src/filesystem.rs @@ -42,18 +42,35 @@ impl FilesystemPrimaryInputIo { } impl IoProvider for FilesystemPrimaryInputIo { - fn input_open_primary(&mut self, _status: &mut dyn StatusBackend) -> OpenResult { + fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult { + match self.input_open_primary_with_abspath(status) { + OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + + fn input_open_primary_with_abspath( + &mut self, + _status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { let f = match try_open_file(&self.path) { OpenResult::Ok(f) => f, OpenResult::NotAvailable => return OpenResult::NotAvailable, OpenResult::Err(e) => return OpenResult::Err(e), }; - OpenResult::Ok(InputHandle::new( - "", - BufReader::new(f), - InputOrigin::Filesystem, - )) + let handle = InputHandle::new("", BufReader::new(f), InputOrigin::Filesystem); + + // For SyncTeX paths we need to make sure that we return an absolute path. + // The easiest way to do this (as far as I can see) is to canonicalize. 
+ + let path = match std::fs::canonicalize(&self.path) { + Ok(m) => m, + Err(e) => return OpenResult::Err(e.into()), + }; + + OpenResult::Ok((handle, Some(path))) } } @@ -131,8 +148,20 @@ impl IoProvider for FilesystemIo { fn input_open_name( &mut self, name: &str, - _status: &mut dyn StatusBackend, + status: &mut dyn StatusBackend, ) -> OpenResult { + match self.input_open_name_with_abspath(name, status) { + OpenResult::Ok((h, _path)) => OpenResult::Ok(h), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + + fn input_open_name_with_abspath( + &mut self, + name: &str, + _status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { let path = match self.construct_path(name) { Ok(p) => p, Err(e) => return OpenResult::Err(e), @@ -142,7 +171,7 @@ impl IoProvider for FilesystemIo { return OpenResult::NotAvailable; } - let f = match File::open(path) { + let f = match File::open(&path) { Ok(f) => f, Err(e) => { return if e.kind() == io::ErrorKind::NotFound { @@ -173,12 +202,17 @@ impl IoProvider for FilesystemIo { return OpenResult::NotAvailable; } + // For SyncTeX paths we need to make sure that we return an absolute path. + // The easiest way to do this (as far as I can see) is to canonicalize. + + let path = match std::fs::canonicalize(path) { + Ok(m) => m, + Err(e) => return OpenResult::Err(e.into()), + }; + // Good to go. - OpenResult::Ok(InputHandle::new( - name, - BufReader::new(f), - InputOrigin::Filesystem, - )) + let handle = InputHandle::new(name, BufReader::new(f), InputOrigin::Filesystem); + OpenResult::Ok((handle, Some(path))) } } diff --git a/crates/io_base/src/lib.rs b/crates/io_base/src/lib.rs index 53e4da213b..26d28097c7 100644 --- a/crates/io_base/src/lib.rs +++ b/crates/io_base/src/lib.rs @@ -450,6 +450,33 @@ pub trait IoProvider: AsIoProviderMut { OpenResult::NotAvailable } + /// Open the named file for input and return filesystem path information. 
+ /// + /// This method extends [`input_open_name`] to help support SyncTeX output. + /// While SyncTeX output files should contain absolute source file paths, + /// Tectonic’s pluggable I/O system makes it so that the mapping between + /// input names and filesystem paths is not well-defined. This optional + /// interface enables backends to provide filesystem information at the time + /// of opening. + /// + /// The default implementation returns None for the path information, to + /// preserve backwards compatibility. If you are implementing a new backend + /// that might provide path information, or you are implementing an I/O + /// provider that delegates to other I/O providers, you should implement + /// this function fully, and then provide a simple implementation of + /// [`input_open_name`] that drops the pathing information. + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + match self.input_open_name(name, status) { + OpenResult::Ok(h) => OpenResult::Ok((h, None)), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + /// Open the "primary" input file, which in the context of TeX is the main /// input that it's given. When the build is being done using the /// filesystem and the input is a file on the filesystem, this function @@ -459,6 +486,21 @@ pub trait IoProvider: AsIoProviderMut { OpenResult::NotAvailable } + /// Open the primary input and return filesystem path information. + /// + /// This method is as to [`input_open_primary`] as + /// [`input_open_name_with_abspath`] is to [`input_open_name`]. 
+ fn input_open_primary_with_abspath( + &mut self, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + match self.input_open_primary(status) { + OpenResult::Ok(h) => OpenResult::Ok((h, None)), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + /// Open a format file with the specified name. Format files have a /// specialized entry point because IOProviders may wish to handle them /// specially: namely, to munge the filename to one that includes the @@ -502,10 +544,25 @@ impl IoProvider for Box

{ (**self).input_open_name(name, status) } + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + (**self).input_open_name_with_abspath(name, status) + } + fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult { (**self).input_open_primary(status) } + fn input_open_primary_with_abspath( + &mut self, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + (**self).input_open_primary_with_abspath(status) + } + fn input_open_format( &mut self, name: &str, diff --git a/crates/io_base/src/stack.rs b/crates/io_base/src/stack.rs index da8380125b..ebbbe9a1d7 100644 --- a/crates/io_base/src/stack.rs +++ b/crates/io_base/src/stack.rs @@ -1,9 +1,10 @@ -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! An "I/O stack" is an I/O provider that delegates requests to //! a series of sub-providers in turn. 
+use std::path::PathBuf; use tectonic_status_base::StatusBackend; use super::{InputHandle, IoProvider, OpenResult, OutputHandle}; @@ -68,6 +69,23 @@ impl<'a> IoProvider for IoStack<'a> { OpenResult::NotAvailable } + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + for item in &mut self.items { + let r = item.input_open_name_with_abspath(name, status); + + match r { + OpenResult::NotAvailable => continue, + _ => return r, + }; + } + + OpenResult::NotAvailable + } + fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult { for item in &mut self.items { let r = item.input_open_primary(status); @@ -81,6 +99,22 @@ impl<'a> IoProvider for IoStack<'a> { OpenResult::NotAvailable } + fn input_open_primary_with_abspath( + &mut self, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + for item in &mut self.items { + let r = item.input_open_primary_with_abspath(status); + + match r { + OpenResult::NotAvailable => continue, + _ => return r, + }; + } + + OpenResult::NotAvailable + } + fn input_open_format( &mut self, name: &str, From 4e16bf963700aae59772a6fb223981ceaa9b5f57 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 21:44:51 -0400 Subject: [PATCH 10/30] bridge_core: expose filesystem paths for just-opened inputs Leveraging the new "abspath" support in io_base, now track the absolute path of the most recently-opened input and expose it to the C code with the new function `ttstub_get_last_input_abspath()`. The plumbing is a bit messy but fundamentally we're just passing an additional piece of information around. 
--- crates/bridge_core/Cargo.toml | 2 +- crates/bridge_core/src/lib.rs | 111 ++++++++++++++---- crates/bridge_core/support/support.c | 6 + .../support/tectonic_bridge_core.h | 1 + 4 files changed, 98 insertions(+), 22 deletions(-) diff --git a/crates/bridge_core/Cargo.toml b/crates/bridge_core/Cargo.toml index 5a6edb6ab2..0744c5b65b 100644 --- a/crates/bridge_core/Cargo.toml +++ b/crates/bridge_core/Cargo.toml @@ -33,5 +33,5 @@ cc = "^1.0.66" [package.metadata.internal_dep_versions] tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7" -tectonic_io_base = "thiscommit:2021-01-16:go5rieNg" +tectonic_io_base = "f7eeff461778f7082db7ed5097d93aa63119eb12" tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" diff --git a/crates/bridge_core/src/lib.rs b/crates/bridge_core/src/lib.rs index a5e20af801..00baf3edc5 100644 --- a/crates/bridge_core/src/lib.rs +++ b/crates/bridge_core/src/lib.rs @@ -35,9 +35,11 @@ use flate2::{read::GzDecoder, Compression, GzBuilder}; use md5::{Digest, Md5}; use std::{ + convert::TryInto, ffi::CStr, fmt::{Display, Error as FmtError, Formatter}, io::{self, Read, SeekFrom, Write}, + path::PathBuf, ptr, result::Result as StdResult, slice, @@ -271,6 +273,15 @@ pub struct CoreBridgeState<'a> { #[allow(clippy::vec_box)] output_handles: Vec>, + + /// A semi-hack to allow us to feed input file path information to SyncTeX. + /// This field is updated every time a new input file is opened. The XeTeX + /// engine queries it when opening new source input files to get the + /// absolute filesystem path info that SyncTeX wants. This field might be + /// None because we're still reading the primary input, or because the most + /// recent input didn't have a filesystem path (it came from a bundle or + /// memory or something else). 
+ latest_input_path: Option, } impl<'a> CoreBridgeState<'a> { @@ -283,6 +294,7 @@ impl<'a> CoreBridgeState<'a> { status, output_handles: Vec::new(), input_handles: Vec::new(), + latest_input_path: None, } } @@ -290,18 +302,20 @@ impl<'a> CoreBridgeState<'a> { &mut self, name: &str, format: FileFormat, - ) -> OpenResult { + ) -> OpenResult<(InputHandle, Option)> { let io = self.hooks.io(); - let r = if let FileFormat::Format = format { - io.input_open_format(name, self.status) + if let FileFormat::Format = format { + match io.input_open_format(name, self.status) { + OpenResult::NotAvailable => {} + OpenResult::Err(e) => return OpenResult::Err(e), + OpenResult::Ok(h) => return OpenResult::Ok((h, None)), + } } else { - io.input_open_name(name, self.status) - }; - - match r { - OpenResult::NotAvailable => {} - r => return r, + match io.input_open_name_with_abspath(name, self.status) { + OpenResult::NotAvailable => {} + r => return r, + } } // It wasn't available under the immediately-given name. 
Try adding @@ -313,13 +327,19 @@ impl<'a> CoreBridgeState<'a> { let ext = format!("{}.{}", name, e); if let FileFormat::Format = format { - if let r @ OpenResult::Ok(_) = io.input_open_format(&ext, self.status) { - return r; + match io.input_open_format(&ext, self.status) { + OpenResult::NotAvailable => {} + OpenResult::Err(e) => return OpenResult::Err(e), + OpenResult::Ok(h) => return OpenResult::Ok((h, None)), + } + } else { + match io.input_open_name_with_abspath(&ext, self.status) { + OpenResult::NotAvailable => {} + r => return r, } - } else if let r @ OpenResult::Ok(_) = io.input_open_name(&ext, self.status) { - return r; } } + OpenResult::NotAvailable } @@ -328,7 +348,7 @@ impl<'a> CoreBridgeState<'a> { name: &str, format: FileFormat, is_gz: bool, - ) -> OpenResult { + ) -> OpenResult<(InputHandle, Option)> { let base = self.input_open_name_format(name, format); if !is_gz { @@ -336,11 +356,11 @@ impl<'a> CoreBridgeState<'a> { } match base { - OpenResult::Ok(ih) => { + OpenResult::Ok((ih, path)) => { let origin = ih.origin(); let dr = GzDecoder::new(ih.into_inner()); - OpenResult::Ok(InputHandle::new(name, dr, origin)) + OpenResult::Ok((InputHandle::new(name, dr, origin), path)) } _ => base, } @@ -356,7 +376,7 @@ impl<'a> CoreBridgeState<'a> { // idea to just go and read the file. 
let mut ih = match self.input_open_name_format(&name, FileFormat::Tex) { - OpenResult::Ok(ih) => ih, + OpenResult::Ok((ih, _path)) => ih, OpenResult::NotAvailable => { // We could issue a warning here, but the standard LaTeX // "rerun check" implementations trigger it very often, which @@ -498,8 +518,8 @@ impl<'a> CoreBridgeState<'a> { fn input_open(&mut self, name: &str, format: FileFormat, is_gz: bool) -> *mut InputHandle { let name = normalize_tex_path(name); - let ih = match self.input_open_name_format_gz(&name, format, is_gz) { - OpenResult::Ok(ih) => ih, + let (ih, path) = match self.input_open_name_format_gz(&name, format, is_gz) { + OpenResult::Ok(tup) => tup, OpenResult::NotAvailable => { return ptr::null_mut(); } @@ -510,14 +530,15 @@ impl<'a> CoreBridgeState<'a> { }; self.input_handles.push(Box::new(ih)); + self.latest_input_path = path; &mut **self.input_handles.last_mut().unwrap() } fn input_open_primary(&mut self) -> *mut InputHandle { let io = self.hooks.io(); - let ih = match io.input_open_primary(self.status) { - OpenResult::Ok(ih) => ih, + let (ih, path) = match io.input_open_primary_with_abspath(self.status) { + OpenResult::Ok(tup) => tup, OpenResult::NotAvailable => { tt_error!(self.status, "primary input not available (?!)"); return ptr::null_mut(); @@ -529,6 +550,7 @@ impl<'a> CoreBridgeState<'a> { }; self.input_handles.push(Box::new(ih)); + self.latest_input_path = path; &mut **self.input_handles.last_mut().unwrap() } @@ -815,6 +837,53 @@ pub extern "C" fn ttbc_input_open_primary(es: &mut CoreBridgeState) -> *mut Inpu es.input_open_primary() } +/// Get the filesystem path of the most-recently-opened input file. +/// +/// This function is needed by SyncTeX, because its output file should contain +/// absolute filesystem paths to the input source files. In principle this +/// functionality could be implemented in a few different ways, but the approach +/// used here is the most backward-compatible. 
This function will fill in the +/// caller's buffer with the filesystem path associated with the most +/// recently-opened input file, including a terminating NUL, if possible. +/// +/// It returns 0 if no such path is known, -1 if the path cannot be expressed +/// UTF-8, -2 if the destination buffer is not big enough, or the number of +/// bytes written into the buffer (including a terminating NUL) otherwise. +/// +/// # Safety +/// +/// This function is unsafe because it dereferences raw C pointers. +#[no_mangle] +pub unsafe extern "C" fn ttbc_get_last_input_abspath( + es: &mut CoreBridgeState, + buffer: *mut u8, + len: libc::size_t, +) -> libc::ssize_t { + match es.latest_input_path { + None => 0, + + Some(ref p) => { + // In principle we could try to handle the full fun of + // cross-platform PathBuf/Unicode conversions, but synctex and + // friends will be treating our data as a traditional C string in + // the end. So play it safe and stick to UTF-8. + let p = match p.to_str() { + Some(s) => s.as_bytes(), + None => return -1, + }; + + let n = p.len(); + if n + 1 > len { + return -2; + } + + std::ptr::copy(p.as_ptr(), buffer, n); + *buffer.offset(n.try_into().unwrap()) = b'\0'; + (n + 1).try_into().unwrap() + } + } +} + /// Get the size of a Tectonic input file. 
#[no_mangle] pub extern "C" fn ttbc_input_get_size( diff --git a/crates/bridge_core/support/support.c b/crates/bridge_core/support/support.c index 1a6c9e516e..1d49263817 100644 --- a/crates/bridge_core/support/support.c +++ b/crates/bridge_core/support/support.c @@ -238,6 +238,12 @@ ttstub_input_open_primary(void) } +ssize_t +ttstub_get_last_input_abspath(char *buffer, size_t len) +{ + return ttbc_get_last_input_abspath(tectonic_global_bridge_core, (uint8_t *) buffer, len); +} + size_t ttstub_input_get_size(rust_input_handle_t handle) { diff --git a/crates/bridge_core/support/tectonic_bridge_core.h b/crates/bridge_core/support/tectonic_bridge_core.h index ed167ec34d..0ae5495c72 100644 --- a/crates/bridge_core/support/tectonic_bridge_core.h +++ b/crates/bridge_core/support/tectonic_bridge_core.h @@ -238,6 +238,7 @@ int ttstub_output_close(rust_output_handle_t handle); rust_input_handle_t ttstub_input_open(char const *path, ttbc_file_format format, int is_gz); rust_input_handle_t ttstub_input_open_primary(void); +ssize_t ttstub_get_last_input_abspath(char *buffer, size_t len); size_t ttstub_input_get_size(rust_input_handle_t handle); time_t ttstub_input_get_mtime(rust_input_handle_t handle); size_t ttstub_input_seek(rust_input_handle_t handle, ssize_t offset, int whence); From b7a4085fa67c831d4532da6661bddafd1f9c24ff Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 21:48:18 -0400 Subject: [PATCH 11/30] engine_xetex: fix SyncTeX output (#720, #744) Using the new "abspath" support exposed in the updated bridge_core, we can emit proper absolute paths in the SyncTeX file. 
--- crates/engine_xetex/Cargo.toml | 2 +- crates/engine_xetex/xetex/xetex-io.c | 9 ++++++++ crates/engine_xetex/xetex/xetex-io.h | 1 + crates/engine_xetex/xetex/xetex-synctex.c | 28 ++++------------------- 4 files changed, 15 insertions(+), 25 deletions(-) diff --git a/crates/engine_xetex/Cargo.toml b/crates/engine_xetex/Cargo.toml index 8caab6b98f..d3af347bbf 100644 --- a/crates/engine_xetex/Cargo.toml +++ b/crates/engine_xetex/Cargo.toml @@ -40,7 +40,7 @@ external-harfbuzz = [ ] [package.metadata.internal_dep_versions] -tectonic_bridge_core = "thiscommit:2021-06-02:ieXoo6ne" +tectonic_bridge_core = "4e16bf963700aae59772a6fb223981ceaa9b5f57" tectonic_bridge_flate = "5933308152efb6ba206b4dc01ab6814063b835c0" tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" diff --git a/crates/engine_xetex/xetex/xetex-io.c b/crates/engine_xetex/xetex/xetex-io.c index 56c989005e..b84ce90bf4 100644 --- a/crates/engine_xetex/xetex/xetex-io.c +++ b/crates/engine_xetex/xetex/xetex-io.c @@ -13,6 +13,11 @@ char *name_of_input_file = NULL; +// Tectonic: This buffer is used for SyncTeX, which needs to emit absolute +// filesystem paths -- which are difficult to derive in our virtualized I/O +// system. The most backwards-compatible way to expose this information to the +// engine was to add the `ttstub_get_last_input_abspath()` API used below. 
+char abspath_of_input_file[1024] = ""; rust_input_handle_t tt_xetex_open_input (int filefmt) @@ -27,6 +32,10 @@ tt_xetex_open_input (int filefmt) if (handle == NULL) return NULL; + if (ttstub_get_last_input_abspath(abspath_of_input_file, sizeof(abspath_of_input_file)) < 1) { + abspath_of_input_file[0] = '\0'; + } + name_length = strlen(name_of_file); free(name_of_input_file); name_of_input_file = xstrdup(name_of_file); diff --git a/crates/engine_xetex/xetex/xetex-io.h b/crates/engine_xetex/xetex/xetex-io.h index e367840e0d..6e66061c1e 100644 --- a/crates/engine_xetex/xetex/xetex-io.h +++ b/crates/engine_xetex/xetex/xetex-io.h @@ -21,6 +21,7 @@ typedef struct { BEGIN_EXTERN_C extern char *name_of_input_file; +extern char abspath_of_input_file[]; extern const uint32_t offsetsFromUTF8[6]; extern const uint8_t bytesFromUTF8[256]; extern const uint8_t firstByteMark[7]; diff --git a/crates/engine_xetex/xetex/xetex-synctex.c b/crates/engine_xetex/xetex/xetex-synctex.c index a8d3b8990a..0403d790a6 100644 --- a/crates/engine_xetex/xetex/xetex-synctex.c +++ b/crates/engine_xetex/xetex/xetex-synctex.c @@ -130,14 +130,9 @@ static struct { static char * get_current_name (void) { - /* This used to always make the pathname absolute but I'm getting rid of - * that since it ends up adding dependencies on a bunch of functions I - * don't want to have to deal with. */ - - if (!name_of_input_file) - return xstrdup(""); - - return xstrdup(name_of_input_file); + /* Tectonic: this used to make pathnames absolute, but in the virtualized + * I/O system that information has to be provided externally. 
*/ + return xstrdup(abspath_of_input_file); } @@ -772,22 +767,7 @@ synctex_record_preamble(void) static inline int synctex_record_input(int32_t tag, char *name) { - char cwd[PATH_MAX + 1]; - if (getcwd(cwd, PATH_MAX + 1) == NULL) { - char errmsg[100]; - sprintf(errmsg, - "Failed to generate synctex info: Failed to get absolute path to current directory: %s" - , (errno == ERANGE || errno == ENAMETOOLONG) ? "path too long" : strerror(errno) - ); - fatal_error(errmsg); - return -1; - } - #ifdef _WIN32 - char *sep = "\\"; - #else - char *sep = "/"; - #endif - int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s%s%s\n", tag, cwd, sep, name); + int len = ttstub_fprintf(synctex_ctxt.file, "Input:%i:%s\n", tag, name); if (len > 0) { synctex_ctxt.total_length += len; From 4f3d01a7cd96ea9acab91208c8c0794c92ef8f7b Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 21:50:47 -0400 Subject: [PATCH 12/30] tectonic: bring in the SyncTeX fixes First we need to update our IoProvider implementations to properly handle the new "abspath" APIs -- providers that delegate to other providers have to make sure to not just use the default implementation instead. We also need to update our test framework to handle the fact that the contents of the emitted SyncTeX files will depend on the system running the test suite. 
--- Cargo.toml | 6 +++--- src/driver.rs | 29 ++++++++++++++++++++++++--- tests/tex-outputs.rs | 3 ++- tests/tex-outputs/synctex.synctex.gz | Bin 333 -> 361 bytes tests/util/mod.rs | 23 +++++++++++++++++++-- 5 files changed, 52 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd3aea3431..ba18f07101 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,7 +119,7 @@ x86_64-unknown-linux-gnu = { install = ["fontconfig","freetype","harfbuzz[icu,gr x86_64-pc-windows-msvc = { triplet = "x64-windows-static", install = ["fontconfig","freetype","harfbuzz[icu,graphite2]"] } [package.metadata.internal_dep_versions] -tectonic_bridge_core = "thiscommit:2021-06-02:Oiyoowe2" +tectonic_bridge_core = "4e16bf963700aae59772a6fb223981ceaa9b5f57" tectonic_bridge_flate = "thiscommit:2021-01-01:eer4ahL4" tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" @@ -128,10 +128,10 @@ tectonic_cfg_support = "thiscommit:aeRoo7oa" tectonic_dep_support = "5faf4205bdd3d31101b749fc32857dd746f9e5bc" tectonic_engine_bibtex = "thiscommit:2021-01-17:KuhaeG1e" tectonic_engine_xdvipdfmx = "7dcbc52e58f9774b3d592919a9105377faeac509" -tectonic_engine_xetex = "thiscommit:2021-06-02:nahbie2O" +tectonic_engine_xetex = "b7a4085fa67c831d4532da6661bddafd1f9c24ff" tectonic_errors = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" tectonic_geturl = "thiscommit:2021-01-16:Aikoob9c" -tectonic_io_base = "thiscommit:2021-01-16:SaeK7eex" +tectonic_io_base = "f7eeff461778f7082db7ed5097d93aa63119eb12" tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" tectonic_xdv = "c91f2ef37858d1a0a724a5c3ddc2f7ea46373c77" tectonic_xetex_layout = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" diff --git a/src/driver.rs b/src/driver.rs index b9cd9c30ec..afa3c6d8d9 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -382,12 +382,24 @@ impl IoProvider for BridgeState { name: &str, status: &mut dyn StatusBackend, ) -> OpenResult { + 
match self.input_open_name_with_abspath(name, status) { + OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { let r = (|| { - bridgestate_ioprovider_cascade!(self, input_open_name(name, status)); + bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status)); })(); match r { - OpenResult::Ok(ref ih) => { + OpenResult::Ok((ref ih, ref _path)) => { if let Some(summ) = self.events.get_mut(name) { summ.access_pattern = match summ.access_pattern { AccessPattern::Written => AccessPattern::WrittenThenRead, @@ -430,7 +442,18 @@ impl IoProvider for BridgeState { } fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult { - bridgestate_ioprovider_cascade!(self, input_open_primary(status)); + match self.input_open_primary_with_abspath(status) { + OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih), + OpenResult::Err(e) => OpenResult::Err(e), + OpenResult::NotAvailable => OpenResult::NotAvailable, + } + } + + fn input_open_primary_with_abspath( + &mut self, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status)); } fn input_open_format( diff --git a/tests/tex-outputs.rs b/tests/tex-outputs.rs index f0c625ab55..8fb34c3ca0 100644 --- a/tests/tex-outputs.rs +++ b/tests/tex-outputs.rs @@ -166,7 +166,8 @@ impl TestCase { } if self.check_synctex { - ExpectedInfo::read_with_extension_gz(&mut p, "synctex.gz").test_from_collection(&files); + ExpectedInfo::read_with_extension_rooted_gz(&mut p, "synctex.gz") + .test_from_collection(&files); } if self.check_pdf { diff --git a/tests/tex-outputs/synctex.synctex.gz b/tests/tex-outputs/synctex.synctex.gz index 
905b039b0a0612f527d26e2116421e1dc3cc0ecc..4803c00843b1307d26bfe2d3e3188fc15853cb77 100644 GIT binary patch literal 361 zcmV-v0hazBiwFqF8o*!x19N$9V{~PBE^~QqV{~PB0ELoEYr;SnMfdY76kI5k@;x%o zZC72Cl-3qY>B8E?hH4ZuDHid+cM`D)QRpHh%suzZnPDD(^7$m0_MVe%k*#xw;x^xu z-k}@pAL4j&C<5S<-F0|GT>2{8e5B%Tw#c(Io6mgR0s!%pXQ3yiy*N#a#5)i#&F9TJ z_et)Z=nwYGBp+6@M^^~yAoeI;aZE1KtjOqbWFkeN*)eD%jpA{{nxr;r3?1_LZa@gc zuuZT*qfKj+lp16;==%}|P5xEoEF}?2IGZCz;;rMNjIji^WgVJiiMmdbEvl$xO@>gJ zfPG%VkP$RNy|-NjDAv`_PC&=>R3nQ~;7BYk#kK$NpHnB|h)+j>)+))`G+@{rS+IDt zA|T5k?O)#UIt@9fwii- literal 333 zcmV-T0kZxdiwFP!000002ZWN#O2a@DhWFIX=x@> z5#QZON=--$U4#th|IV3jhPnGL7x&q0^q6g{VqJQI>vB^EPriyt$aa_UgJ~U_;^r-f zoB6UV@?tR$EslurP!_QPW}|7IS6T1~Pu=orU52a-9>#K` zomo;jTm?C&l^ENB@pSCa(cX%YouG36`M~SAtx*^&68I$;Tew|UVZM6(%zS(wb$lHh f%PehuR4ul}Ce$kre*gdg|NjF37gzo(odN&=$`G28 diff --git a/tests/util/mod.rs b/tests/util/mod.rs index 6e1bec5466..559787fddb 100644 --- a/tests/util/mod.rs +++ b/tests/util/mod.rs @@ -131,14 +131,33 @@ impl ExpectedInfo { Self::read(pbase) } - pub fn read_with_extension_gz(pbase: &mut PathBuf, extension: &str) -> Self { + /// Special handling for synctex files -- we need to decode the gzip and + /// fill in the absolute paths of the output files (cf. #720) + pub fn read_with_extension_rooted_gz(pbase: &mut PathBuf, extension: &str) -> Self { pbase.set_extension(extension); let name = pbase.file_name().unwrap().to_str().unwrap().to_owned(); - let mut dec = GzDecoder::new(File::open(pbase).unwrap()); + let mut dec = GzDecoder::new(File::open(&pbase).unwrap()); let mut contents = Vec::new(); dec.read_to_end(&mut contents).unwrap(); + // Special SyncTeX rooting. We need a *mega* hack since there is a + // byte-offset field whose value depends on the length of the file + // prefix. 
+ let root = format!( + "{}{}", + pbase.parent().unwrap().to_str().unwrap(), + std::path::MAIN_SEPARATOR + ); + let contents = String::from_utf8(contents) + .unwrap() + .replace("${ROOT}", &root) + .replace( + "${len(ROOT)+106}", + &(root.as_bytes().len() + 106).to_string(), + ) + .into_bytes(); + ExpectedInfo { name, contents, From 2f4f2b690e1d165523d851b4e885a2409427f6cb Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Tue, 8 Jun 2021 22:32:32 -0400 Subject: [PATCH 13/30] io_base: don't use std::fs::canonicalize Canonicalize is a bit overkill, and on Windows it seems that our paths all end up prefixed with `\\?\`. Instead let's try joining with `std::env::current_dir()`. --- crates/io_base/src/filesystem.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/crates/io_base/src/filesystem.rs b/crates/io_base/src/filesystem.rs index e5b6025855..ea7f24bd81 100644 --- a/crates/io_base/src/filesystem.rs +++ b/crates/io_base/src/filesystem.rs @@ -1,10 +1,11 @@ -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! Tectonic I/O implementations for `std::fs` types. use std::{ collections::HashSet, + env, fs::File, io::{self, BufReader, Seek, SeekFrom}, path::{Path, PathBuf}, @@ -62,12 +63,9 @@ impl IoProvider for FilesystemPrimaryInputIo { let handle = InputHandle::new("", BufReader::new(f), InputOrigin::Filesystem); - // For SyncTeX paths we need to make sure that we return an absolute path. - // The easiest way to do this (as far as I can see) is to canonicalize. 
- - let path = match std::fs::canonicalize(&self.path) { + let path = match make_abspath(&self.path) { Ok(m) => m, - Err(e) => return OpenResult::Err(e.into()), + Err(e) => return OpenResult::Err(e), }; OpenResult::Ok((handle, Some(path))) @@ -202,12 +200,10 @@ impl IoProvider for FilesystemIo { return OpenResult::NotAvailable; } - // For SyncTeX paths we need to make sure that we return an absolute path. - // The easiest way to do this (as far as I can see) is to canonicalize. - - let path = match std::fs::canonicalize(path) { + // SyncTeX requires absolute paths. + let path = match make_abspath(path) { Ok(m) => m, - Err(e) => return OpenResult::Err(e.into()), + Err(e) => return OpenResult::Err(e), }; // Good to go. @@ -250,3 +246,11 @@ impl InputFeatures for BufReader { Ok(self.seek(pos)?) } } + +/// For SyncTeX paths we need to make sure that we return an absolute +/// path. `std::fs::canonicalize` is a bit overkill and prefixes all of +/// our paths with `\\?\` on Windows. +fn make_abspath>(path: P) -> Result { + let cwd = env::current_dir()?; + Ok(cwd.join(path.as_ref())) +} From cd77b60d48b1ae3ef80d708e6858ea91cd9fa812 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 9 Jun 2021 21:52:03 -0400 Subject: [PATCH 14/30] docmodel: separate out the document model into its own crate This way, people will be able to work with Tectonic.toml files without having to link with XeTeX and xdvipdfmx and everything else. That will be nice! 
--- Cargo.toml | 1 + crates/docmodel/CHANGELOG.md | 8 + crates/docmodel/Cargo.toml | 26 ++ crates/docmodel/README.md | 19 ++ crates/docmodel/src/document.rs | 436 +++++++++++++++++++++++++++++++ crates/docmodel/src/lib.rs | 22 ++ crates/docmodel/src/workspace.rs | 173 ++++++++++++ 7 files changed, 685 insertions(+) create mode 100644 crates/docmodel/CHANGELOG.md create mode 100644 crates/docmodel/Cargo.toml create mode 100644 crates/docmodel/README.md create mode 100644 crates/docmodel/src/document.rs create mode 100644 crates/docmodel/src/lib.rs create mode 100644 crates/docmodel/src/workspace.rs diff --git a/Cargo.toml b/Cargo.toml index 9933808ac6..6f09789b14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ members = [ "crates/bridge_icu", "crates/cfg_support", "crates/dep_support", + "crates/docmodel", "crates/engine_bibtex", "crates/engine_xdvipdfmx", "crates/engine_xetex", diff --git a/crates/docmodel/CHANGELOG.md b/crates/docmodel/CHANGELOG.md new file mode 100644 index 0000000000..2b2ba1a3be --- /dev/null +++ b/crates/docmodel/CHANGELOG.md @@ -0,0 +1,8 @@ +# See elsewhere for changelog + +This project’s release notes are curated from the Git history of its main +branch. You can find them by looking at [the version of this file on the +`release` branch][branch] or the [GitHub release history][gh-releases]. + +[branch]: https://github.com/tectonic-typesetting/tectonic/blob/release/crates/docmodel/CHANGELOG.md +[gh-releases]: https://github.com/tectonic-typesetting/tectonic/releases diff --git a/crates/docmodel/Cargo.toml b/crates/docmodel/Cargo.toml new file mode 100644 index 0000000000..914015cebf --- /dev/null +++ b/crates/docmodel/Cargo.toml @@ -0,0 +1,26 @@ +# Copyright 2020-2021 the Tectonic Project +# Licensed under the MIT License. + +# See README.md for discussion of features (or lack thereof) in this crate. 
+ +[package] +name = "tectonic_docmodel" +version = "0.0.0-dev.0" # assigned with cranko (see README) +authors = ["Peter Williams "] +description = """ +The Tectonic document model and its serialization into `Tectonic.toml`. +""" +homepage = "https://tectonic-typesetting.github.io/" +documentation = "https://docs.rs/tectonic_docmodel" +repository = "https://github.com/tectonic-typesetting/tectonic/" +readme = "README.md" +license = "MIT" +edition = "2018" + +[dependencies] +serde = { version = "^1.0", features = ["derive"] } +tectonic_errors = { path = "../errors", version = "0.0.0-dev.0" } +toml = { version = "^0.5" } + +[package.metadata.internal_dep_versions] +tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7" diff --git a/crates/docmodel/README.md b/crates/docmodel/README.md new file mode 100644 index 0000000000..8aadce4298 --- /dev/null +++ b/crates/docmodel/README.md @@ -0,0 +1,19 @@ +# The `tectonic_docmodel` crate + +[![](http://meritbadge.herokuapp.com/tectonic_docmodel)](https://crates.io/crates/tectonic_docmodel) + +This crate is part of [the Tectonic +project](https://tectonic-typesetting.github.io/en-US/). It implements the +Tectonic document model, including the [`Tectonic.toml`] file. + +[`Tectonic.toml`]: https://tectonic-typesetting.github.io/book/latest/ref/tectonic-toml.html + +- [API documentation](https://docs.rs/tectonic_docmodel/). +- [Main Git repository](https://github.com/tectonic-typesetting/tectonic/). + + +## Cargo features + +This crate does not currently provides any [Cargo features][features]. + +[features]: https://doc.rust-lang.org/cargo/reference/features.html diff --git a/crates/docmodel/src/document.rs b/crates/docmodel/src/document.rs new file mode 100644 index 0000000000..3f9f17afb9 --- /dev/null +++ b/crates/docmodel/src/document.rs @@ -0,0 +1,436 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +//! A single Tectonic document. +//! +//! 
Every document is part of a [`crate::workspace::Workspace`]. At the moment +//! workspaces can only contain a single document each, but in the future it +//! might become possible for one workspace to contain multiple documents. +//! +//! This crate, on its own, does not provide document-processing capabilities. +//! The main `tectonic` crate provides extension traits that set up document +//! processing, in the `tectonic::docmodel` module. + +use std::{ + collections::HashMap, + env, fs, + io::{Read, Write}, + path::{Component, Path, PathBuf}, +}; +use tectonic_errors::prelude::*; + +use crate::workspace::WorkspaceCreator; + +/// The default filesystem name for the "preamble" file of a document. +/// +/// This default can be overridden on an output-by-output basis in +/// `Tectonic.toml`. +pub const DEFAULT_PREAMBLE_FILE: &str = "_preamble.tex"; + +/// The default filesystem name for the main "index" file of a document. +/// +/// This default can be overridden on an output-by-output basis in +/// `Tectonic.toml`. +pub const DEFAULT_INDEX_FILE: &str = "index.tex"; + +/// The default filesystem name for the "postamble" file of a document. +/// +/// This default can be overridden on an output-by-output basis in +/// `Tectonic.toml`. +pub const DEFAULT_POSTAMBLE_FILE: &str = "_postamble.tex"; + +/// A Tectonic document. +#[derive(Debug)] +pub struct Document { + /// The directory containing the `Tectonic.toml` file and document source. + src_dir: PathBuf, + + /// The directory where document build artifacts will be output. By default + /// this will be a subdirectory of `src_dir` named `build`. + build_dir: PathBuf, + + /// The document name. This will be used to name build artifacts and the + /// like, and so should be relatively filesystem-friendly. It does not + /// need to be the same as the document title. + pub name: String, + + /// The name of core TeX file bundle upon which this document is based. + /// Either a URL or a local path. 
+ pub bundle_loc: String, + + /// The different outputs that are created from the document source. These + /// may have different formats (e.g., PDF and HTML) or the same format but + /// different settings (e.g., PDF with A4 paper and PDF with US Letter + /// paper). + pub outputs: HashMap, +} + +impl Document { + /// Initialize a Document based on a TOML specification. + /// + /// This function can initialize a document directly from a TOML-formatted + /// data stream. In many circumstances you shouldn’t use it; instead you + /// should open a [`crate::workspace::Workspace`] and get a [`Document`] + /// through it. + pub fn new_from_toml, P2: Into, R: Read>( + src_dir: P1, + build_dir: P2, + toml_data: &mut R, + ) -> Result { + let mut toml_text = String::new(); + toml_data.read_to_string(&mut toml_text)?; + let doc: syntax::Document = toml::from_str(&toml_text)?; + + let mut outputs = HashMap::new(); + + for toml_output in &doc.outputs { + let output = toml_output.to_runtime(); + + if outputs.insert(output.name.clone(), output).is_some() { + bail!( + "duplicated output name `{}` in TOML specification", + &toml_output.name + ); + } + } + + if outputs.is_empty() { + bail!("TOML specification must define at least one output"); + } + + Ok(Document { + src_dir: src_dir.into(), + build_dir: build_dir.into(), + name: doc.doc.name, + bundle_loc: doc.doc.bundle, + outputs, + }) + } + + /// Write out this document's state as a fresh `Tectonic.toml` file in the + /// document’s [`Self::src_dir`]. + /// + /// This should only be used when creating a totally new document. Otherwise + /// TOML rewriting should be used, to preserve the user's file structure, + /// comments, etc. 
+ pub fn create_toml(&self) -> Result<()> { + let outputs = self + .outputs + .values() + .map(|r| syntax::OutputProfile::from_runtime(r)) + .collect(); + + let doc = syntax::Document { + doc: syntax::DocSection { + name: self.name.clone(), + bundle: self.bundle_loc.clone(), + }, + outputs, + }; + + let toml_text = toml::to_string_pretty(&doc)?; + + let mut toml_path = self.src_dir.clone(); + toml_path.push("Tectonic.toml"); + + let mut toml_file = atry!(fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&toml_path); + ["couldn\'t create `{}`", toml_path.display()] + ); + + toml_file.write_all(toml_text.as_bytes())?; + Ok(()) + } + + /// Get this document's toplevel source directory. + /// + /// Note that this directory is the one containing the file `Tectonic.toml`. + /// The actual document source is contained in a subdirectory named `src`. + pub fn src_dir(&self) -> &Path { + &self.src_dir + } + + /// Get this document's build directory. + /// + /// This is the directory where persistent files associated with the + /// document build are stored. By default, it is a subdirectory of + /// [`Self::src_dir`] named `build`. + pub fn build_dir(&self) -> &Path { + &self.build_dir + } + + /// Iterate over the names of the output profiles defined for this document. + /// + /// These may have different formats (e.g., PDF and HTML) or the same format + /// but different settings (e.g., PDF with A4 paper and PDF with US Letter + /// paper). + pub fn output_names(&self) -> impl Iterator { + self.outputs.keys().map(|k| k.as_ref()) + } + + /// Get the path of the "main" output file for the given output profile. + /// + /// The exact meaning of "main" will depend on the output format. 
+ pub fn output_main_file(&self, profile_name: &str) -> PathBuf { + let profile = self.outputs.get(profile_name).unwrap(); + + let mut p = self.build_dir.clone(); + p.push(&profile.name); + p.push(&profile.name); + p.set_extension(match profile.target_type { + BuildTargetType::Pdf => "pdf", + }); + p + } +} + +/// Persistent settings for a document build. +#[derive(Clone, Debug)] +pub struct OutputProfile { + /// The name of this profile. + pub name: String, + + /// The type of output targeted by this profile. + pub target_type: BuildTargetType, + + /// The name of the TeX format used by this profile. + pub tex_format: String, + + /// The name of the preamble file within the `src` directory. + pub preamble_file: String, + + /// The name of the index (main) file within the `src` directory. + pub index_file: String, + + /// The name of the postamble file within the `src` directory. + pub postamble_file: String, + + /// Whether TeX's shell-escape feature should be activated in this profile. + /// + /// Note that besides creating portability and reproducibility issues, + /// shell-escape opens enormous security holes. It should only ever be + /// activated with fully trusted input. + pub shell_escape: bool, +} + +/// The output target type of a document build. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum BuildTargetType { + /// Output to the Portable Document Format (PDF). + Pdf, +} + +impl Document { + /// Create a new in-memory Document, based on the settings of a + /// WorkspaceCreator object. + pub(crate) fn create_for(wc: &WorkspaceCreator, bundle_loc: String) -> Result { + let src_dir = wc.root_dir.clone(); + + let mut build_dir = src_dir.clone(); + build_dir.push("build"); + + // We're a bit roundabout in how we figure out the name of the + // containing src_dir, in an effort to Do The Right Thing with symlinks + // and whatnot. 
+ let name = { + let mut name = "document".to_owned(); + let mut tried_src_path = false; + + if let Some(Component::Normal(t)) = src_dir.components().next_back() { + tried_src_path = true; + + if let Some(s) = t.to_str() { + name = s.to_owned(); + } + } + + if !tried_src_path { + if let Ok(cwd) = env::current_dir() { + let full_path = cwd.join(&src_dir); + + if let Some(Component::Normal(t)) = full_path.components().next_back() { + if let Some(s) = t.to_str() { + name = s.to_owned(); + } + } + } + } + + name + }; + + // All done. + Ok(Document { + src_dir, + build_dir, + name, + bundle_loc, + outputs: crate::document::default_outputs(), + }) + } +} + +pub(crate) fn default_outputs() -> HashMap { + let mut outputs = HashMap::new(); + outputs.insert( + "default".to_owned(), + OutputProfile { + name: "default".to_owned(), + target_type: BuildTargetType::Pdf, + tex_format: "latex".to_owned(), + preamble_file: DEFAULT_PREAMBLE_FILE.to_owned(), + index_file: DEFAULT_INDEX_FILE.to_owned(), + postamble_file: DEFAULT_POSTAMBLE_FILE.to_owned(), + shell_escape: false, + }, + ); + outputs +} + +/// The concrete syntax for saving document state, wired up via serde. 
+mod syntax { + use super::{DEFAULT_INDEX_FILE, DEFAULT_POSTAMBLE_FILE, DEFAULT_PREAMBLE_FILE}; + use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; + + #[derive(Debug, Deserialize, Serialize)] + #[serde(deny_unknown_fields)] + pub struct Document { + pub doc: DocSection, + + #[serde(rename = "output")] + pub outputs: Vec, + } + + #[derive(Debug, Deserialize, Serialize)] + #[serde(deny_unknown_fields)] + pub struct DocSection { + pub name: String, + pub bundle: String, + } + + #[derive(Debug, Deserialize, Serialize)] + #[serde(deny_unknown_fields)] + pub struct OutputProfile { + pub name: String, + #[serde(rename = "type")] + pub target_type: BuildTargetType, + pub tex_format: Option, + #[serde(rename = "preamble")] + pub preamble_file: Option, + #[serde(rename = "index")] + pub index_file: Option, + #[serde(rename = "postamble")] + pub postamble_file: Option, + pub shell_escape: Option, + } + + impl OutputProfile { + pub fn from_runtime(rt: &super::OutputProfile) -> Self { + let tex_format = if rt.tex_format == "latex" { + None + } else { + Some(rt.tex_format.clone()) + }; + + let preamble_file = if rt.preamble_file == DEFAULT_PREAMBLE_FILE { + None + } else { + Some(rt.preamble_file.clone()) + }; + + let index_file = if rt.index_file == DEFAULT_INDEX_FILE { + None + } else { + Some(rt.index_file.clone()) + }; + + let postamble_file = if rt.postamble_file == DEFAULT_POSTAMBLE_FILE { + None + } else { + Some(rt.postamble_file.clone()) + }; + + let shell_escape = if !rt.shell_escape { None } else { Some(true) }; + + OutputProfile { + name: rt.name.clone(), + target_type: BuildTargetType::from_runtime(&rt.target_type), + tex_format, + preamble_file, + index_file, + postamble_file, + shell_escape, + } + } + + pub fn to_runtime(&self) -> super::OutputProfile { + super::OutputProfile { + name: self.name.clone(), + target_type: self.target_type.to_runtime(), + tex_format: self + .tex_format + .as_ref() + .map(|s| s.as_ref()) + .unwrap_or("latex") + 
.to_owned(), + preamble_file: self + .preamble_file + .clone() + .unwrap_or_else(|| DEFAULT_PREAMBLE_FILE.to_owned()), + index_file: self + .index_file + .clone() + .unwrap_or_else(|| DEFAULT_INDEX_FILE.to_owned()), + postamble_file: self + .postamble_file + .clone() + .unwrap_or_else(|| DEFAULT_POSTAMBLE_FILE.to_owned()), + shell_escape: self.shell_escape.unwrap_or_default(), + } + } + } + + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + pub enum BuildTargetType { + Pdf, + } + + impl BuildTargetType { + pub fn from_runtime(rt: &super::BuildTargetType) -> Self { + match rt { + super::BuildTargetType::Pdf => BuildTargetType::Pdf, + } + } + + pub fn to_runtime(&self) -> super::BuildTargetType { + match self { + BuildTargetType::Pdf => super::BuildTargetType::Pdf, + } + } + } + + impl Serialize for BuildTargetType { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(match *self { + BuildTargetType::Pdf => "pdf", + }) + } + } + impl<'de> Deserialize<'de> for BuildTargetType { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + Ok(match s.as_str() { + "pdf" => BuildTargetType::Pdf, + other => return Err(::Error::unknown_variant(other, &["pdf"])), + }) + } + } +} diff --git a/crates/docmodel/src/lib.rs b/crates/docmodel/src/lib.rs new file mode 100644 index 0000000000..c4fa9b8334 --- /dev/null +++ b/crates/docmodel/src/lib.rs @@ -0,0 +1,22 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +#![deny(missing_docs)] + +//! The Tectonic document model and its serialization into `Tectonic.toml`. +//! +//! This crate provides data structures and serialization support for the +//! Tectonic document model and its expression in the `Tectonic.toml` file. It +//! only provides data access: you can use this crate without needing to link +//! with the full Tectonic engines and all of the dependencies they drag in. The +//! 
main `tectonic` crate provides extension traits that attach actual +//! document-processing capabilities to these data structures. +//! +//! Your primary entrypoint to this crate will likely be +//! [`workspace::Workspace::open_from_environment`], which will attempt to load +//! up a workspace by searching the process’ current directory and parents for a +//! `Tectonic.toml` file. There is also [`workspace::WorkspaceCreator`] for +//! creating new workspaces from scratch. + +pub mod document; +pub mod workspace; diff --git a/crates/docmodel/src/workspace.rs b/crates/docmodel/src/workspace.rs new file mode 100644 index 0000000000..ac9dcdb1b0 --- /dev/null +++ b/crates/docmodel/src/workspace.rs @@ -0,0 +1,173 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +//! A Tectonic document-build workspace. +//! +//! For the time being, this is just a thin wrapper to provide access to a +//! `Document` instance. This API exists to future-proof a bit for a potential +//! world where one workspace can contain multiple documents. + +use std::{ + env, + error::Error, + fmt, fs, + io::{self, Write}, + path::PathBuf, +}; +use tectonic_errors::prelude::*; + +use crate::document::Document; + +/// A Tectonic workspace. +/// +/// For the time being, a Workspace is just a thin wrapper to provide access to +/// a `Document` instance. In the future, it might become possible for one +/// workspace to contain multiple documents. +/// +/// In most cases, you will want to create a [`Workspace`] by opening an +/// existing one using [`Workspace::open_from_environment`]. +#[derive(Debug)] +pub struct Workspace { + /// The root directory of the workspace. + root_dir: PathBuf, + + /// This workspace's document. In the future, there might be more than one. + doc: Document, +} + +impl Workspace { + /// Get the first document in the workspace. + /// + /// Right now, workspaces in fact only include one document. That may change + /// in the future. 
+ pub fn first_document(&self) -> &Document { + &self.doc + } + + /// Get the first document in the workspace, mutably. + /// + /// Right now, workspaces in fact only include one document. That may change + /// in the future. + pub fn first_document_mut(&mut self) -> &mut Document { + &mut self.doc + } + + /// Open up a workspace based on the current process environment. + /// + /// This function searches the current directory and its parents for a + /// `Tectonic.toml` file. Because workspaces can currently only contain a + /// single document, the search stops when the first such file is found. If + /// no such file is found, an error downcastable into + /// [`NoWorkspaceFoundError`] is returned. + pub fn open_from_environment() -> Result { + let mut root_dir = env::current_dir()?; + root_dir.push("tmp"); // simplifies loop logic + + while root_dir.pop() { + root_dir.push("Tectonic.toml"); + + let mut doc_file = match fs::File::open(&root_dir) { + Ok(f) => f, + Err(ref e) if e.kind() == io::ErrorKind::NotFound => { + root_dir.pop(); // remove "Tectonic.toml" + continue; // this will pop up one directory and try again + } + Err(e) => return Err(e.into()), + }; + + root_dir.pop(); + let mut doc_build_dir = root_dir.clone(); + doc_build_dir.push("build"); + let doc = Document::new_from_toml(root_dir.clone(), doc_build_dir, &mut doc_file)?; + + return Ok(Workspace { root_dir, doc }); + } + + Err(NoWorkspaceFoundError {}.into()) + } +} + +/// An error for when the environment does not seem to contain a Tectonic +/// workspace. +#[derive(Debug)] +pub struct NoWorkspaceFoundError {} + +impl fmt::Display for NoWorkspaceFoundError { + fn fmt(&self, f: &mut fmt::Formatter) -> StdResult<(), fmt::Error> { + write!(f, "no get-URL backend was enabled") + } +} + +impl Error for NoWorkspaceFoundError {} + +/// A type for creating a new workspace. +#[derive(Debug)] +pub struct WorkspaceCreator { + /// The root directory of the workspace to be created. 
+ pub(crate) root_dir: PathBuf, +} + +impl WorkspaceCreator { + /// Initialize a `WorkspaceCreator` variable. + pub fn new>(root_dir: P) -> Self { + WorkspaceCreator { + root_dir: root_dir.into(), + } + } + + /// Consume this object and attempt to create the new workspace. + pub fn create(self, bundle_loc: String) -> Result { + let doc = Document::create_for(&self, bundle_loc)?; + + let mut tex_dir = self.root_dir.clone(); + tex_dir.push("src"); + + atry!( + fs::create_dir_all(&tex_dir); + ["couldn\'t create workspace directory `{}`", tex_dir.display()] + ); + + doc.create_toml()?; + + // Stub out the TeX. + + { + tex_dir.push("_preamble.tex"); + let mut f = fs::File::create(&tex_dir)?; + f.write_all( + br#"\documentclass{article} +\title{My Title} +\begin{document} +"#, + )?; + tex_dir.pop(); + } + + { + tex_dir.push("index.tex"); + let mut f = fs::File::create(&tex_dir)?; + f.write_all( + br#"Hello, world. +"#, + )?; + tex_dir.pop(); + } + + { + tex_dir.push("_postamble.tex"); + let mut f = fs::File::create(&tex_dir)?; + f.write_all( + br#"\end{document} +"#, + )?; + tex_dir.pop(); + } + + // All done. + + Ok(Workspace { + root_dir: self.root_dir, + doc, + }) + } +} From 8a0d491499e1faeb5950569dd363cd42a2672e5c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 9 Jun 2021 21:57:59 -0400 Subject: [PATCH 15/30] src/driver.rs: add a disable_insecure() method The idea is to give us a one-stop shop for disabling all known-insecure engine features, such as the recently-added "shell escape" functionality. It should be used unless the input data are completely trusted. It is *not* always turned on, however, because some of these insecure features deliver functionalities that users like. --- src/driver.rs | 70 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/src/driver.rs b/src/driver.rs index afa3c6d8d9..b98aad3bac 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -5,7 +5,11 @@ //! 
This module contains the high-level interface that ties together the various //! engines. The main struct is [`ProcessingSession`], which knows how to run -//! (and re-run if necessary) the various engines in the right order. +//! (and re-run if necessary) the various engines in the right order. Such a +//! session can be created with a [`ProcessingSessionBuilder`], which you might +//! obtain from a [`tectonic_docmodel::document::Document`] using the +//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re +//! using the Tectonic document model. //! //! For an example of how to use this module, see `src/bin/tectonic.rs`, which //! contains tectonic's main CLI program. @@ -632,8 +636,14 @@ impl Default for ShellEscapeMode { } /// A builder-style interface for creating a [`ProcessingSession`]. +/// +/// This uses standard builder patterns. See especially +/// [`Self::disable_insecure`], which prevents any known-insecure features from +/// being activated in the session. It should always be the first method you +/// call if you are going to process input that is not totally trusted. #[derive(Default)] pub struct ProcessingSessionBuilder { + disable_insecures: bool, primary_input: PrimaryInputMode, tex_input_name: Option, output_dest: OutputDestination, @@ -656,6 +666,35 @@ pub struct ProcessingSessionBuilder { } impl ProcessingSessionBuilder { + /// Disable any known insecure settings. + /// + /// Some session options, like [`Self::shell_escape_with_temp_dir`], are + /// known to create security risks and should not be used with untrusted + /// input. This function disables any such settings. The intended usage is + /// that you can create a session builder, activate this feature, and then + /// hand the session builder off to other initializers confident in the + /// knowledge that they will be prevented from activating any insecure + /// settings. Therefore this operation is idempotent and irreversible. 
+ /// + /// When you know that you are handling trusted input, on the other hand, + /// some of these known-insecure capabilities provide functionality that + /// users empirically want. This is why this setting isn't permanently + /// enabled. + /// + /// Of course, this approach is only as good as our understanding of + /// Tectonic’s security profile. Future versions might disable or restrict + /// different pieces of functionality as new risks are discovered. + pub fn disable_insecure(&mut self) -> &mut Self { + self.disable_insecures = true; + self + } + + /// A very dumb helper to minimize the chances of boolean logic mistakes. + #[inline(always)] + fn allow_insecures(&self) -> bool { + !self.disable_insecures + } + /// Sets the path to the primary input file. /// /// If a primary input path is not specified, we will default to reading it from stdin. @@ -821,7 +860,10 @@ impl ProcessingSessionBuilder { /// disable shell-escape unless the [`UnstableOptions`] say otherwise, /// in which case a driver-managed temporary directory will be used. pub fn shell_escape_with_work_dir>(&mut self, path: P) -> &mut Self { - self.shell_escape_mode = ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned()); + if self.allow_insecures() { + self.shell_escape_mode = + ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned()); + } self } @@ -830,7 +872,9 @@ impl ProcessingSessionBuilder { /// unless the [`UnstableOptions`] say otherwise, in which case a /// driver-managed temporary directory will be used. 
pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self { - self.shell_escape_mode = ShellEscapeMode::TempDir; + if self.allow_insecures() { + self.shell_escape_mode = ShellEscapeMode::TempDir; + } self } @@ -942,16 +986,20 @@ impl ProcessingSessionBuilder { let mut pdf_path = aux_path.clone(); pdf_path.set_extension("pdf"); - let shell_escape_mode = match self.shell_escape_mode { - ShellEscapeMode::Defaulted => { - if self.unstables.shell_escape { - ShellEscapeMode::TempDir - } else { - ShellEscapeMode::Disabled + let shell_escape_mode = if self.disable_insecures { + ShellEscapeMode::Disabled + } else { + match self.shell_escape_mode { + ShellEscapeMode::Defaulted => { + if self.unstables.shell_escape { + ShellEscapeMode::TempDir + } else { + ShellEscapeMode::Disabled + } } - } - other => other, + other => other, + } }; Ok(ProcessingSession { From 6f329e536199e2a34d591d8eae3b82b9e1663260 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 9 Jun 2021 22:02:38 -0400 Subject: [PATCH 16/30] tectonic(!): start using the "document model" We have now split the handling of `Tectonic.toml` files into a separate crate, `tectonic_docmodel`, so that other people can manipulate documents without needing to link to the main crate and all of the extra dependencies that it entails. This ends up working out pretty nicely, in that we can maintain functionlity with a few targeted extension traits that wire up the document settings to the actual engine invocations. However! This is a BREAKING CHANGE because the `tectonic::document` and `tectonic::workspace` modules have been removed. We don't need or want them anymore! I doubt anyone has been using them, but if so, use `tectonic_docmodel` instead, with the new extension traits in the `tectonic::docmodel` module if needed. 
--- Cargo.lock | 10 + Cargo.toml | 2 + src/bin/tectonic/compile.rs | 30 +- src/bin/tectonic/v2cli.rs | 71 ++++- src/docmodel.rs | 228 ++++++++++++++ src/document.rs | 572 ------------------------------------ src/lib.rs | 5 +- src/workspace.rs | 158 ---------- 8 files changed, 305 insertions(+), 771 deletions(-) create mode 100644 src/docmodel.rs delete mode 100644 src/document.rs delete mode 100644 src/workspace.rs diff --git a/Cargo.lock b/Cargo.lock index 54b08c3710..608570e432 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2123,6 +2123,7 @@ dependencies = [ "sha2", "structopt", "tectonic_bridge_core", + "tectonic_docmodel", "tectonic_engine_bibtex", "tectonic_engine_xdvipdfmx", "tectonic_engine_xetex", @@ -2211,6 +2212,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "tectonic_docmodel" +version = "0.0.0-dev.0" +dependencies = [ + "serde", + "tectonic_errors", + "toml", +] + [[package]] name = "tectonic_engine_bibtex" version = "0.0.0-dev.0" diff --git a/Cargo.toml b/Cargo.toml index 6f09789b14..57e000912a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ serde = { version = "^1.0", features = ["derive"], optional = true } sha2 = "^0.9" structopt = "0.3" tectonic_bridge_core = { path = "crates/bridge_core", version = "0.0.0-dev.0" } +tectonic_docmodel = { path = "crates/docmodel", version = "0.0.0-dev.0" } tectonic_engine_bibtex = { path = "crates/engine_bibtex", version = "0.0.0-dev.0" } tectonic_engine_xdvipdfmx = { path = "crates/engine_xdvipdfmx", version = "0.0.0-dev.0" } tectonic_engine_xetex = { path = "crates/engine_xetex", version = "0.0.0-dev.0" } @@ -127,6 +128,7 @@ tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_icu = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_cfg_support = "thiscommit:aeRoo7oa" tectonic_dep_support = "5faf4205bdd3d31101b749fc32857dd746f9e5bc" +tectonic_docmodel = "cd77b60d48b1ae3ef80d708e6858ea91cd9fa812" tectonic_engine_bibtex = "thiscommit:2021-01-17:KuhaeG1e" 
tectonic_engine_xdvipdfmx = "7dcbc52e58f9774b3d592919a9105377faeac509" tectonic_engine_xetex = "b7a4085fa67c831d4532da6661bddafd1f9c24ff" diff --git a/src/bin/tectonic/compile.rs b/src/bin/tectonic/compile.rs index 5a197cfae1..5cc4526604 100644 --- a/src/bin/tectonic/compile.rs +++ b/src/bin/tectonic/compile.rs @@ -18,9 +18,9 @@ use tectonic::{ config::PersistentConfig, driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, errmsg, - errors::{ErrorKind, Result}, + errors::Result, status::StatusBackend, - tt_error, tt_note, + tt_note, unstable_opts::{UnstableArg, UnstableOptions}, }; @@ -198,30 +198,6 @@ impl CompileOptions { }; sess_builder.build_date(build_date); - let mut sess = sess_builder.create(status)?; - let result = sess.run(status); - - if let Err(e) = &result { - if let ErrorKind::EngineError(engine) = e.kind() { - let output = sess.get_stdout_content(); - - if output.is_empty() { - tt_error!( - status, - "something bad happened inside {}, but no output was logged", - engine - ); - } else { - tt_error!( - status, - "something bad happened inside {}; its output follows:\n", - engine - ); - status.dump_error_logs(&output); - } - } - } - - result.map(|_| 0) + crate::v2cli::run_and_report(sess_builder, status) } } diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index 22c045121b..c50c7ac644 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -10,11 +10,13 @@ use tectonic::{ self, config::PersistentConfig, ctry, - errors::{Result, SyncError}, + docmodel::{DocumentExt, DocumentSetupOptions, WorkspaceCreatorExt}, + driver::ProcessingSessionBuilder, + errors::{ErrorKind, Result, SyncError}, status::{termcolor::TermcolorStatusBackend, ChatterLevel, StatusBackend}, tt_error, tt_note, - workspace::{self, Workspace}, }; +use tectonic_docmodel::workspace::{Workspace, WorkspaceCreator}; use tectonic_errors::Error as NewError; use tectonic_status_base::plain::PlainStatusBackend; @@ -163,15 +165,33 @@ impl BuildCommand { 
let ws = Workspace::open_from_environment()?; let doc = ws.first_document(); + // XXX NO WAY TO DISABLE INSECURE FEATURES + let mut setup_options = DocumentSetupOptions::new(false); + setup_options.only_cached(self.only_cached); + for output_name in doc.output_names() { - let mut opts = doc.build_options_for(output_name); - opts.format_cache_path(config.format_cache_path()?) - .only_cached(self.only_cached) + let mut builder = doc.setup_session(output_name, &setup_options, status)?; + + builder + .format_cache_path(config.format_cache_path()?) .keep_intermediates(self.keep_intermediates) .keep_logs(self.keep_logs) - .print_stdout(self.print_stdout) - .open(self.open); - doc.build(output_name, &opts, status)?; + .print_stdout(self.print_stdout); + + run_and_report(builder, status)?; + + if self.open { + let out_file = doc.output_main_file(output_name); + tt_note!(status, "opening `{}`", out_file.display()); + if let Err(e) = open::that(&out_file) { + tt_error!( + status, + "failed to open `{}` with system handler", + out_file.display(); + e.into() + ) + } + } } Ok(0) @@ -264,11 +284,42 @@ impl NewCommand { self.path.display() ); - let wc = workspace::WorkspaceCreator::new(self.path); + let wc = WorkspaceCreator::new(self.path); ctry!( - wc.create(&config, status); + wc.create_defaulted(&config, status); "failed to create the new Tectonic workspace" ); Ok(0) } } + +pub(crate) fn run_and_report( + sess_builder: ProcessingSessionBuilder, + status: &mut dyn StatusBackend, +) -> Result { + let mut sess = sess_builder.create(status)?; + let result = sess.run(status); + + if let Err(e) = &result { + if let ErrorKind::EngineError(engine) = e.kind() { + let output = sess.get_stdout_content(); + + if output.is_empty() { + tt_error!( + status, + "something bad happened inside {}, but no output was logged", + engine + ); + } else { + tt_error!( + status, + "something bad happened inside {}; its output follows:\n", + engine + ); + status.dump_error_logs(&output); + } + } + } + + 
result.map(|_| 0) +} diff --git a/src/docmodel.rs b/src/docmodel.rs new file mode 100644 index 0000000000..cb747e7344 --- /dev/null +++ b/src/docmodel.rs @@ -0,0 +1,228 @@ +// Copyright 2020-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Connecting the Tectonic document model to the engines. +//! +//! This module extends the document model types provided by the +//! `tectonic_docmodel` crate with the actual document-processing capabilities +//! provided by the processing engines. + +use std::{ + fmt::Write as FmtWrite, + fs, io, + path::{Path, PathBuf}, +}; +use tectonic_docmodel::{ + document::{BuildTargetType, Document}, + workspace::{Workspace, WorkspaceCreator}, +}; +use tectonic_geturl::{DefaultBackend, GetUrlBackend}; +use url::Url; + +use crate::{ + config, ctry, + driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, + errors::{ErrorKind, Result}, + io::{cached_itarbundle::CachedITarBundle, dirbundle::DirBundle, zipbundle::ZipBundle, Bundle}, + status::StatusBackend, + test_util, tt_note, +}; + +/// Options for setting up [`Document`] instances with the driver +#[derive(Clone, Debug)] +pub struct DocumentSetupOptions { + /// Disable requests to the network, if the document’s bundle happens to be + /// network-based. + only_cached: bool, + + /// Disable all known-insecure engine features. + /// + /// This setting should be true if any untrusted input will be handled. + /// However, it is not always activated because sometimes users want the + /// functionality provided by known-insecure features (such as + /// shell-escape). + disable_insecure: bool, +} + +impl DocumentSetupOptions { + /// Create a new set of document setup options. + /// + /// This function primarily exists to *force* you to consider whether you + /// ought to disable known-insecure features. As usual, they should be + /// disabled if there is any untrusted input that will be handled. 
+ pub fn new(disable_insecure: bool) -> Self { + DocumentSetupOptions { + only_cached: false, + disable_insecure, + } + } + + /// Specify whether any requests to the network will be made for bundle + /// resources. + /// + /// If the document’s backing bundle is not network-based, this setting will + /// have no effect. + pub fn only_cached(&mut self, s: bool) -> &mut Self { + self.only_cached = s; + self + } +} + +pub trait DocumentExt { + /// Get the bundle used by this document. + /// + /// This parses [`Document::bundle_loc`] and turns it into the appropriate + /// bundle backend. + fn bundle( + &self, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result>; + + /// Set up a [`ProcessingSessionBuilder`] for one of the outputs. + /// + /// The *output_profile* argument gives the name of the document’s output profile to + /// use. + fn setup_session( + &self, + output_profile: &str, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result; +} + +impl DocumentExt for Document { + fn bundle( + &self, + setup_options: &DocumentSetupOptions, + status: &mut dyn StatusBackend, + ) -> Result> { + fn bundle_from_path(p: PathBuf) -> Result> { + if p.is_dir() { + Ok(Box::new(DirBundle::new(p))) + } else { + Ok(Box::new(ZipBundle::open(p)?)) + } + } + + if config::is_config_test_mode_activated() { + Ok(Box::new(test_util::TestBundle::default())) + } else if let Ok(url) = Url::parse(&self.bundle_loc) { + if url.scheme() != "file" { + let bundle = CachedITarBundle::new( + &self.bundle_loc, + setup_options.only_cached, + None, + status, + )?; + Ok(Box::new(bundle)) + } else { + let file_path = url.to_file_path().map_err(|_| { + io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") + })?; + bundle_from_path(file_path) + } + } else { + bundle_from_path(Path::new(&self.bundle_loc).to_owned()) + } + } + + fn setup_session( + &self, + output_profile: &str, + setup_options: &DocumentSetupOptions, + 
status: &mut dyn StatusBackend, + ) -> Result { + let profile = self.outputs.get(output_profile).ok_or_else(|| { + ErrorKind::Msg(format!( + "unrecognized output profile name \"{}\"", + output_profile + )) + })?; + + let output_format = match profile.target_type { + BuildTargetType::Pdf => OutputFormat::Pdf, + }; + + let mut input_buffer = String::new(); + if !profile.preamble_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.preamble_file)?; + } + if !profile.index_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.index_file)?; + } + if !profile.postamble_file.is_empty() { + writeln!(input_buffer, "\\input{{{}}}", profile.postamble_file)?; + } + + let mut sess_builder = ProcessingSessionBuilder::default(); + + // Do this before anything else!!!! + if setup_options.disable_insecure { + sess_builder.disable_insecure(); + } + + sess_builder + .output_format(output_format) + .format_name(&profile.tex_format) + .build_date(std::time::SystemTime::now()) + .pass(PassSetting::Default) + .primary_input_buffer(input_buffer.as_bytes()) + .tex_input_name(output_profile); + + if profile.shell_escape { + // For now, this is the only option we allow. + sess_builder.shell_escape_with_temp_dir(); + } + + if setup_options.only_cached { + tt_note!(status, "using only cached resource files"); + } + sess_builder.bundle(self.bundle(setup_options, status)?); + + let mut tex_dir = self.src_dir().to_owned(); + tex_dir.push("src"); + sess_builder.filesystem_root(&tex_dir); + + let mut output_dir = self.build_dir().to_owned(); + output_dir.push(output_profile); + ctry!( + fs::create_dir_all(&output_dir); + "couldn\'t create output directory `{}`", output_dir.display() + ); + sess_builder.output_dir(&output_dir); + + Ok(sess_builder) + } +} + +/// Extension methods for [`WorkspaceCreator`]. +pub trait WorkspaceCreatorExt { + /// Create the new workspace with a good default for the bundle location. 
+ /// + /// This method is a thin wrapper on [`WorkspaceCreator::create`] that uses + /// the current configuration to determine a good default bundle location + /// for the main document. + fn create_defaulted( + self, + config: &config::PersistentConfig, + status: &mut dyn StatusBackend, + ) -> Result; +} + +impl WorkspaceCreatorExt for WorkspaceCreator { + fn create_defaulted( + self, + config: &config::PersistentConfig, + status: &mut dyn StatusBackend, + ) -> Result { + let bundle_loc = if config::is_config_test_mode_activated() { + "test-bundle://".to_owned() + } else { + let mut gub = DefaultBackend::default(); + gub.resolve_url(config.default_bundle_loc(), status)? + }; + + Ok(self.create(bundle_loc)?) + } +} diff --git a/src/document.rs b/src/document.rs deleted file mode 100644 index cc6efcc035..0000000000 --- a/src/document.rs +++ /dev/null @@ -1,572 +0,0 @@ -// Copyright 2020 the Tectonic Project -// Licensed under the MIT License. - -//! Tectonic document definitions. - -use std::{ - collections::HashMap, - env, - fmt::Write as FmtWrite, - fs, - io::{self, Read, Write}, - path::{Component, Path, PathBuf}, -}; -use tectonic_geturl::{DefaultBackend, GetUrlBackend}; -use url::Url; - -use crate::{ - config, ctry, - driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, - errmsg, - errors::{ErrorKind, Result}, - io::{cached_itarbundle::CachedITarBundle, dirbundle::DirBundle, zipbundle::ZipBundle, Bundle}, - status::StatusBackend, - test_util, tt_error, tt_note, - workspace::WorkspaceCreator, -}; - -const DEFAULT_PREAMBLE_FILE: &str = "_preamble.tex"; -const DEFAULT_INDEX_FILE: &str = "index.tex"; -const DEFAULT_POSTAMBLE_FILE: &str = "_postamble.tex"; - -/// A Tectonic document. -#[derive(Debug)] -pub struct Document { - /// The directory containing the `Tectonic.toml` file and document source. - src_dir: PathBuf, - - /// The directory where document build artifacts will be output. 
By default - /// this will be a subdirectory of `src_dir` named `build`. - build_dir: PathBuf, - - /// The document name. This will be used to name build artifacts and the - /// like, and so should be relatively filesystem-friendly. It does not - /// need to be the same as the document title. - name: String, - - /// The name of core TeX file bundle upon which this document is based. - /// Either a URL or a local path. - bundle_loc: String, - - /// The different outputs that are created from the document source. These - /// may have different formats (e.g., PDF and HTML) or the same format but - /// different settings (e.g., PDF with A4 paper and PDF with US Letter - /// paper). - outputs: HashMap, -} - -fn default_outputs() -> HashMap { - let mut outputs = HashMap::new(); - outputs.insert( - "default".to_owned(), - OutputProfile { - name: "default".to_owned(), - target_type: BuildTargetType::Pdf, - tex_format: "latex".to_owned(), - preamble_file: DEFAULT_PREAMBLE_FILE.to_owned(), - index_file: DEFAULT_INDEX_FILE.to_owned(), - postamble_file: DEFAULT_POSTAMBLE_FILE.to_owned(), - shell_escape: false, - }, - ); - outputs -} - -impl Document { - /// Initialize a Document based on a TOML specification - pub(crate) fn new_from_toml( - src_dir: PathBuf, - build_dir: PathBuf, - toml_data: &mut R, - ) -> Result { - let mut toml_text = String::new(); - toml_data.read_to_string(&mut toml_text)?; - let doc: syntax::Document = toml::from_str(&toml_text)?; - - let mut outputs = HashMap::new(); - - for toml_output in &doc.outputs { - let output = toml_output.to_runtime(); - - if outputs.insert(output.name.clone(), output).is_some() { - return Err(errmsg!( - "duplicated output name `{}` in TOML specification", - &toml_output.name - )); - } - } - - if outputs.is_empty() { - return Err(errmsg!( - "TOML specification must define at least one output" - )); - } - - Ok(Document { - src_dir, - build_dir, - name: doc.doc.name, - bundle_loc: doc.doc.bundle, - outputs, - }) - } - - /// 
Create a new in-memory Document, based on the settings of a - /// WorkspaceCreator object. - pub(crate) fn new_for_creator( - wc: &WorkspaceCreator, - config: &config::PersistentConfig, - status: &mut dyn StatusBackend, - ) -> Result { - let src_dir = wc.root_dir().to_owned(); - - let mut build_dir = src_dir.clone(); - build_dir.push("build"); - - // We're a bit roundabout in how we figure out the name of the - // containing src_dir, in an effort to Do The Right Thing with symlinks - // and whatnot. - let name = { - let mut name = "document".to_owned(); - let mut tried_src_path = false; - - if let Some(Component::Normal(t)) = src_dir.components().next_back() { - tried_src_path = true; - - if let Some(s) = t.to_str() { - name = s.to_owned(); - } - } - - if !tried_src_path { - if let Ok(cwd) = env::current_dir() { - let full_path = cwd.join(&src_dir); - - if let Some(Component::Normal(t)) = full_path.components().next_back() { - if let Some(s) = t.to_str() { - name = s.to_owned(); - } - } - } - } - - name - }; - - // Determine the bundle URL that we'll put in as the default. - - let bundle_loc = if config::is_config_test_mode_activated() { - "test-bundle".to_owned() - } else { - let mut gub = DefaultBackend::default(); - gub.resolve_url(config.default_bundle_loc(), status)? - }; - - // All done. - Ok(Document { - src_dir, - build_dir, - name, - bundle_loc, - outputs: default_outputs(), - }) - } - - /// Write out this document's state as a new TOML file. This should only be - /// used when creating a totally new document; otherwise TOML rewriting - /// should be used. 
- pub(crate) fn create_toml(&self) -> Result<()> { - let outputs = self - .outputs - .values() - .map(|r| syntax::OutputProfile::from_runtime(r)) - .collect(); - - let doc = syntax::Document { - doc: syntax::DocSection { - name: self.name.clone(), - bundle: self.bundle_loc.clone(), - }, - outputs, - }; - - let toml_text = toml::to_string_pretty(&doc)?; - - let mut toml_path = self.src_dir.clone(); - toml_path.push("Tectonic.toml"); - - let mut toml_file = ctry!(fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&toml_path); - "couldn\'t create `{}`", toml_path.display() - ); - - toml_file.write_all(toml_text.as_bytes())?; - Ok(()) - } -} - -/// Persistent settings for a document build. -#[derive(Clone, Debug)] -pub struct OutputProfile { - name: String, - target_type: BuildTargetType, - tex_format: String, - preamble_file: String, - index_file: String, - postamble_file: String, - shell_escape: bool, -} - -/// The output target type of a document build. -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum BuildTargetType { - /// Output to the Portable Document Format (PDF). - Pdf, -} - -/// Temporary options for a document build. 
-#[derive(Clone, Debug, Default)] -pub struct BuildOptions { - format_cache_path: Option, - only_cached: bool, - keep_intermediates: bool, - keep_logs: bool, - print_stdout: bool, - open: bool, -} - -impl BuildOptions { - pub fn format_cache_path>(&mut self, p: P) -> &mut Self { - self.format_cache_path = Some(p.as_ref().to_owned()); - self - } - - pub fn only_cached(&mut self, value: bool) -> &mut Self { - self.only_cached = value; - self - } - - pub fn keep_intermediates(&mut self, value: bool) -> &mut Self { - self.keep_intermediates = value; - self - } - - pub fn keep_logs(&mut self, value: bool) -> &mut Self { - self.keep_logs = value; - self - } - - pub fn print_stdout(&mut self, value: bool) -> &mut Self { - self.print_stdout = value; - self - } - - pub fn open(&mut self, value: bool) -> &mut Self { - self.open = value; - self - } -} - -impl Document { - /// Iterate over the names of the output profiles defined for this document. - /// These may have different formats (e.g., PDF and HTML) or the same format - /// but different settings (e.g., PDF with A4 paper and PDF with US Letter - /// paper). - pub fn output_names(&self) -> impl Iterator { - self.outputs.keys().map(|k| k.as_ref()) - } - - /// Get default the build options associated with an output profile. - /// - /// Panics if the output name is not one of the ones associated with this - /// document. - pub fn build_options_for(&self, _output_profile: &str) -> BuildOptions { - BuildOptions::default() - } - - /// Get the bundle used by this document. 
- pub fn bundle( - &self, - only_cached: bool, - status: &mut dyn StatusBackend, - ) -> Result> { - fn bundle_from_path(p: PathBuf) -> Result> { - if p.is_dir() { - Ok(Box::new(DirBundle::new(p))) - } else { - Ok(Box::new(ZipBundle::open(p)?)) - } - } - - if config::is_config_test_mode_activated() { - Ok(Box::new(test_util::TestBundle::default())) - } else if let Ok(url) = Url::parse(&self.bundle_loc) { - if url.scheme() != "file" { - let bundle = CachedITarBundle::new(&self.bundle_loc, only_cached, None, status)?; - Ok(Box::new(bundle)) - } else { - let file_path = url.to_file_path().map_err(|_| { - io::Error::new(io::ErrorKind::InvalidInput, "failed to parse local path") - })?; - bundle_from_path(file_path) - } - } else { - bundle_from_path(Path::new(&self.bundle_loc).to_owned()) - } - } - - /// Build one of the document’s outputs. - pub fn build( - &self, - output_profile: &str, - options: &BuildOptions, - status: &mut dyn StatusBackend, - ) -> Result { - let profile = self.outputs.get(output_profile).unwrap(); - - let output_format = match profile.target_type { - BuildTargetType::Pdf => OutputFormat::Pdf, - }; - - let mut input_buffer = String::new(); - if !profile.preamble_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.preamble_file)?; - } - if !profile.index_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.index_file)?; - } - if !profile.postamble_file.is_empty() { - writeln!(input_buffer, "\\input{{{}}}", profile.postamble_file)?; - } - - let mut sess_builder = ProcessingSessionBuilder::default(); - sess_builder - .output_format(output_format) - .format_name(&profile.tex_format) - .build_date(std::time::SystemTime::now()) - .pass(PassSetting::Default) - .primary_input_buffer(input_buffer.as_bytes()) - .tex_input_name(output_profile) - .keep_logs(options.keep_logs) - .keep_intermediates(options.keep_intermediates) - .print_stdout(options.print_stdout); - - if profile.shell_escape { - // For now, this is the only option 
we allow. - sess_builder.shell_escape_with_temp_dir(); - } - - if options.only_cached { - tt_note!(status, "using only cached resource files"); - } - sess_builder.bundle(self.bundle(options.only_cached, status)?); - - // keep intermed, keep logs, print stdout - - if let Some(ref p) = options.format_cache_path { - sess_builder.format_cache_path(p); - } - - let mut tex_dir = self.src_dir.clone(); - tex_dir.push("src"); - sess_builder.filesystem_root(&tex_dir); - - let mut output_dir = self.build_dir.clone(); - output_dir.push(output_profile); - ctry!( - fs::create_dir_all(&output_dir); - "couldn\'t create output directory `{}`", output_dir.display() - ); - sess_builder.output_dir(&output_dir); - - let mut sess = sess_builder.create(status)?; - let result = sess.run(status); - - if let Err(e) = &result { - if let ErrorKind::EngineError(engine) = e.kind() { - let output = sess.get_stdout_content(); - - if output.is_empty() { - tt_error!( - status, - "something bad happened inside {}, but no output was logged", - engine - ); - } else { - tt_error!( - status, - "something bad happened inside {}; its output follows:\n", - engine - ); - status.dump_error_logs(&output); - } - } - } else if options.open { - let out_file = - output_dir - .join(&profile.name) - .with_extension(match profile.target_type { - BuildTargetType::Pdf => "pdf", - }); - tt_note!(status, "opening `{}`", out_file.display()); - if let Err(e) = open::that(&out_file) { - tt_error!( - status, - "failed to open `{}` with system handler", - out_file.display(); - e.into() - ) - } - } - - result.map(|_| 0) - } -} - -/// The concrete syntax for saving document state, wired up via serde. 
-mod syntax { - use super::{DEFAULT_INDEX_FILE, DEFAULT_POSTAMBLE_FILE, DEFAULT_PREAMBLE_FILE}; - use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer}; - - #[derive(Debug, Deserialize, Serialize)] - #[serde(deny_unknown_fields)] - pub struct Document { - pub doc: DocSection, - - #[serde(rename = "output")] - pub outputs: Vec, - } - - #[derive(Debug, Deserialize, Serialize)] - #[serde(deny_unknown_fields)] - pub struct DocSection { - pub name: String, - pub bundle: String, - } - - #[derive(Debug, Deserialize, Serialize)] - #[serde(deny_unknown_fields)] - pub struct OutputProfile { - pub name: String, - #[serde(rename = "type")] - pub target_type: BuildTargetType, - pub tex_format: Option, - #[serde(rename = "preamble")] - pub preamble_file: Option, - #[serde(rename = "index")] - pub index_file: Option, - #[serde(rename = "postamble")] - pub postamble_file: Option, - pub shell_escape: Option, - } - - impl OutputProfile { - pub fn from_runtime(rt: &super::OutputProfile) -> Self { - let tex_format = if rt.tex_format == "latex" { - None - } else { - Some(rt.tex_format.clone()) - }; - - let preamble_file = if rt.preamble_file == DEFAULT_PREAMBLE_FILE { - None - } else { - Some(rt.preamble_file.clone()) - }; - - let index_file = if rt.index_file == DEFAULT_INDEX_FILE { - None - } else { - Some(rt.index_file.clone()) - }; - - let postamble_file = if rt.postamble_file == DEFAULT_POSTAMBLE_FILE { - None - } else { - Some(rt.postamble_file.clone()) - }; - - let shell_escape = if !rt.shell_escape { None } else { Some(true) }; - - OutputProfile { - name: rt.name.clone(), - target_type: BuildTargetType::from_runtime(&rt.target_type), - tex_format, - preamble_file, - index_file, - postamble_file, - shell_escape, - } - } - - pub fn to_runtime(&self) -> super::OutputProfile { - super::OutputProfile { - name: self.name.clone(), - target_type: self.target_type.to_runtime(), - tex_format: self - .tex_format - .as_ref() - .map(|s| s.as_ref()) - .unwrap_or("latex") - 
.to_owned(), - preamble_file: self - .preamble_file - .clone() - .unwrap_or_else(|| DEFAULT_PREAMBLE_FILE.to_owned()), - index_file: self - .index_file - .clone() - .unwrap_or_else(|| DEFAULT_INDEX_FILE.to_owned()), - postamble_file: self - .postamble_file - .clone() - .unwrap_or_else(|| DEFAULT_POSTAMBLE_FILE.to_owned()), - shell_escape: self.shell_escape.unwrap_or_default(), - } - } - } - - #[derive(Clone, Copy, Debug, Eq, PartialEq)] - pub enum BuildTargetType { - Pdf, - } - - impl BuildTargetType { - pub fn from_runtime(rt: &super::BuildTargetType) -> Self { - match rt { - super::BuildTargetType::Pdf => BuildTargetType::Pdf, - } - } - - pub fn to_runtime(&self) -> super::BuildTargetType { - match self { - BuildTargetType::Pdf => super::BuildTargetType::Pdf, - } - } - } - - impl Serialize for BuildTargetType { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - serializer.serialize_str(match *self { - BuildTargetType::Pdf => "pdf", - }) - } - } - impl<'de> Deserialize<'de> for BuildTargetType { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - let s = String::deserialize(deserializer)?; - Ok(match s.as_str() { - "pdf" => BuildTargetType::Pdf, - other => return Err(::Error::unknown_variant(other, &["pdf"])), - }) - } - } -} diff --git a/src/lib.rs b/src/lib.rs index db9d34cde7..a06c59d3bc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,16 +78,13 @@ mod app_dirs; pub mod config; pub mod digest; -#[cfg(feature = "serialization")] -pub mod document; +pub mod docmodel; pub mod driver; pub mod engines; pub mod errors; pub mod io; pub mod status; pub mod unstable_opts; -#[cfg(feature = "serialization")] -pub mod workspace; // Note: this module is intentionally *not* gated by #[cfg(test)] -- see its // docstring for details. 
diff --git a/src/workspace.rs b/src/workspace.rs deleted file mode 100644 index 43f716445f..0000000000 --- a/src/workspace.rs +++ /dev/null @@ -1,158 +0,0 @@ -// Copyright 2020 the Tectonic Project -// Licensed under the MIT License. - -//! A Tectonic document-build workspace. -//! -//! For the time being, this is just a thin wrapper to provide access to a -//! `Document` instance. This API exists to future-proof a bit for a potential -//! world where one workspace can contain multiple documents. - -use std::{ - env, fs, - io::{self, Write}, - path::{Path, PathBuf}, -}; - -use crate::{ - config::PersistentConfig, ctry, document::Document, errmsg, errors::Result, - status::StatusBackend, -}; - -/// A Tectonic workspace. -#[derive(Debug)] -pub struct Workspace { - /// The root directory of the workspace. - root_dir: PathBuf, - - /// This workspace's document. In the future, there might be more than one. - doc: Document, -} - -impl Workspace { - /// Get the first document in the workspace. - /// - /// Right now, workspaces in fact only include one document. That may change - /// in the future. - pub fn first_document(&self) -> &Document { - &self.doc - } - - /// Get the first document in the workspace, mutably. - /// - /// Right now, workspaces in fact only include one document. That may change - /// in the future. - pub fn first_document_mut(&mut self) -> &mut Document { - &mut self.doc - } - - /// Open up a workspace baced on the current process environment. 
- pub fn open_from_environment() -> Result { - let mut root_dir = env::current_dir()?; - root_dir.push("tmp"); // simplifies loop logic - - while root_dir.pop() { - root_dir.push("Tectonic.toml"); - - let mut doc_file = match fs::File::open(&root_dir) { - Ok(f) => f, - Err(ref e) if e.kind() == io::ErrorKind::NotFound => { - root_dir.pop(); // remove "Tectonic.toml" - continue; // this will pop up one directory and try again - } - Err(e) => return Err(e.into()), - }; - - root_dir.pop(); - let mut doc_build_dir = root_dir.clone(); - doc_build_dir.push("build"); - let doc = Document::new_from_toml(root_dir.clone(), doc_build_dir, &mut doc_file)?; - - return Ok(Workspace { root_dir, doc }); - } - - Err(errmsg!( - "No `Tectonic.toml` found in current directory or any of its parents" - )) - } -} - -/// A type for creating a new workspace. -#[derive(Debug)] -pub struct WorkspaceCreator { - /// The root directory of the workspace to be created. - root_dir: PathBuf, -} - -impl WorkspaceCreator { - /// Initialize a `WorkspaceCreator` variable. - pub fn new>(root_dir: P) -> Self { - WorkspaceCreator { - root_dir: root_dir.into(), - } - } - - /// Consume this object and attempt to create the new workspace. - pub fn create( - self, - config: &PersistentConfig, - status: &mut dyn StatusBackend, - ) -> Result { - let doc = Document::new_for_creator(&self, config, status)?; - - let mut tex_dir = self.root_dir.clone(); - tex_dir.push("src"); - - ctry!( - fs::create_dir_all(&tex_dir); - "couldn\'t create workspace directory `{}`", tex_dir.display() - ); - - doc.create_toml()?; - - // Stub out the TeX. - - { - tex_dir.push("_preamble.tex"); - let mut f = fs::File::create(&tex_dir)?; - f.write_all( - br#"\documentclass{article} -\title{My Title} -\begin{document} -"#, - )?; - tex_dir.pop(); - } - - { - tex_dir.push("index.tex"); - let mut f = fs::File::create(&tex_dir)?; - f.write_all( - br#"Hello, world. 
-"#, - )?; - tex_dir.pop(); - } - - { - tex_dir.push("_postamble.tex"); - let mut f = fs::File::create(&tex_dir)?; - f.write_all( - br#"\end{document} -"#, - )?; - tex_dir.pop(); - } - - // All done. - - Ok(Workspace { - root_dir: self.root_dir, - doc, - }) - } - - /// Get the root directory of the workspace. - pub fn root_dir(&self) -> &Path { - &self.root_dir - } -} From 14763bf7afa552f275d14aaf8effed65190c8f1c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Wed, 9 Jun 2021 22:27:36 -0400 Subject: [PATCH 17/30] tectonic: fix build for no-serialization-feature Fine, fix things up so that we build when the `serialization` Cargo feature is disabled. This is getting annoying to support and I'm not sure if it's useful anymore (we can build statically linked with proc macros by adopting a cross-compilation model), but it wasn't too hard to fix up here. --- Cargo.toml | 13 ++++++++----- src/bin/tectonic/compile.rs | 36 +++++++++++++++++++++++++++++++++--- src/bin/tectonic/v2cli.rs | 36 ++---------------------------------- src/lib.rs | 1 + 4 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 57e000912a..af30c9bc7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,7 +67,7 @@ serde = { version = "^1.0", features = ["derive"], optional = true } sha2 = "^0.9" structopt = "0.3" tectonic_bridge_core = { path = "crates/bridge_core", version = "0.0.0-dev.0" } -tectonic_docmodel = { path = "crates/docmodel", version = "0.0.0-dev.0" } +tectonic_docmodel = { path = "crates/docmodel", version = "0.0.0-dev.0", optional = true } tectonic_engine_bibtex = { path = "crates/engine_bibtex", version = "0.0.0-dev.0" } tectonic_engine_xdvipdfmx = { path = "crates/engine_xdvipdfmx", version = "0.0.0-dev.0" } tectonic_engine_xetex = { path = "crates/engine_xetex", version = "0.0.0-dev.0" } @@ -87,10 +87,13 @@ zip = { version = "^0.5", default-features = false, features = ["deflate"] } [features] default = ["geturl-reqwest", "serialization"] -# Note: we 
used to have this to couple "serde" and "serde-derive", but we've -# adopted the newer scheme to avoid having to depend on both -- should maybe -# just get rid of this feature: -serialization = ["serde", "toml"] +# The main motivation for this feature was to be able to compile without +# proc-macros (via serde-derive), for statically-linked targets which can't use +# them. In the CI, we now build for statically-linked targets using a +# cross-compilation model that allows us to have proc-macros anyway. So maybe +# this feature should go away? It's kind of annoying to support, and at this +# point proc-macros may have snuck into the dependency tree elsewhere, anyway. +serialization = ["serde", "tectonic_docmodel", "toml"] external-harfbuzz = ["tectonic_engine_xetex/external-harfbuzz"] diff --git a/src/bin/tectonic/compile.rs b/src/bin/tectonic/compile.rs index 5cc4526604..8830eaef3d 100644 --- a/src/bin/tectonic/compile.rs +++ b/src/bin/tectonic/compile.rs @@ -18,9 +18,9 @@ use tectonic::{ config::PersistentConfig, driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, errmsg, - errors::Result, + errors::{ErrorKind, Result}, status::StatusBackend, - tt_note, + tt_error, tt_note, unstable_opts::{UnstableArg, UnstableOptions}, }; @@ -197,7 +197,37 @@ impl CompileOptions { None => time::SystemTime::now(), }; sess_builder.build_date(build_date); + run_and_report(sess_builder, status) + } +} - crate::v2cli::run_and_report(sess_builder, status) +pub(crate) fn run_and_report( + sess_builder: ProcessingSessionBuilder, + status: &mut dyn StatusBackend, +) -> Result { + let mut sess = sess_builder.create(status)?; + let result = sess.run(status); + + if let Err(e) = &result { + if let ErrorKind::EngineError(engine) = e.kind() { + let output = sess.get_stdout_content(); + + if output.is_empty() { + tt_error!( + status, + "something bad happened inside {}, but no output was logged", + engine + ); + } else { + tt_error!( + status, + "something bad happened inside {}; its 
output follows:\n", + engine + ); + status.dump_error_logs(&output); + } + } } + + result.map(|_| 0) } diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index c50c7ac644..e9012432b6 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -11,8 +11,7 @@ use tectonic::{ config::PersistentConfig, ctry, docmodel::{DocumentExt, DocumentSetupOptions, WorkspaceCreatorExt}, - driver::ProcessingSessionBuilder, - errors::{ErrorKind, Result, SyncError}, + errors::{Result, SyncError}, status::{termcolor::TermcolorStatusBackend, ChatterLevel, StatusBackend}, tt_error, tt_note, }; @@ -178,7 +177,7 @@ impl BuildCommand { .keep_logs(self.keep_logs) .print_stdout(self.print_stdout); - run_and_report(builder, status)?; + crate::compile::run_and_report(builder, status)?; if self.open { let out_file = doc.output_main_file(output_name); @@ -292,34 +291,3 @@ impl NewCommand { Ok(0) } } - -pub(crate) fn run_and_report( - sess_builder: ProcessingSessionBuilder, - status: &mut dyn StatusBackend, -) -> Result { - let mut sess = sess_builder.create(status)?; - let result = sess.run(status); - - if let Err(e) = &result { - if let ErrorKind::EngineError(engine) = e.kind() { - let output = sess.get_stdout_content(); - - if output.is_empty() { - tt_error!( - status, - "something bad happened inside {}, but no output was logged", - engine - ); - } else { - tt_error!( - status, - "something bad happened inside {}; its output follows:\n", - engine - ); - status.dump_error_logs(&output); - } - } - } - - result.map(|_| 0) -} diff --git a/src/lib.rs b/src/lib.rs index a06c59d3bc..d4d4c358f6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,7 @@ mod app_dirs; pub mod config; pub mod digest; +#[cfg(feature = "serialization")] pub mod docmodel; pub mod driver; pub mod engines; From bd8e710957d3e51bd550e5ce81fa2e85de2d8d0a Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 10 Jun 2021 23:00:23 -0400 Subject: [PATCH 18/30] io_base: correct broken internal links 
in docs --- crates/io_base/src/lib.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/io_base/src/lib.rs b/crates/io_base/src/lib.rs index 26d28097c7..73b9569d4f 100644 --- a/crates/io_base/src/lib.rs +++ b/crates/io_base/src/lib.rs @@ -28,6 +28,7 @@ use thiserror::Error as ThisError; use crate::digest::DigestData; +pub mod app_dirs; pub mod digest; pub mod filesystem; pub mod flate2; @@ -452,7 +453,7 @@ pub trait IoProvider: AsIoProviderMut { /// Open the named file for input and return filesystem path information. /// - /// This method extends [`input_open_name`] to help support SyncTeX output. + /// This method extends [`Self::input_open_name`] to help support SyncTeX output. /// While SyncTeX output files should contain absolute source file paths, /// Tectonic’s pluggable I/O system makes it so that the mapping between /// input names and filesystem paths is not well-defined. This optional @@ -464,7 +465,7 @@ pub trait IoProvider: AsIoProviderMut { /// that might provide path information, or you are implementing an I/O /// provider that delegates to other I/O providers, you should implement /// this function fully, and then provide a simple implementation of - /// [`input_open_name`] that drops the pathing information. + /// [`Self::input_open_name`] that drops the pathing information. fn input_open_name_with_abspath( &mut self, name: &str, @@ -488,8 +489,8 @@ pub trait IoProvider: AsIoProviderMut { /// Open the primary input and return filesystem path information. /// - /// This method is as to [`input_open_primary`] as - /// [`input_open_name_with_abspath`] is to [`input_open_name`]. + /// This method is as to [`Self::input_open_primary`] as + /// [`Self::input_open_name_with_abspath`] is to [`Self::input_open_name`]. 
fn input_open_primary_with_abspath( &mut self, status: &mut dyn StatusBackend, From dc434ef944902fe016b68b3a16e45e020a854359 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 10 Jun 2021 23:01:08 -0400 Subject: [PATCH 19/30] io_base: add app_dirs module for system-wide knowledge of per-user directories --- crates/io_base/Cargo.toml | 1 + crates/io_base/src/app_dirs.rs | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 crates/io_base/src/app_dirs.rs diff --git a/crates/io_base/Cargo.toml b/crates/io_base/Cargo.toml index 28d0ee530c..7bba659a7f 100644 --- a/crates/io_base/Cargo.toml +++ b/crates/io_base/Cargo.toml @@ -16,6 +16,7 @@ license = "MIT" edition = "2018" [dependencies] +app_dirs2 = "^2.3" flate2 = { version = "^1.0.19", default-features = false, features = ["zlib"] } libc = "^0.2" # for EISDIR :-( sha2 = "^0.9" # for digest computations diff --git a/crates/io_base/src/app_dirs.rs b/crates/io_base/src/app_dirs.rs new file mode 100644 index 0000000000..363985fb06 --- /dev/null +++ b/crates/io_base/src/app_dirs.rs @@ -0,0 +1,66 @@ +// Copyright 2019-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Default directories for per-user Tectonic files. +//! +//! If you want to use your own directories for your own application, that's OK, +//! but if you want to look at Tectonic’s default configuration and/or cache +//! data, these are the places to go. + +use app_dirs2::AppDataType; +use std::path::PathBuf; +use tectonic_errors::prelude::*; + +/// The instance of the `app_dirs2` crate that this crate links to. +pub use app_dirs2; + +/// Maybe we should just make this public? But we preserve some flexibility by +/// not doing so just yet. +const APP_INFO: app_dirs2::AppInfo = app_dirs2::AppInfo { + name: "Tectonic", + author: "TectonicProject", +}; + +/// Get the directory for per-user Tectonic configuration files. 
+/// +/// This constructs the path but does not ensure that the directory actually +/// exists. The function [`ensure_user_config`] makes sure that the directory is +/// created. +/// +/// This function is currently implemented with [`app_dirs2::get_app_root`] using +/// the `UserConfig` data type. Return values have the form: +/// +/// - Windows: `%APPDATA%\TectonicProject\Tectonic`, where `%APPDATA%` is +/// something like `C:\Users\knuth\AppData\Roaming`. +/// - macOS: `$HOME/Library/Application Support/Tectonic` +/// - Others: `$XDG_CONFIG_HOME/Tectonic` if defined, otherwise +/// `$HOME/.config/Tectonic` +pub fn get_user_config() -> Result { + Ok(app_dirs2::get_app_root(AppDataType::UserConfig, &APP_INFO)?) +} + +/// Get the directory for per-user Tectonic configuration files, creating it if needed. +/// +/// This is largely the same as [`get_user_config`], but ensures that the +/// returned directory actually exists. +pub fn ensure_user_config() -> Result { + Ok(app_dirs2::app_root(AppDataType::UserConfig, &APP_INFO)?) +} + +/// Get a directory for per-user Tectonic cache files, creating it if needed. +/// +/// The *path* argument may include subdirectories, but the directory separator +/// should be a forward slash on all platforms. It may be an empty string if you +/// want to get the toplevel user cache directory. +/// +/// This function is currently implemented with [`app_dirs2::app_dir`] using the +/// `UserCache` data type. Return values have the form: +/// +/// - Windows: `%LOCALAPPDATA%\TectonicProject\Tectonic`, where `%LOCALAPPDATA%` +/// is something like `C:\Users\knuth\AppData\Local`. +/// - macOS: `$HOME/Library/Caches/Tectonic` +/// - Others: `$XDG_CACHE_HOME/Tectonic` if defined, otherwise +/// `$HOME/.cache/Tectonic` +pub fn ensure_user_cache_dir(path: &str) -> Result { + Ok(app_dirs2::app_dir(AppDataType::UserCache, &APP_INFO, path)?) 
+} From 9319a9203a2b8c7aafd3670d589f3ceded065e4b Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:35:44 -0400 Subject: [PATCH 20/30] status_base: add PlainStatusBackend.always_stderr() --- crates/status_base/src/plain.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/crates/status_base/src/plain.rs b/crates/status_base/src/plain.rs index ee330cb542..79b7c6f30e 100644 --- a/crates/status_base/src/plain.rs +++ b/crates/status_base/src/plain.rs @@ -15,12 +15,29 @@ use super::{ChatterLevel, MessageKind, StatusBackend}; #[derive(Clone, Debug, Default)] pub struct PlainStatusBackend { chatter: ChatterLevel, + always_stderr: bool, } impl PlainStatusBackend { /// Create a new backend with the specified chatter level. + /// + /// To use the default chatter level, you can also use [`Self::default`]. pub fn new(chatter: ChatterLevel) -> Self { - PlainStatusBackend { chatter } + PlainStatusBackend { + chatter, + always_stderr: false, + } + } + + /// Configure this backend to always print to the standard error stream. + /// + /// This setting can be useful if you have a program that is printing output + /// to standard output that needs to be machine-parsable. By activating it + /// you can ensure that any status reports don't get mixed in with your + /// stdout output. 
+ pub fn always_stderr(&mut self, setting: bool) -> &mut Self { + self.always_stderr = setting; + self } } @@ -36,7 +53,7 @@ impl StatusBackend for PlainStatusBackend { MessageKind::Error => "error:", }; - if kind == MessageKind::Note { + if kind == MessageKind::Note && !self.always_stderr { println!("{} {}", prefix, args); } else { eprintln!("{} {}", prefix, args); From 1660b31ca2fa089dd8a135163e1e2715b33dd5f7 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:40:27 -0400 Subject: [PATCH 21/30] bundles: add the `tectonic_bundles` crate This extracts the "bundle" implementations into a standalone crate, adding a bunch of documentation that has really clarified my thinking about why we need to distinguish bundles from generic IoProviders anyway. We also dramatically clean up the implementation of the caching layer, drawing a cleaner separation between the caching mechanism and the underlying "indexed tar" / "HTTP Range request" backend. --- Cargo.toml | 1 + crates/bundles/CHANGELOG.md | 8 + crates/bundles/Cargo.toml | 39 ++ crates/bundles/README.md | 29 ++ crates/bundles/src/cache.rs | 752 ++++++++++++++++++++++++++++++++++++ crates/bundles/src/dir.rs | 76 ++++ crates/bundles/src/itar.rs | 195 ++++++++++ crates/bundles/src/lib.rs | 126 ++++++ crates/bundles/src/zip.rs | 78 ++++ 9 files changed, 1304 insertions(+) create mode 100644 crates/bundles/CHANGELOG.md create mode 100644 crates/bundles/Cargo.toml create mode 100644 crates/bundles/README.md create mode 100644 crates/bundles/src/cache.rs create mode 100644 crates/bundles/src/dir.rs create mode 100644 crates/bundles/src/itar.rs create mode 100644 crates/bundles/src/lib.rs create mode 100644 crates/bundles/src/zip.rs diff --git a/Cargo.toml b/Cargo.toml index af30c9bc7c..0504082ad1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ members = [ "crates/bridge_graphite2", "crates/bridge_harfbuzz", "crates/bridge_icu", + "crates/bundles", "crates/cfg_support", "crates/dep_support", 
"crates/docmodel", diff --git a/crates/bundles/CHANGELOG.md b/crates/bundles/CHANGELOG.md new file mode 100644 index 0000000000..008638b0b7 --- /dev/null +++ b/crates/bundles/CHANGELOG.md @@ -0,0 +1,8 @@ +# See elsewhere for changelog + +This project’s release notes are curated from the Git history of its main +branch. You can find them by looking at [the version of this file on the +`release` branch][branch] or the [GitHub release history][gh-releases]. + +[branch]: https://github.com/tectonic-typesetting/tectonic/blob/release/crates/bundles/CHANGELOG.md +[gh-releases]: https://github.com/tectonic-typesetting/tectonic/releases diff --git a/crates/bundles/Cargo.toml b/crates/bundles/Cargo.toml new file mode 100644 index 0000000000..204dcdc9d4 --- /dev/null +++ b/crates/bundles/Cargo.toml @@ -0,0 +1,39 @@ +# Copyright 2020-2021 the Tectonic Project +# Licensed under the MIT License. + +# See README.md for discussion of features (or lack thereof) in this crate. + +[package] +name = "tectonic_bundles" +version = "0.0.0-dev.0" # assigned with cranko (see README) +authors = ["Peter Williams "] +description = """ +Tectonic "bundle" (support file collection) implementations. 
+""" +homepage = "https://tectonic-typesetting.github.io/" +documentation = "https://docs.rs/tectonic_bundles" +repository = "https://github.com/tectonic-typesetting/tectonic/" +readme = "README.md" +license = "MIT" +edition = "2018" + +[dependencies] +flate2 = { version = "^1.0.19", default-features = false, features = ["zlib"] } +fs2 = "^0.4" +tectonic_errors = { path = "../errors", version = "0.0.0-dev.0" } +tectonic_geturl = { path = "../geturl", version = "0.0.0-dev.0", default-features = false } +tectonic_io_base = { path = "../io_base", version = "0.0.0-dev.0" } +tectonic_status_base = { path = "../status_base", version = "0.0.0-dev.0" } +zip = { version = "^0.5", default-features = false, features = ["deflate"] } + +[features] +default = ["geturl-reqwest"] +geturl-curl = ["tectonic_geturl/curl"] +geturl-reqwest = ["tectonic_geturl/reqwest"] +native-tls-vendored = ["tectonic_geturl/native-tls-vendored"] + +[package.metadata.internal_dep_versions] +tectonic_errors = "5c9ba661edf5ef669f24f9904f99cca369d999e7" +tectonic_geturl = "c828bee7361ebd30e28392507a1406d27dc8fdbb" +tectonic_io_base = "thiscommit:2021-06-13:s9130zU" +tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" diff --git a/crates/bundles/README.md b/crates/bundles/README.md new file mode 100644 index 0000000000..67f582df6d --- /dev/null +++ b/crates/bundles/README.md @@ -0,0 +1,29 @@ +# The `tectonic_bundles` crate + +[![](http://meritbadge.herokuapp.com/tectonic_bundles)](https://crates.io/crates/tectonic_bundles) + +This crate is part of [the Tectonic +project](https://tectonic-typesetting.github.io/en-US/). It implements various +Tectonic “bundles” that provide access to collections of TeX support files. + +- [API documentation](https://docs.rs/tectonic_bundles/). +- [Main Git repository](https://github.com/tectonic-typesetting/tectonic/). 
+ + +## Cargo features + +This crate provides the following [Cargo features][features]: + +[features]: https://doc.rust-lang.org/cargo/reference/features.html + +- `geturl-curl`: use the [curl] crate to implement HTTP requests. In order for + this to take effect, you must use `--no-default-features` because + `geturl-reqwest` is a default feature and it takes precedence +- `geturl-reqwest`: use the [reqwest] crate to implement HTTP requests (enabled + by default) +- `native-tls-vendored`: if using [reqwest], activate the `vendored` option in + the [native-tls] crate, causing OpenSSL to be vendored + +[curl]: https://docs.rs/curl/ +[reqwest]: https://docs.rs/reqwest/ +[native-tls]: https://github.com/sfackler/rust-native-tls diff --git a/crates/bundles/src/cache.rs b/crates/bundles/src/cache.rs new file mode 100644 index 0000000000..6674557c52 --- /dev/null +++ b/crates/bundles/src/cache.rs @@ -0,0 +1,752 @@ +// Copyright 2017-2021 the Tectonic Project +// Licensed under the MIT License. + +//! Local caching of bundle data. +//! +//! This module implements Tectonic’s local filesystem caching mechanism for TeX +//! support files. To enable efficient caching with proper invalidation +//! semantics, the caching layer does *not* merely wrap [`IoProvider`] +//! implementations. Instead, a cacheable bundle must implement the +//! [`CacheBackend`] trait defined in this module. An example of such a bundle +//! is the [`crate::itar::IndexedTarBackend`] for bundles served over HTTP. +//! +//! In order to access a cacheable bundle, you need a handle to a local +//! [`Cache`], probably obtained with [`Cache::get_user_default()`], and a URL, +//! which you’ll pass to [`Cache::open()`]. When using this function, you must +//! explicitly specify the concrete [`CacheBackend`] type that will service +//! backend requests. 
+ +use fs2::FileExt; +use std::{ + collections::HashMap, + fs::{self, File}, + io::{BufRead, BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Write}, + path::{Path, PathBuf}, + str::FromStr, +}; +use tectonic_errors::prelude::*; +use tectonic_io_base::{ + app_dirs, + digest::{self, Digest, DigestData}, + try_open_file, InputHandle, InputOrigin, IoProvider, OpenResult, +}; +use tectonic_status_base::{tt_warning, StatusBackend}; + +use crate::Bundle; + +/// A cache of data from one or more bundles using the local filesystem. +#[derive(Debug)] +pub struct Cache { + root: PathBuf, +} + +impl Cache { + /// Get a handle to a bundle cache, using default per-user settings. + /// + /// This method may perform I/O to create the user cache directory, so it is + /// fallible. (Due to its `app_dirs2` implementation, it would have to be + /// fallible even if it didn't perform I/O.) + pub fn get_user_default() -> Result { + Ok(Cache { + root: app_dirs::ensure_user_cache_dir("")?, + }) + } + + /// Get a handle to a bundle cache, using a custom cache directory. + pub fn get_for_custom_directory>(root: P) -> Self { + Cache { root: root.into() } + } + + /// Get the root directory of this cache. + pub fn root(&self) -> &Path { + &self.root + } + + /// Open a bundle through the cache layer. + /// + /// The URL specifies where the backend data live; it must be understood by, + /// and contain data appropriate for, the [`CacheBackend`] type associated + /// with the bundle that you’re creating. If *only_cached* is true, this + /// instance will never actually connect to the backend; if any uncached + /// files are requested, they will be represented as “not found”. + pub fn open( + &mut self, + url: &str, + only_cached: bool, + status: &mut dyn StatusBackend, + ) -> Result> { + CachingBundle::new(url, only_cached, status, &self.root) + } +} + +/// Information describing a cache backend. 
+/// +/// This type is returned by a [`CacheBackend`] on a "pull", a first-time +/// connection to the backend. It contains the detailed information that needs +/// to be saved in the cache to provide for efficient operation in subsequent +/// uses. +#[derive(Clone, Debug)] +pub struct BackendPullData { + /// The final, "resolved" URL pointing to the backing content, in the case + /// that the starting URL redirects. + pub resolved_url: String, + + /// The digest of the overall bundle content. + pub digest: DigestData, + + /// The bundle indexing data, allowing efficient retrieval of files from the + /// backend. + /// + /// This is a multi-line string, where each line is an entry for a file. + /// These lines will be parsed by [`CacheBackend::parse_index_line`]. This + /// string will potentially contain several megabytes of data. + pub index: String, +} + +/// A source of files that can supply a cache-based bundle. +/// +/// This trait is combined with [`CachingBundle`] to implement a caching bundle +/// interface. +pub trait CacheBackend: Sized { + /// Information about a file stored in the backend. + /// + /// This information should be serializable to a single line of text. It is + /// parsed out of the contents of [`BackendPullData::index`] by + /// [`Self::parse_index_line`], and later passed to [`Self::get_file`] to + /// enable the backend to efficiently retrieve the file in question. For + /// instance, it might contain offset information informing the backend how + /// to efficiently retrieve the file in question. + type FileInfo: Clone; + + /// Connect to the backend and download its key information. + /// + /// This method is used the first time that the cache connects to a backend. + /// The return value includes a package of information ([`BackendPullData`]) + /// that the cache will store to enable efficient operation on subsequent + /// requests. 
+ fn open_with_pull( + start_url: &str, + status: &mut dyn StatusBackend, + ) -> Result<(Self, BackendPullData)>; + + /// Connect to the backend and fetch validation information. + /// + /// This method is used when this backend has already been accessed by the + /// cache during a previous execution. If we need to download more data from + /// the backend, we first need to verify that the cached data still look + /// valid. This method asks the backend to pull its “digest file” (currently + /// named `SHA256SUM`) and return its contents for validate. The method + /// should return `Err` on actual errors, and `Ok(None)` if there are any + /// indications that the cached indexing data should be thrown out and + /// re-fetched. + fn open_with_quick_check( + resolved_url: &str, + digest_file_info: &Self::FileInfo, + status: &mut dyn StatusBackend, + ) -> Result>; + + /// Parse a line of the indexing data. + /// + /// The returned tuple should give the file name and an opaque + /// [`Self::FileInfo`] that may help the backend retrieve the file in the + /// future. The indexing data are originally obtained from + /// [`BackendPullData::index`], but are stored in a file locally. This + /// method should return an error if this particular line of index data + /// seems to be malformatted. Such lines will probably just be silently + /// ignored. + fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)>; + + /// Obtain a file from the backend. + /// + /// Backend-specific retrieval information can be passed in the + /// [`Self::FileInfo`] item, which is constructed from the backend’s index + /// information. The file should be returned as one large byte vector. + fn get_file( + &mut self, + name: &str, + info: &Self::FileInfo, + status: &mut dyn StatusBackend, + ) -> Result>; +} + +/// Information about a cached file. +#[derive(Clone, Copy, Debug)] +struct CachedFileInfo { + /// The length of the file in bytes. 
+ /// + /// This field isn't currently used, but seems handy to keep around. + _length: u64, + + /// The digest of the file contents. + /// + /// This digest is used to locate the cached data on disk. + digest: DigestData, +} + +/// A caching bundle that obtains files from some a backend. +/// +/// This bundle implementation is the key to Tectonic’s ability to download TeX +/// support files on the fly. The cache backend is generally expected to be some +/// kind of network-based resource, and the caching scheme is designed so that a +/// document build can avoid touching the network altogether if no new files +/// need to be downloaded. +#[derive(Debug)] +pub struct CachingBundle { + /// The URL specifying where to start looking for the bundle data. + /// + /// The caching layer maintains two URLs: the "start" URL and the "resolved" + /// URL. The goal here is to be able to store a single URL for fetching + /// data, but maintain the capability to update the bundle data behind that + /// URL. Requests to the start URL may get redirected (one or more times) + /// until eventually we arrive at the "resolved" URL. While the redirection + /// of the start URL might change, the contents of a resolved URL should + /// never change once published. + start_url: String, + + /// The "resolved" URL for the backing data. + /// + /// The bundle data located at this URL should never change. + resolved_url: String, + + /// The cached value of the backend’s content digest. + /// + /// This is stored in a file at [`Self::digest_path`]. This value may be + /// inaccurate, if the backing bundle has been updated (or if the cache is + /// corrupt, etc.) and we haven't yet synchronized with the backend and + /// discovered that fact. + cached_digest: DigestData, + + /// Information about all of the files that have been cached locally. + /// + /// This maps filenames to summary information that can then be used to + /// retrieve file data from [`Self::data_base`]. 
The contents are loaded + /// from the manifest file if the cache is non-empty. + contents: HashMap, + + /// Information about all of the files known to the backend. + /// + /// This maps filenames to [`CacheBackend::FileInfo`] data that can be used + /// to retrieve a file from the backend if needed. + index: HashMap, + + /// If true, only use cached files -- never connect to the backend. + /// + /// This option can be useful if we are operating disconnected from the + /// network (e.g., on an airplane). If you add a new figure to your + /// document, the engine will inquire about several related files that it + /// thinks might exist. Without this option, such an inquiry might require + /// Tectonic to hit the network, when the user knows for sure that the + /// bundle is not going to contain these files. + only_cached: bool, + + /// The connection to the cache backend, maybe. + /// + /// This field will be `None` if there are locally cached data present and + /// there has not yet been a need to connect to the backend. If it becomes + /// necessary to "pull" and/or download a new file from the backend, this + /// value will become `Some` — it represents something like an open network + /// connection. + backend: Option, + + /// The path to a file containing a cached copy of the backend's content + /// digest. + /// + /// This file path is based on [`Self::start_url`]. + digest_path: PathBuf, + + /// A directory where we will save [`Self::resolved_url`]. + /// + /// We need to cache `resolved_url` to enable the "quick check" backend + /// reconnection path. The actual cache file path is based on the backend’s + /// content digest. + resolved_base: PathBuf, + + /// A directory where we will save the cache manifest. + /// + /// The manifest file contains information about the files that have + /// actually been fetched from the backend and saved locally. The actual + /// manifest file path is based on the backend’s content digest. 
+ manifest_path: PathBuf, + + /// A directory where we will save cached file data. + /// + /// This directory contains the actual cached file contents, in a directory + /// structured based on the digest of each file’s content. + data_base: PathBuf, +} + +/// A locally-cached analogue of [`BackendPullData`]. +/// +/// This data structure is what we try to recover from the cache to see if we +/// can avoid connecting to the backend. +#[derive(Clone, Debug)] +struct CachedPullData { + /// The saved backend content digest. + pub digest: DigestData, + + /// The saved "resolved URL" for the backend. + pub resolved_url: String, + + /// The saved indexing information for the backend. + pub index: HashMap, +} + +impl CachingBundle { + fn new( + start_url: &str, + only_cached: bool, + status: &mut dyn StatusBackend, + cache_root: &Path, + ) -> Result { + // Set up our paths. + let digest_path = + ensure_cache_dir(cache_root, "urls")?.join(app_dirs::app_dirs2::sanitized(start_url)); + let resolved_base = ensure_cache_dir(cache_root, "redirects")?; + let index_base = ensure_cache_dir(cache_root, "indexes")?; + let manifest_base = ensure_cache_dir(cache_root, "manifests")?; + let data_base = ensure_cache_dir(cache_root, "files")?; + + // The whole point of this cache is to avoid connecting to the backend + // if at all possible. So we first see if we have cached the "pull data" + // that describe the overall backend contents. + + let mut backend = None; + + let cached_pull_data = + match load_cached_pull_data::(&digest_path, &resolved_base, &index_base)? { + Some(c) => c, + None => { + // Some portion of the required cached data is missing. We need to + // do a complete pull and then cache the results. 
+ + let (new_backend, pull_data) = CB::open_with_pull(start_url, status)?; + backend = Some(new_backend); + + let digest_text = pull_data.digest.to_string(); + file_create_write(&digest_path, |f| writeln!(f, "{}", &digest_text))?; + file_create_write(make_txt_path(&resolved_base, &digest_text), |f| { + f.write_all(pull_data.resolved_url.as_bytes()) + })?; + file_create_write(make_txt_path(&index_base, &digest_text), |f| { + f.write_all(pull_data.index.as_bytes()) + })?; + + // Now that we've done that, load_cached_pull_data() really ought to succeed ... + atry!( + load_cached_pull_data::(&digest_path, &resolved_base, &index_base)?; + ["cache files missing even after they were created"] + ) + } + }; + + // We call this `cached_digest`, but if `backend` is Some, it is a + // validated, fresh digest. + + let cached_digest = cached_pull_data.digest; + + // Now that we have the backend content digest, we know which manifest + // to use. Read it in, if it exists. + + let manifest_path = make_txt_path(&manifest_base, &cached_digest.to_string()); + let mut contents = HashMap::new(); + + match try_open_file(&manifest_path) { + OpenResult::NotAvailable => {} + OpenResult::Err(e) => { + return Err(e); + } + OpenResult::Ok(mfile) => { + // Note that the lock is released when the file is closed, + // which is good since BufReader::new() and BufReader::lines() + // consume their objects. 
+ if let Err(e) = mfile.lock_shared() { + tt_warning!(status, "failed to lock manifest file \"{}\" for reading; this might be fine", + manifest_path.display(); e.into()); + } + + let f = BufReader::new(mfile); + + for res in f.lines() { + let line = res?; + let mut bits = line.rsplitn(3, ' '); + + let (original_name, length, digest) = + match (bits.next(), bits.next(), bits.next(), bits.next()) { + (Some(s), Some(t), Some(r), None) => (r, t, s), + _ => continue, + }; + + let name = original_name.to_owned(); + + let length = match length.parse::() { + Ok(l) => l, + Err(_) => continue, + }; + + let digest = if digest == "-" { + continue; + } else { + match DigestData::from_str(&digest) { + Ok(d) => d, + Err(e) => { + tt_warning!(status, "ignoring bad digest data \"{}\" for \"{}\" in \"{}\"", + &digest, original_name, manifest_path.display() ; e); + continue; + } + } + }; + + contents.insert( + name, + CachedFileInfo { + _length: length, + digest, + }, + ); + } + } + } + + // All set. + + Ok(CachingBundle { + start_url: start_url.to_owned(), + resolved_url: cached_pull_data.resolved_url, + digest_path, + cached_digest, + manifest_path, + data_base, + resolved_base, + contents, + only_cached, + backend, + index: cached_pull_data.index, + }) + } + + /// Save data about a file to our local cache manifest. + fn save_to_manifest(&mut self, name: &str, length: u64, digest: DigestData) -> Result<()> { + let digest_text = digest.to_string(); + + // Due to a quirk about permissions for file locking on Windows, we + // need to add `.read(true)` to be able to lock a file opened in + // append mode. + let mut man = fs::OpenOptions::new() + .append(true) + .create(true) + .read(true) + .open(&self.manifest_path)?; + + // Lock will be released when file is closed at the end of this function. 
+ atry!( + man.lock_exclusive(); + ["failed to lock manifest file \"{}\" for writing", self.manifest_path.display()] + ); + + // If a filename contains newline characters, it will mess up our + // line-based manifest format. Be paranoid and refuse to record such + // filenames. + if !name.contains(|c| c == '\n' || c == '\r') { + writeln!(man, "{} {} {}", name, length, digest_text)?; + } + + self.contents.insert( + name.to_owned(), + CachedFileInfo { + _length: length, + digest, + }, + ); + + Ok(()) + } + + /// Ensure that the backend is connected and valid. + /// + /// Here we do a "quick check" to see if the backend's digest is what we + /// expect. If not, we do a lame thing where we error out but set things up + /// so that things should succeed if the program is re-run. Exactly the lame + /// TeX user experience that I've been trying to avoid! + /// + /// After this function has been called, you can assume that `self.backend` + /// is Some. + fn ensure_backend_validity(&mut self, status: &mut dyn StatusBackend) -> Result<()> { + // If backend is Some, we already have a validated connection to it. + if self.backend.is_some() { + return Ok(()); + } + + // Do the quick check. If anything goes wrong, eat the error and try a + // fresh pull. + if let Some(info) = self.index.get(digest::DIGEST_NAME) { + if let Ok(Some((backend, digest))) = + CB::open_with_quick_check(&self.resolved_url, &info, status) + { + if self.cached_digest == digest { + // We managed to pull some data that match the digest. We + // can be quite confident that the bundle is what we expect + // it to be. + self.backend = Some(backend); + return Ok(()); + } + } + } + + // The quick check failed. Try to pull all data to make sure that it + // wasn't a network error or that the resolved URL hasn't been updated. + let (new_backend, pull_data) = CB::open_with_pull(&self.start_url, status)?; + + if self.cached_digest != pull_data.digest { + // Crap! The backend isn't what we thought it was. 
We may have been + // giving incorrect results if we pulled files out of the cache + // before this invocation. Rewrite the digest file so that next time + // we'll start afresh, then bail. + file_create_write(&self.digest_path, |f| { + writeln!(f, "{}", pull_data.digest.to_string()) + })?; + bail!("backend digest changed; rerun tectonic to use updated information"); + } + + if self.resolved_url != pull_data.resolved_url { + // The resolved URL has changed, but the digest is the same. So + // let's just update the URL and keep going. + let resolved_path = make_txt_path(&self.resolved_base, &pull_data.digest.to_string()); + file_create_write(&resolved_path, |f| { + f.write_all(pull_data.resolved_url.as_bytes()) + })?; + + self.resolved_url = pull_data.resolved_url; + } + + // OK, it seems that everything is in order. + self.backend = Some(new_backend); + Ok(()) + } + + /// Make sure that a file is available, and return its filesystem path. + /// + /// If the file is already cached, just pull it out. Otherwise, fetch it + /// from the backend. + fn ensure_file_availability( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + // Already in the cache? + if let Some(info) = self.contents.get(name) { + return match info.digest.create_two_part_path(&self.data_base) { + Ok(p) => OpenResult::Ok(p), + Err(e) => OpenResult::Err(e), + }; + } + + // No, it's not. Are we in cache-only mode? + if self.only_cached { + return OpenResult::NotAvailable; + } + + // Is the file in the backend at all? + let info = match self.index.get(name).cloned() { + Some(info) => info, + None => return OpenResult::NotAvailable, + }; + + // Yes, it is. Time to fetch it! In order to do that, we need to ensure + // that we have a valid backend connection. + if let Err(e) = self.ensure_backend_validity(status) { + return OpenResult::Err(e); + } + + // Cool, we're connected to the backend now. Get the file. 
Note that we + // don't need to check for updates to the index after the + // ensure-validity, because we require that the contents of the bundle + // are unchanged (as expressed in the content digest): if they did + // change, ensure_backend_validity() would have bailed, because we might + // have returned incorrect data for previous requests that hit the + // cache. + + let content = match self.backend.as_mut().unwrap().get_file(name, &info, status) { + Ok(c) => c, + Err(e) => return OpenResult::Err(e), + }; + + let length = content.len(); + + let mut digest_builder = digest::create(); + digest_builder.update(&content); + let digest = DigestData::from(digest_builder); + + let final_path = match digest.create_two_part_path(&self.data_base) { + Ok(p) => p, + Err(e) => return OpenResult::Err(e), + }; + + // Perform a racy check for the destination existing, because this + // matters on Windows: if the destination is already there, we'll get + // an error because the destination is marked read-only. Assuming + // non-pathological filesystem manipulation, though, we'll only be + // subject to the race once. + + if !final_path.exists() { + if let Err(e) = file_create_write(&final_path, |f| f.write_all(&content)) { + return OpenResult::Err(e); + } + + // Now we can make the file readonly. It would be nice to set the + // permissions using the already-open file handle owned by the + // tempfile, but mkstemp doesn't give us access. + let mut perms = match fs::metadata(&final_path) { + Ok(p) => p, + Err(e) => { + return OpenResult::Err(e.into()); + } + } + .permissions(); + perms.set_readonly(true); + + if let Err(e) = fs::set_permissions(&final_path, perms) { + return OpenResult::Err(e.into()); + } + } + + // And finally add a record of this file to our manifest. Note that + // we're opening and closing the manifest every time we cache a new + // file; not so efficient, but whatever. 
+ + if let Err(e) = self.save_to_manifest(name, length as u64, digest) { + return OpenResult::Err(e); + } + + OpenResult::Ok(final_path) + } +} + +impl IoProvider for CachingBundle { + fn input_open_name( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + let path = match self.ensure_file_availability(name, status) { + OpenResult::Ok(p) => p, + OpenResult::NotAvailable => return OpenResult::NotAvailable, + OpenResult::Err(e) => return OpenResult::Err(e), + }; + + let f = match File::open(&path) { + Ok(f) => f, + Err(e) => return OpenResult::Err(e.into()), + }; + + OpenResult::Ok(InputHandle::new_read_only( + name, + BufReader::new(f), + InputOrigin::Other, + )) + } +} + +impl Bundle for CachingBundle { + fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { + Ok(self.cached_digest) + } + + fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { + if !self.only_cached { + self.ensure_backend_validity(status)?; + } + Ok(self.index.keys().cloned().collect()) + } +} + +/// Load the cached "pull" data for a backend. +/// +/// If any of the files are not found or otherwise have issues, return None. +fn load_cached_pull_data( + digest_path: &Path, + resolved_base: &Path, + index_base: &Path, +) -> Result>> { + // Convert file-not-found errors into None. 
+ return match inner::(digest_path, resolved_base, index_base) { + Ok(r) => Ok(Some(r)), + Err(e) => { + if let Some(ioe) = e.downcast_ref::() { + if ioe.kind() == IoErrorKind::NotFound { + return Ok(None); + } + } + + Err(e) + } + }; + + fn inner( + digest_path: &Path, + resolved_base: &Path, + index_base: &Path, + ) -> Result> { + let digest_text = { + let f = File::open(digest_path)?; + let mut digest_text = String::with_capacity(digest::DIGEST_LEN); + f.take(digest::DIGEST_LEN as u64) + .read_to_string(&mut digest_text)?; + digest_text + }; + + let resolved_path = make_txt_path(resolved_base, &digest_text); + let resolved_url = fs::read_to_string(resolved_path)?; + + let index_path = make_txt_path(index_base, &digest_text); + let index = { + let f = File::open(index_path)?; + let mut index = HashMap::new(); + for line in BufReader::new(f).lines() { + if let Ok((name, info)) = CB::parse_index_line(&line?) { + index.insert(name, info); + } + } + index + }; + + Ok(CachedPullData { + digest: DigestData::from_str(&digest_text)?, + resolved_url, + index, + }) + } +} + +/// A convenience method to provide a better error message when writing to a created file. +fn file_create_write(path: P, write_fn: F) -> Result<()> +where + P: AsRef, + F: FnOnce(&mut File) -> std::result::Result<(), E>, + E: std::error::Error + 'static + Sync + Send, +{ + let path = path.as_ref(); + let mut f = atry!( + File::create(path); + ["couldn't open {} for writing", path.display()] + ); + atry!( + write_fn(&mut f); + ["couldn't write to {}", path.display()] + ); + Ok(()) +} + +/// Ensure that a directory exists. 
+fn ensure_cache_dir(root: &Path, path: &str) -> Result { + let full_path = root.join(path); + atry!( + fs::create_dir_all(&full_path); + ["failed to create directory `{}` or one of its parents", full_path.display()] + ); + Ok(full_path) +} + +/// Convenience to generate a text filename +fn make_txt_path(base: &Path, name: &str) -> PathBuf { + base.join(&name).with_extension("txt") +} diff --git a/crates/bundles/src/dir.rs b/crates/bundles/src/dir.rs new file mode 100644 index 0000000000..db431ddfa5 --- /dev/null +++ b/crates/bundles/src/dir.rs @@ -0,0 +1,76 @@ +// Copyright 2017-2021 the Tectonic Project +// Licensed under the MIT License. + +//! A module for the directory bundle [`DirBundle`]. + +use std::{ + fs, + path::{Path, PathBuf}, +}; +use tectonic_errors::prelude::*; +use tectonic_io_base::{filesystem::FilesystemIo, InputHandle, IoProvider, OpenResult}; +use tectonic_status_base::StatusBackend; + +use super::Bundle; + +/// A "bundle" of a bunch of files in a directory. +/// +/// This implementation essentially just wraps +/// [`tectonic_io_base::filesystem::FilesystemIo`], ensuring that it is +/// read-only, self-contained, and implements the [`Bundle`] trait. The +/// directory should contain a file named `SHA256SUM` if the bundle fingerprint +/// will be needed. +pub struct DirBundle(FilesystemIo); + +impl DirBundle { + /// Create a new directory bundle. + /// + /// No validation of the input path is performed, which is why this function + /// is infallible. 
+ pub fn new>(dir: P) -> DirBundle { + DirBundle(FilesystemIo::new( + dir.as_ref(), + false, // no writes + false, // no absolute paths + Default::default(), // no hidden files + )) + } +} + +impl IoProvider for DirBundle { + fn input_open_name( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult { + self.0.input_open_name(name, status) + } + + fn input_open_name_with_abspath( + &mut self, + name: &str, + status: &mut dyn StatusBackend, + ) -> OpenResult<(InputHandle, Option)> { + self.0.input_open_name_with_abspath(name, status) + } +} + +impl Bundle for DirBundle { + fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { + let mut files = Vec::new(); + + // We intentionally do not explore the directory recursively. + for entry in fs::read_dir(&self.0.root())? { + let entry = entry?; + + // This catches both regular files and symlinks:` + if !entry.file_type()?.is_dir() { + if let Some(s) = entry.file_name().to_str() { + files.push(s.to_owned()); + } + } + } + + Ok(files) + } +} diff --git a/crates/bundles/src/itar.rs b/crates/bundles/src/itar.rs new file mode 100644 index 0000000000..c486e9c891 --- /dev/null +++ b/crates/bundles/src/itar.rs @@ -0,0 +1,195 @@ +// Copyright 2017-2021 the Tectonic Project +// Licensed under the MIT License. + +//! The web-friendly "indexed tar" bundle backend. +//! +//! The main type offered by this module is the [`IndexedTarBackend`] struct, +//! which cannot be used directly as a [`tectonic_io_base::IoProvider`] but is +//! the default backend for cached web-based bundle access through the +//! [`crate::cache::CachingBundle`] framework. +//! +//! While the on-server file format backing the “indexed tar” backend is indeed +//! a standard `tar` file, as far as the client is concerned, this backend is +//! centered on HTTP byte-range requests. For each file contained in the backing +//! resource, the index file merely contains a byte offset and length that are +//! 
then used to construct an HTTP Range request to obtain the file as needed.
+
+use flate2::read::GzDecoder;
+use std::{convert::TryInto, io::Read, str::FromStr};
+use tectonic_errors::prelude::*;
+use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader};
+use tectonic_io_base::digest::{self, DigestData};
+use tectonic_status_base::{tt_note, tt_warning, StatusBackend};
+
+use crate::cache::{BackendPullData, CacheBackend};
+
+const MAX_HTTP_ATTEMPTS: usize = 4;
+
+/// The internal file-information struct used by the [`IndexedTarBackend`].
+#[derive(Clone, Copy, Debug)]
+pub struct FileInfo {
+    offset: u64,
+    length: u64,
+}
+
+/// A simple web-based file backend based on HTTP Range requests.
+///
+/// This type implements the [`CacheBackend`] trait and so can be used for
+/// web-based bundle access through the [`crate::cache::CachingBundle`]
+/// framework.
+#[derive(Debug)]
+pub struct IndexedTarBackend {
+    reader: DefaultRangeReader,
+}
+
+impl CacheBackend for IndexedTarBackend {
+    type FileInfo = FileInfo;
+
+    fn open_with_pull(
+        start_url: &str,
+        status: &mut dyn StatusBackend,
+    ) -> Result<(Self, BackendPullData)> {
+        // Step 1: resolve URL
+        let mut geturl_backend = DefaultBackend::default();
+        let resolved_url = geturl_backend.resolve_url(start_url, status)?;
+
+        // Step 2: fetch index
+        let index = {
+            let mut index = String::new();
+            let index_url = format!("{}.index.gz", &resolved_url);
+            tt_note!(status, "downloading index {}", index_url);
+            GzDecoder::new(geturl_backend.get_url(&index_url, status)?)
+ .read_to_string(&mut index)?; + index + }; + + // Step 3: get digest, setting up instance as we go + + let mut cache_backend = IndexedTarBackend { + reader: geturl_backend.open_range_reader(&resolved_url), + }; + + let digest_info = { + let mut digest_info = None; + + for line in index.lines() { + if let Ok((name, info)) = Self::parse_index_line(line) { + if name == digest::DIGEST_NAME { + digest_info = Some(info); + break; + } + } + } + + atry!( + digest_info; + ["backend does not provide needed {} file", digest::DIGEST_NAME] + ) + }; + + let digest_text = + String::from_utf8(cache_backend.get_file(digest::DIGEST_NAME, &digest_info, status)?) + .map_err(|e| e.utf8_error())?; + let digest = DigestData::from_str(&digest_text)?; + + // All done. + Ok(( + cache_backend, + BackendPullData { + resolved_url, + digest, + index, + }, + )) + } + + fn open_with_quick_check( + resolved_url: &str, + digest_file_info: &Self::FileInfo, + status: &mut dyn StatusBackend, + ) -> Result> { + let mut cache_backend = IndexedTarBackend { + reader: DefaultBackend::default().open_range_reader(resolved_url), + }; + + if let Ok(d) = cache_backend.get_file(digest::DIGEST_NAME, &digest_file_info, status) { + if let Ok(d) = String::from_utf8(d) { + if let Ok(d) = DigestData::from_str(&d) { + return Ok(Some((cache_backend, d))); + } + } + } + + Ok(None) + } + + fn parse_index_line(line: &str) -> Result<(String, Self::FileInfo)> { + let mut bits = line.split_whitespace(); + + if let (Some(name), Some(offset), Some(length)) = (bits.next(), bits.next(), bits.next()) { + Ok(( + name.to_owned(), + FileInfo { + offset: offset.parse::()?, + length: length.parse::()?, + }, + )) + } else { + // TODO: preserve the warning info or something! 
+ bail!("malformed index line"); + } + } + + fn get_file( + &mut self, + name: &str, + info: &Self::FileInfo, + status: &mut dyn StatusBackend, + ) -> Result> { + tt_note!(status, "downloading {}", name); + + // Historically, sometimes our web service would drop connections when + // fetching a bunch of resource files (i.e., on the first invocation). + // The error manifested itself in a way that has a not-so-nice user + // experience. Our solution: retry the request a few times in case it + // was a transient problem. + + let n = info.length.try_into().unwrap(); + let mut buf = Vec::with_capacity(n); + let mut overall_failed = true; + let mut any_failed = false; + + for _ in 0..MAX_HTTP_ATTEMPTS { + let mut stream = match self.reader.read_range(info.offset, n) { + Ok(r) => r, + Err(e) => { + tt_warning!(status, "failure requesting \"{}\" from network", name; e); + any_failed = true; + continue; + } + }; + + if let Err(e) = stream.read_to_end(&mut buf) { + tt_warning!(status, "failure downloading \"{}\" from network", name; e.into()); + any_failed = true; + continue; + } + + overall_failed = false; + break; + } + + if overall_failed { + bail!( + "failed to retrieve \"{}\" from the network; \ + this most probably is not Tectonic's fault \ + -- please check your network connection.", + name + ); + } else if any_failed { + tt_note!(status, "download succeeded after retry"); + } + + Ok(buf) + } +} diff --git a/crates/bundles/src/lib.rs b/crates/bundles/src/lib.rs new file mode 100644 index 0000000000..a0d40ee2b4 --- /dev/null +++ b/crates/bundles/src/lib.rs @@ -0,0 +1,126 @@ +// Copyright 2016-2021 the Tectonic Project +// Licensed under the MIT License. + +#![deny(missing_docs)] + +//! Implementations of Tectonic bundle formats. +//! +//! A Tectonic “bundle” is a collection of TeX support files. In code, bundles +//! implement the [`Bundle`] trait defined here, although most of the action in +//! 
a bundle will be in its implementation of [`tectonic_io_base::IoProvider`].
+//!
+//! This crate provides the following bundle implementations:
+//!
+//! - [`cache::CachingBundle`] for access to remote bundles with local
+//!   filesystem caching.
+//! - [`dir::DirBundle`] turns a directory full of files into a bundle; it is
+//!   useful for testing and lightweight usage.
+//! - [`zip::ZipBundle`] for a ZIP-format bundle.
+
+use std::{io::Read, str::FromStr};
+use tectonic_errors::{anyhow::bail, atry, Result};
+use tectonic_io_base::{digest, digest::DigestData, IoProvider, OpenResult};
+use tectonic_status_base::StatusBackend;
+
+pub mod cache;
+pub mod dir;
+pub mod itar;
+pub mod zip;
+
+/// A trait for bundles of Tectonic support files.
+///
+/// A “bundle” is an [`IoProvider`] with a few special properties. Bundles are
+/// read-only, and their contents can be enumerated. In principle a bundle is
+/// completely defined by its file contents, which can be summarized by a
+/// cryptographic digest, obtainable using the [`Self::get_digest`] method: two
+/// bundles with the same digest should contain exactly the same set of files,
+/// and if any aspect of a bundle’s file contents change, so should its digest.
+/// Finally, it is generally expected that a bundle will contain a large number
+/// of TeX support files, and that you can generate one or more TeX format files
+/// using only the files contained in a bundle.
+pub trait Bundle: IoProvider {
+    /// Get a cryptographic digest summarizing this bundle’s contents.
+    ///
+    /// The digest summarizes the exact contents of every file in the bundle. It
+    /// is computed from the sorted names and SHA256 digests of the component
+    /// files [as implemented in the TeXLive bundle builder][x].
+ /// + /// [x]: https://github.com/tectonic-typesetting/tectonic-texlive-bundles/blob/master/scripts/ttb_utils.py#L321 + /// + /// The default implementation gets the digest from a file named + /// `SHA256SUM`, which is expected to contain the digest in hex-encoded + /// format. + fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { + let digest_text = match self.input_open_name(digest::DIGEST_NAME, status) { + OpenResult::Ok(h) => { + let mut text = String::new(); + h.take(64).read_to_string(&mut text)?; + text + } + + OpenResult::NotAvailable => { + // Broken or un-cacheable backend. + bail!("bundle does not provide needed SHA256SUM file"); + } + + OpenResult::Err(e) => { + return Err(e); + } + }; + + Ok(atry!(DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) + } + + /// Enumerate the files in this bundle. + /// + /// This interface is intended to be used for diagnostics, not by anything + /// during actual execution of an engine. This should include meta-files + /// such as the `SHA256SUM` file. The ordering of the returned filenames is + /// unspecified. + /// + /// To ease implementation, the filenames are returned in one big vector of + /// owned strings. For a large bundle, the memory consumed by this operation + /// might be fairly substantial (although we are talking megabytes, not + /// gigabytes). + fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result>; +} + +impl Bundle for Box { + fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { + (**self).get_digest(status) + } + + fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { + (**self).all_files(status) + } +} + +/// The URL of the default bundle. +/// +/// This is a hardcoded URL of a default bundle that will provide some +/// "sensible" set of TeX support files. 
The higher-level `tectonic` crate +/// provides a configuration mechanism to allow the user to override this +/// setting, so you should use that if you are in a position to do so. +/// +/// This URL will be embedded in the binaries that you create, which may be used +/// for years into the future, so it needs to be durable and reliable. At the +/// moment, the URL is hosted on `archive.org` and redirects to a web-based +/// storage service that has changed a few times over the years. Note that +/// `archive.org` is blocked in China, causing problems for that potential user +/// base. +pub const FALLBACK_BUNDLE_URL: &str = + "https://archive.org/services/purl/net/pkgwpub/tectonic-default"; + +/// Open the fallback bundle. +/// +/// This is essentially the default Tectonic bundle, but the higher-level +/// `tectonic` crate provides a configuration mechanism to allow the user to +/// override the [`FALLBACK_BUNDLE_URL`] setting, and that should be preferred +/// if you’re in a position to use it. +pub fn get_fallback_bundle( + only_cached: bool, + status: &mut dyn StatusBackend, +) -> Result> { + let mut cache = cache::Cache::get_user_default()?; + cache.open(FALLBACK_BUNDLE_URL, only_cached, status) +} diff --git a/crates/bundles/src/zip.rs b/crates/bundles/src/zip.rs new file mode 100644 index 0000000000..c0d2757d4f --- /dev/null +++ b/crates/bundles/src/zip.rs @@ -0,0 +1,78 @@ +// Copyright 2016-2021 the Tectonic Project +// Licensed under the MIT License. + +//! ZIP files as Tectonic bundles. + +use std::{ + fs::File, + io::{Cursor, Read, Seek}, + path::Path, +}; +use tectonic_errors::prelude::*; +use tectonic_io_base::{InputHandle, InputOrigin, IoProvider, OpenResult}; +use tectonic_status_base::StatusBackend; +use zip::{result::ZipError, ZipArchive}; + +use crate::Bundle; + +/// A bundle backed by a ZIP file. +pub struct ZipBundle { + zip: ZipArchive, +} + +impl ZipBundle { + /// Create a new ZIP bundle for a generic readable and seekable stream. 
+ pub fn new(reader: R) -> Result> { + Ok(ZipBundle { + zip: ZipArchive::new(reader)?, + }) + } +} + +impl ZipBundle { + /// Open a file on the filesystem as a ZIP bundle. + pub fn open>(path: P) -> Result> { + Self::new(File::open(path)?) + } +} + +impl IoProvider for ZipBundle { + fn input_open_name( + &mut self, + name: &str, + _status: &mut dyn StatusBackend, + ) -> OpenResult { + // We need to be able to look at other items in the Zip file while + // reading this one, so the only path forward is to read the entire + // contents into a buffer right now. RAM is cheap these days. + + let mut zipitem = match self.zip.by_name(name) { + Ok(f) => f, + Err(e) => { + return match e { + ZipError::Io(sube) => OpenResult::Err(sube.into()), + ZipError::FileNotFound => OpenResult::NotAvailable, + _ => OpenResult::Err(e.into()), + }; + } + }; + + let mut buf = Vec::with_capacity(zipitem.size() as usize); + + if let Err(e) = zipitem.read_to_end(&mut buf) { + return OpenResult::Err(e.into()); + } + + OpenResult::Ok(InputHandle::new_read_only( + name, + Cursor::new(buf), + InputOrigin::Other, + )) + } +} + +impl Bundle for ZipBundle { + fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { + Ok(self.zip.file_names().map(|s| s.to_owned()).collect()) + } +} From 26fb3f84364000b05ed1b3cecb73b327e7ccdbf6 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:44:27 -0400 Subject: [PATCH 22/30] tectonic(!): use io_base app_dirs implementation The app_dirs have been moved to a lower level. This is a subtle BREAKING CHANGE because we remove the app_dirs error variant from our un-boxed error type. 
--- Cargo.lock | 2 +- Cargo.toml | 3 +-- src/app_dirs.rs | 27 --------------------------- src/config.rs | 7 +++---- src/errors.rs | 1 - src/lib.rs | 1 - 6 files changed, 5 insertions(+), 36 deletions(-) delete mode 100644 src/app_dirs.rs diff --git a/Cargo.lock b/Cargo.lock index 608570e432..e32d0a8efa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2104,7 +2104,6 @@ dependencies = [ name = "tectonic" version = "0.0.0-dev.0" dependencies = [ - "app_dirs2", "atty", "byte-unit", "cfg-if 1.0.0", @@ -2283,6 +2282,7 @@ dependencies = [ name = "tectonic_io_base" version = "0.0.0-dev.0" dependencies = [ + "app_dirs2", "flate2", "libc", "sha2", diff --git a/Cargo.toml b/Cargo.toml index 0504082ad1..2508cee0ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,6 @@ name = "tectonic" crate-type = ["rlib"] [dependencies] -app_dirs = { version = "2", package = "app_dirs2" } atty = "0.2" byte-unit = "^4.0" cfg-if = "1.0" @@ -138,7 +137,7 @@ tectonic_engine_xdvipdfmx = "7dcbc52e58f9774b3d592919a9105377faeac509" tectonic_engine_xetex = "b7a4085fa67c831d4532da6661bddafd1f9c24ff" tectonic_errors = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" tectonic_geturl = "thiscommit:2021-01-16:Aikoob9c" -tectonic_io_base = "f7eeff461778f7082db7ed5097d93aa63119eb12" +tectonic_io_base = "thiscommit:2021-06-13:XFjtSsZ" tectonic_status_base = "317ae79ceaa2593fb56090e37bf1f5cc24213dd9" tectonic_xdv = "c91f2ef37858d1a0a724a5c3ddc2f7ea46373c77" tectonic_xetex_layout = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" diff --git a/src/app_dirs.rs b/src/app_dirs.rs deleted file mode 100644 index 62219ad0ee..0000000000 --- a/src/app_dirs.rs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright 2019 the Tectonic Project -// Licensed under the MIT License. 
- -use crate::errors::Result; -use app_dirs::AppDataType; -use std::path::PathBuf; - -pub use app_dirs::sanitized; - -const APP_INFO: app_dirs::AppInfo = app_dirs::AppInfo { - name: "Tectonic", - author: "TectonicProject", -}; - -#[cfg(feature = "serialization")] -pub fn user_config() -> Result { - Ok(app_dirs::app_root(AppDataType::UserConfig, &APP_INFO)?) -} - -#[cfg(feature = "serialization")] -pub fn get_user_config() -> Result { - Ok(app_dirs::get_app_root(AppDataType::UserConfig, &APP_INFO)?) -} - -pub fn user_cache_dir(path: &str) -> Result { - Ok(app_dirs::app_dir(AppDataType::UserCache, &APP_INFO, path)?) -} diff --git a/src/config.rs b/src/config.rs index 1044a0acbb..8a9c94f882 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,4 @@ -// src/config.rs -- configuration for the Tectonic library. -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! User configuration settings for the Tectonic engine. @@ -74,7 +73,7 @@ impl PersistentConfig { }; let mut cfg_path = if auto_create_config_file { - app_dirs::user_config()? + app_dirs::ensure_user_config()? } else { app_dirs::get_user_config()? }; @@ -182,7 +181,7 @@ impl PersistentConfig { if CONFIG_TEST_MODE_ACTIVATED.load(Ordering::SeqCst) { Ok(crate::test_util::test_path(&[])) } else { - Ok(app_dirs::user_cache_dir("formats")?) + Ok(app_dirs::ensure_user_cache_dir("formats")?) } } } diff --git a/src/errors.rs b/src/errors.rs index d4702c3371..f0b665bbc5 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -60,7 +60,6 @@ error_chain! { } foreign_links { - AppDirs(app_dirs::AppDirsError); Io(io::Error); Fmt(fmt::Error); Nul(ffi::NulError); diff --git a/src/lib.rs b/src/lib.rs index d4d4c358f6..e0eb423572 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,7 +75,6 @@ //! The [`driver`] module provides a high-level interface for driving the //! engines in more realistic circumstances. 
-mod app_dirs; pub mod config; pub mod digest; #[cfg(feature = "serialization")] From a070d2b4f8ba0bfc5e7beb492dd16662360fcb3c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:46:02 -0400 Subject: [PATCH 23/30] src/status/termcolor.rs: add always_stderr() option here --- src/status/termcolor.rs | 47 +++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/status/termcolor.rs b/src/status/termcolor.rs index 42764c6497..f2151be78d 100644 --- a/src/status/termcolor.rs +++ b/src/status/termcolor.rs @@ -15,6 +15,7 @@ use super::{ChatterLevel, MessageKind, StatusBackend}; pub struct TermcolorStatusBackend { chatter: ChatterLevel, + always_stderr: bool, stdout: StandardStream, stderr: StandardStream, note_spec: ColorSpec, @@ -39,6 +40,7 @@ impl TermcolorStatusBackend { TermcolorStatusBackend { chatter, + always_stderr: false, stdout: StandardStream::stdout(ColorChoice::Auto), stderr: StandardStream::stderr(ColorChoice::Auto), note_spec, @@ -48,6 +50,11 @@ impl TermcolorStatusBackend { } } + pub fn always_stderr(&mut self, setting: bool) -> &mut Self { + self.always_stderr = setting; + self + } + fn styled(&mut self, kind: MessageKind, f: F) where F: FnOnce(&mut StandardStream), @@ -57,7 +64,13 @@ impl TermcolorStatusBackend { } let (spec, stream) = match kind { - MessageKind::Note => (&self.note_spec, &mut self.stdout), + MessageKind::Note => { + if self.always_stderr { + (&self.note_spec, &mut self.stderr) + } else { + (&self.note_spec, &mut self.stdout) + } + } MessageKind::Warning => (&self.warning_spec, &mut self.stderr), MessageKind::Error => (&self.error_spec, &mut self.stderr), }; @@ -76,7 +89,13 @@ impl TermcolorStatusBackend { } let stream = match kind { - MessageKind::Note => &mut self.stdout, + MessageKind::Note => { + if self.always_stderr { + &mut self.stderr + } else { + &mut self.stdout + } + } MessageKind::Warning => &mut self.stderr, MessageKind::Error => &mut self.stderr, }; @@ 
-108,7 +127,11 @@ impl TermcolorStatusBackend { pub fn note_styled(&mut self, args: Arguments) { if self.chatter > ChatterLevel::Minimal { - writeln!(self.stdout, "{}", args).expect("write to stdout failed"); + if self.always_stderr { + writeln!(self.stderr, "{}", args).expect("write to stderr failed"); + } else { + writeln!(self.stdout, "{}", args).expect("write to stdout failed"); + } } } @@ -166,13 +189,19 @@ impl StatusBackend for TermcolorStatusBackend { fn note_highlighted(&mut self, before: &str, highlighted: &str, after: &str) { if self.chatter > ChatterLevel::Minimal { - write!(self.stdout, "{}", before).expect("write to stdout failed"); - self.stdout + let stream = if self.always_stderr { + &mut self.stderr + } else { + &mut self.stdout + }; + + write!(stream, "{}", before).expect("write failed"); + stream .set_color(&self.highlight_spec) - .expect("write to stdout failed"); - write!(self.stdout, "{}", highlighted).expect("write to stdout failed"); - self.stdout.reset().expect("write to stdout failed"); - writeln!(self.stdout, "{}", after).expect("write to stdout failed"); + .expect("write failed"); + write!(stream, "{}", highlighted).expect("write failed"); + stream.reset().expect("write failed"); + writeln!(stream, "{}", after).expect("write failed"); } } From 51ee421466fddb6bd65a03346f7c3e1542cb368c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:47:54 -0400 Subject: [PATCH 24/30] tectonic(!): switch to using `tectonic_bundles` Start using the separated-out bundle implementation crate. Now you can work with bundles, and the cache, without having to link to XeTeX and everything! This is a BREAKING CHANGE because the original bundle implementations have been removed, and the Bundle trait has gained a new required method. 
--- CARGO_README.md | 2 + Cargo.lock | 14 + Cargo.toml | 8 +- src/config.rs | 18 +- src/docmodel.rs | 8 +- src/driver.rs | 3 +- src/io/cached_itarbundle.rs | 619 ------------------------------------ src/io/dirbundle.rs | 40 --- src/io/mod.rs | 52 --- src/io/zipbundle.rs | 69 ---- src/test_util.rs | 23 +- 11 files changed, 60 insertions(+), 796 deletions(-) delete mode 100644 src/io/cached_itarbundle.rs delete mode 100644 src/io/dirbundle.rs delete mode 100644 src/io/zipbundle.rs diff --git a/CARGO_README.md b/CARGO_README.md index f1205f2cf2..ea4e871465 100644 --- a/CARGO_README.md +++ b/CARGO_README.md @@ -57,8 +57,10 @@ sub-crates: - [`tectonic_bridge_graphite2`](https://crates.io/crates/tectonic_bridge_graphite2) - [`tectonic_bridge_harfbuzz`](https://crates.io/crates/tectonic_bridge_harfbuzz) - [`tectonic_bridge_icu`](https://crates.io/crates/tectonic_bridge_icu) +- [`tectonic_bundles`](https://crates.io/crates/tectonic_bundles) - [`tectonic_cfg_support`](https://crates.io/crates/tectonic_cfg_support) - [`tectonic_dep_support`](https://crates.io/crates/tectonic_dep_support) +- [`tectonic_docmodel`](https://crates.io/crates/tectonic_docmodel) - [`tectonic_engine_bibtex`](https://crates.io/crates/tectonic_engine_bibtex) - [`tectonic_engine_xdvipdfmx`](https://crates.io/crates/tectonic_engine_xdvipdfmx) - [`tectonic_engine_xetex`](https://crates.io/crates/tectonic_engine_xetex) diff --git a/Cargo.lock b/Cargo.lock index e32d0a8efa..ac54d517c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2122,6 +2122,7 @@ dependencies = [ "sha2", "structopt", "tectonic_bridge_core", + "tectonic_bundles", "tectonic_docmodel", "tectonic_engine_bibtex", "tectonic_engine_xdvipdfmx", @@ -2196,6 +2197,19 @@ dependencies = [ "tectonic_dep_support", ] +[[package]] +name = "tectonic_bundles" +version = "0.0.0-dev.0" +dependencies = [ + "flate2", + "fs2", + "tectonic_errors", + "tectonic_geturl", + "tectonic_io_base", + "tectonic_status_base", + "zip", +] + [[package]] name = 
"tectonic_cfg_support" version = "0.0.0-dev.0" diff --git a/Cargo.toml b/Cargo.toml index 2508cee0ed..102d6ea513 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ serde = { version = "^1.0", features = ["derive"], optional = true } sha2 = "^0.9" structopt = "0.3" tectonic_bridge_core = { path = "crates/bridge_core", version = "0.0.0-dev.0" } +tectonic_bundles = { path = "crates/bundles", version = "0.0.0-dev.0", default-features = false } tectonic_docmodel = { path = "crates/docmodel", version = "0.0.0-dev.0", optional = true } tectonic_engine_bibtex = { path = "crates/engine_bibtex", version = "0.0.0-dev.0" } tectonic_engine_xdvipdfmx = { path = "crates/engine_xdvipdfmx", version = "0.0.0-dev.0" } @@ -97,10 +98,10 @@ serialization = ["serde", "tectonic_docmodel", "toml"] external-harfbuzz = ["tectonic_engine_xetex/external-harfbuzz"] -geturl-curl = ["tectonic_geturl/curl"] -geturl-reqwest = ["tectonic_geturl/reqwest"] +geturl-curl = ["tectonic_bundles/geturl-curl", "tectonic_geturl/curl"] +geturl-reqwest = ["tectonic_bundles/geturl-reqwest", "tectonic_geturl/reqwest"] -native-tls-vendored = ["tectonic_geturl/native-tls-vendored"] +native-tls-vendored = ["tectonic_bundles/native-tls-vendored", "tectonic_geturl/native-tls-vendored"] # developer feature to compile with the necessary flags for profiling tectonic. 
profile = [] @@ -129,6 +130,7 @@ tectonic_bridge_flate = "thiscommit:2021-01-01:eer4ahL4" tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_icu = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" +tectonic_bundles = "thiscommit:2021-06-13:Q0esYor" tectonic_cfg_support = "thiscommit:aeRoo7oa" tectonic_dep_support = "5faf4205bdd3d31101b749fc32857dd746f9e5bc" tectonic_docmodel = "cd77b60d48b1ae3ef80d708e6858ea91cd9fa812" diff --git a/src/config.rs b/src/config.rs index 8a9c94f882..3efe1d6c37 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,15 +15,14 @@ use std::{ path::{Path, PathBuf}, sync::atomic::{AtomicBool, Ordering}, }; +use tectonic_bundles::{ + cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, +}; +use tectonic_io_base::app_dirs; use url::Url; use crate::{ - app_dirs, errors::{ErrorKind, Result}, - io::cached_itarbundle::CachedITarBundle, - io::dirbundle::DirBundle, - io::zipbundle::ZipBundle, - io::Bundle, status::StatusBackend, }; @@ -123,8 +122,13 @@ impl PersistentConfig { custom_cache_root: Option<&Path>, status: &mut dyn StatusBackend, ) -> Result> { - let bundle = CachedITarBundle::new(url, only_cached, custom_cache_root, status)?; + let mut cache = if let Some(root) = custom_cache_root { + Cache::get_for_custom_directory(root) + } else { + Cache::get_user_default()? 
+ }; + let bundle = cache.open::(url, only_cached, status)?; Ok(Box::new(bundle) as _) } @@ -190,7 +194,7 @@ impl Default for PersistentConfig { fn default() -> Self { PersistentConfig { default_bundles: vec![BundleInfo { - url: String::from("https://archive.org/services/purl/net/pkgwpub/tectonic-default"), + url: String::from(tectonic_bundles::FALLBACK_BUNDLE_URL), }], } } diff --git a/src/docmodel.rs b/src/docmodel.rs index cb747e7344..f36dadbc56 100644 --- a/src/docmodel.rs +++ b/src/docmodel.rs @@ -12,6 +12,9 @@ use std::{ fs, io, path::{Path, PathBuf}, }; +use tectonic_bundles::{ + cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, +}; use tectonic_docmodel::{ document::{BuildTargetType, Document}, workspace::{Workspace, WorkspaceCreator}, @@ -23,7 +26,6 @@ use crate::{ config, ctry, driver::{OutputFormat, PassSetting, ProcessingSessionBuilder}, errors::{ErrorKind, Result}, - io::{cached_itarbundle::CachedITarBundle, dirbundle::DirBundle, zipbundle::ZipBundle, Bundle}, status::StatusBackend, test_util, tt_note, }; @@ -109,10 +111,10 @@ impl DocumentExt for Document { Ok(Box::new(test_util::TestBundle::default())) } else if let Ok(url) = Url::parse(&self.bundle_loc) { if url.scheme() != "file" { - let bundle = CachedITarBundle::new( + let mut cache = Cache::get_user_default()?; + let bundle = cache.open::( &self.bundle_loc, setup_options.only_cached, - None, status, )?; Ok(Box::new(bundle)) diff --git a/src/driver.rs b/src/driver.rs index b98aad3bac..c7db3d2d91 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -27,6 +27,7 @@ use std::{ time::SystemTime, }; use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SystemRequestError}; +use tectonic_bundles::Bundle; use tectonic_io_base::{ digest::DigestData, filesystem::{FilesystemIo, FilesystemPrimaryInputIo}, @@ -40,7 +41,7 @@ use crate::{ io::{ format_cache::FormatCache, memory::{MemoryFileCollection, MemoryIo}, - Bundle, InputOrigin, + InputOrigin, }, status::StatusBackend, 
tt_error, tt_note, tt_warning, diff --git a/src/io/cached_itarbundle.rs b/src/io/cached_itarbundle.rs deleted file mode 100644 index 976916059c..0000000000 --- a/src/io/cached_itarbundle.rs +++ /dev/null @@ -1,619 +0,0 @@ -// Copyright 2017-2020 the Tectonic Project -// Licensed under the MIT License. - -use flate2::read::GzDecoder; -use fs2::FileExt; -use std::{ - collections::HashMap, - fs::{self, File}, - io::{BufRead, BufReader, Error as IoError, ErrorKind as IoErrorKind, Read, Write}, - path::{Path, PathBuf}, - str::FromStr, -}; -use tectonic_errors::{anyhow::bail, atry, Result}; -use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader}; - -use super::{try_open_file, Bundle, InputHandle, InputOrigin, IoProvider, OpenResult}; -use crate::app_dirs; -use crate::digest::{self, Digest, DigestData}; -use crate::errors::SyncError; -use crate::status::StatusBackend; -use crate::{tt_note, tt_warning}; - -const MAX_HTTP_ATTEMPTS: usize = 4; - -#[derive(Clone, Copy, Debug)] -struct FileInfo { - offset: u64, - length: u64, -} - -#[derive(Clone, Copy, Debug)] -struct LocalCacheItem { - _length: u64, - digest: DigestData, -} - -/// Attempts to download a file from the bundle. -fn get_file( - data: &mut DefaultRangeReader, - name: &str, - offset: u64, - length: usize, - status: &mut dyn StatusBackend, -) -> Result> { - // In principle it'd be cool to return a handle right to the HTTP - // response, but those can't be seekable, and doing so introduces - // lifetime-related issues. So for now we just slurp the whole thing - // into RAM. - - tt_note!(status, "downloading {}", name); - - // When fetching a bunch of resource files (i.e., on the first - // invocation), bintray will sometimes drop connections. The error - // manifests itself in a way that has a not-so-nice user experience. - // Our solution: retry the HTTP a few times in case it was a transient - // problem. 
- - let mut buf = Vec::with_capacity(length); - let mut overall_failed = true; - let mut any_failed = false; - - for _ in 0..MAX_HTTP_ATTEMPTS { - let mut stream = match data.read_range(offset, length) { - Ok(r) => r, - Err(e) => { - tt_warning!(status, "failure requesting \"{}\" from network", name; e); - any_failed = true; - continue; - } - }; - - if let Err(e) = stream.read_to_end(&mut buf) { - tt_warning!(status, "failure downloading \"{}\" from network", name; e.into()); - any_failed = true; - continue; - } - - overall_failed = false; - break; - } - - if overall_failed { - bail!( - "failed to retrieve \"{}\" from the network; \ - this most probably is not Tectonic's fault \ - -- please check your network connection.", - name - ); - } else if any_failed { - tt_note!(status, "download succeeded after retry"); - } - - Ok(buf) -} - -fn parse_index_line(line: &str) -> Result> { - let mut bits = line.split_whitespace(); - - if let (Some(name), Some(offset), Some(length)) = (bits.next(), bits.next(), bits.next()) { - Ok(Some(( - name.to_owned(), - FileInfo { - offset: offset.parse::()?, - length: length.parse::()?, - }, - ))) - } else { - // TODO: preserve the warning info or something! - Ok(None) - } -} - -/// Attempts to find the redirected url, download the index and digest. -fn get_everything( - backend: &mut DefaultBackend, - url: &str, - status: &mut dyn StatusBackend, -) -> Result<(String, String, String)> { - let url = backend.resolve_url(url, status)?; - - let index = { - let mut index = String::new(); - let index_url = format!("{}.index.gz", &url); - tt_note!(status, "downloading index {}", index_url); - GzDecoder::new(backend.get_url(&index_url, status)?).read_to_string(&mut index)?; - index - }; - - let digest_text = { - // Find the location of the digest file. - let digest_info = { - let mut digest_info = None; - for line in index.lines() { - if let Some((name, info)) = parse_index_line(line)? 
{ - if name == digest::DIGEST_NAME { - digest_info = Some(info); - break; - } - } - } - atry!(digest_info; ["backend does not provide needed {} file", digest::DIGEST_NAME]) - }; - - let mut range_reader = backend.open_range_reader(&url); - String::from_utf8(get_file( - &mut range_reader, - digest::DIGEST_NAME, - digest_info.offset, - digest_info.length as usize, - status, - )?) - .map_err(|e| e.utf8_error())? - }; - - Ok((digest_text, index, url)) -} - -#[derive(Clone, Debug)] -struct CacheContent { - digest_text: String, - redirect_url: String, - index: HashMap, -} - -/// Load cached data. -/// -/// If any of the files is not found return None. -fn load_cache( - digest_path: &Path, - redirect_base: &Path, - index_base: &Path, -) -> Result> { - // Convert file-not-found errors into None. - match load_cache_inner(digest_path, redirect_base, index_base) { - Ok(r) => Ok(Some(r)), - Err(e) => { - if let Some(ioe) = e.downcast_ref::() { - if ioe.kind() == IoErrorKind::NotFound { - return Ok(None); - } - } - - Err(e) - } - } -} - -/// See `load_cache`. -fn load_cache_inner( - digest_path: &Path, - redirect_base: &Path, - index_base: &Path, -) -> Result { - let digest_text = { - let f = File::open(digest_path)?; - let mut digest_text = String::with_capacity(digest::DIGEST_LEN); - f.take(digest::DIGEST_LEN as u64) - .read_to_string(&mut digest_text)?; - digest_text - }; - - let redirect_path = make_txt_path(redirect_base, &digest_text); - let redirect_url = fs::read_to_string(redirect_path)?; - - let index_path = make_txt_path(index_base, &digest_text); - - let index = { - let f = File::open(index_path)?; - let mut index = HashMap::new(); - for line in BufReader::new(f).lines() { - if let Some((name, info)) = parse_index_line(&line?)? 
{ - index.insert(name, info); - } - } - index - }; - Ok(CacheContent { - digest_text, - redirect_url, - index, - }) -} - -fn make_txt_path(base: &Path, digest_text: &str) -> PathBuf { - base.join(&digest_text).with_extension("txt") -} - -/// Bundle provided by an indexed tar file over http with a local cache. -#[derive(Debug)] -pub struct CachedITarBundle { - url: String, - redirect_url: String, - digest_path: PathBuf, - cached_digest: DigestData, - checked_digest: bool, - redirect_base: PathBuf, - manifest_path: PathBuf, - data_base: PathBuf, - contents: HashMap, - only_cached: bool, - - tar_data: DefaultRangeReader, - index: HashMap, -} - -impl CachedITarBundle { - pub fn new( - url: &str, - only_cached: bool, - custom_cache_root: Option<&Path>, - status: &mut dyn StatusBackend, - ) -> Result { - let mut backend = DefaultBackend::default(); - let digest_path = cache_dir("urls", custom_cache_root)?.join(app_dirs::sanitized(url)); - - let redirect_base = &cache_dir("redirects", custom_cache_root)?; - let index_base = &cache_dir("indexes", custom_cache_root)?; - let manifest_base = &cache_dir("manifests", custom_cache_root)?; - let data_base = &cache_dir("files", custom_cache_root)?; - - let mut checked_digest = false; - let CacheContent {digest_text, redirect_url, index} = - // Try loading the cached files. - match load_cache(&digest_path, &redirect_base, &index_base)? { - Some(c) => c, - None => { - // At least one of the cached files does not exists. We fetch everything from - // scratch and save the files. 
- let (digest_text, index, redirect_url) = get_everything(&mut backend, url, status)?; - let _ = DigestData::from_str(&digest_text)?; - checked_digest = true; - - file_create_write(&digest_path, |f| writeln!(f, "{}", digest_text))?; - file_create_write(make_txt_path(&redirect_base, &digest_text), |f| f.write_all(redirect_url.as_bytes()))?; - file_create_write(make_txt_path(&index_base, &digest_text), |f| f.write_all(index.as_bytes()))?; - - // Reload the cached files now when they were saved. - atry!(load_cache(&digest_path, &redirect_base, &index_base)?; ["cache files missing even after they were created"]) - } - }; - - let cached_digest = DigestData::from_str(&digest_text)?; - - // We can now figure out which manifest to use. - let manifest_path = make_txt_path(manifest_base, &digest_text); - - // Read it in, if it exists. - - let mut contents = HashMap::new(); - - match try_open_file(&manifest_path) { - OpenResult::NotAvailable => {} - OpenResult::Err(e) => { - return Err(e); - } - OpenResult::Ok(mfile) => { - // Note that the lock is released when the file is closed, - // which is good since BufReader::new() and BufReader::lines() - // consume their objects. 
- if let Err(e) = mfile.lock_shared() { - tt_warning!(status, "failed to lock manifest file \"{}\" for reading; this might be fine", - manifest_path.display(); e.into()); - } - - let f = BufReader::new(mfile); - - for res in f.lines() { - let line = res?; - let mut bits = line.rsplitn(3, ' '); - - let (original_name, length, digest) = - match (bits.next(), bits.next(), bits.next(), bits.next()) { - (Some(s), Some(t), Some(r), None) => (r, t, s), - _ => continue, - }; - - let name = original_name.to_owned(); - - let length = match length.parse::() { - Ok(l) => l, - Err(_) => continue, - }; - - let digest = if digest == "-" { - continue; - } else { - match DigestData::from_str(&digest) { - Ok(d) => d, - Err(e) => { - tt_warning!(status, "ignoring bad digest data \"{}\" for \"{}\" in \"{}\"", - &digest, original_name, manifest_path.display() ; e); - continue; - } - } - }; - - contents.insert( - name, - LocalCacheItem { - _length: length, - digest, - }, - ); - } - } - } - - // All set. - - let tar_data = backend.open_range_reader(&redirect_url); - - Ok(CachedITarBundle { - url: url.to_owned(), - redirect_url, - digest_path, - cached_digest, - checked_digest, - manifest_path, - data_base: data_base.to_owned(), - redirect_base: redirect_base.to_owned(), - contents, - only_cached, - tar_data, - index, - }) - } - - fn record_cache_result(&mut self, name: &str, length: u64, digest: DigestData) -> Result<()> { - let digest_text = digest.to_string(); - - // Due to a quirk about permissions for file locking on Windows, we - // need to add `.read(true)` to be able to lock a file opened in - // append mode. - - let mut man = fs::OpenOptions::new() - .append(true) - .create(true) - .read(true) - .open(&self.manifest_path)?; - - // Lock will be released when file is closed at the end of this function. 
- atry!(man.lock_exclusive(); ["failed to lock manifest file \"{}\" for writing", self.manifest_path.display()]); - - if !name.contains(|c| c == '\n' || c == '\r') { - writeln!(man, "{} {} {}", name, length, digest_text)?; - } - self.contents.insert( - name.to_owned(), - LocalCacheItem { - _length: length, - digest, - }, - ); - Ok(()) - } - - /// If we're going to make a request of the backend, we should check that - /// its digest is what we expect. If not, we do a lame thing where we - /// error out but set things up so that things should succeed if the - /// program is re-run. Exactly the lame TeX user experience that I've been - /// trying to avoid! - fn check_digest(&mut self, status: &mut dyn StatusBackend) -> Result<()> { - if self.checked_digest { - return Ok(()); - } - - // Do a quick and dirty check first and ignore errors. - if let Some(info) = self.index.get(digest::DIGEST_NAME) { - if let Ok(d) = get_file( - &mut self.tar_data, - digest::DIGEST_NAME, - info.offset, - info.length as usize, - status, - ) { - if let Ok(d) = String::from_utf8(d) { - if let Ok(d) = DigestData::from_str(&d) { - if self.cached_digest == d { - // We managed to pull some data that match the digest. - // We can be quite confident that the bundle is what we expect it to be. - self.checked_digest = true; - return Ok(()); - } - } - } - } - } - - // The quick check failed. Try to pull all data to make sure that it wasn't a network - // error or that the redirect url hasn't been updated. - let mut backend = DefaultBackend::default(); - let (digest_text, _index, redirect_url) = get_everything(&mut backend, &self.url, status)?; - - let current_digest = - atry!(DigestData::from_str(&digest_text); ["bad SHA256 digest from bundle"]); - - if self.cached_digest != current_digest { - // Crap! The backend isn't what we thought it was. Rewrite the - // digest file so that next time we'll start afresh. 
- - file_create_write(&self.digest_path, |f| { - writeln!(f, "{}", current_digest.to_string()) - })?; - bail!("backend digest changed; rerun tectonic to use updated information"); - } - - if self.redirect_url != redirect_url { - // The redirect url has changed, let's update it. - let redirect_path = make_txt_path(&self.redirect_base, &digest_text); - file_create_write(&redirect_path, |f| f.write_all(redirect_url.as_bytes()))?; - - self.redirect_url = redirect_url; - } - - // Index should've changed as the digest hasn't. - - // Phew, the backend hasn't changed. Don't check again. - self.checked_digest = true; - Ok(()) - } - - /// Find the path in the local cache for the provided file. Download the file first if it is - /// not in the local cache already. - fn path_for_name(&mut self, name: &str, status: &mut dyn StatusBackend) -> OpenResult { - if let Some(info) = self.contents.get(name) { - return match info.digest.create_two_part_path(&self.data_base) { - Ok(p) => OpenResult::Ok(p), - Err(e) => OpenResult::Err(e), - }; - } - - // The file is not in the cache and we are asked not to try to fetch it. - if self.only_cached { - return OpenResult::NotAvailable; - } - - let info = match self.index.get(name).cloned() { - Some(info) => info, - None => return OpenResult::NotAvailable, - }; - - // Bummer, we haven't seen this file before. We need to (try to) fetch - // the item from the backend, saving it to disk and calculating its - // digest ourselves, then enter it in the cache and in our manifest. - // Fun times. Because we're touching the backend, we need to verify that - // its digest is what we think. - - if let Err(e) = self.check_digest(status) { - return OpenResult::Err(e); - } - - // The bundle's overall digest is OK. Now try open the file. If it's - // not available, cache that result, since LaTeX compilations commonly - // touch nonexistent files. If we didn't maintain the negative cache, - // we'd have to touch the network for virtually every compilation. 
- - let content = match get_file( - &mut self.tar_data, - name, - info.offset, - info.length as usize, - status, - ) { - Ok(c) => c, - Err(e) => return OpenResult::Err(e), - }; - - // OK, we can stream the file to a temporary location on disk, - // computing its SHA256 as we go. - - let length = content.len(); - - let mut digest_builder = digest::create(); - digest_builder.update(&content); - - let digest = DigestData::from(digest_builder); - - let final_path = match digest.create_two_part_path(&self.data_base) { - Ok(p) => p, - Err(e) => return OpenResult::Err(e), - }; - - // Perform a racy check for the destination existing, because this - // matters on Windows: if the destination is already there, we'll get - // an error because the destination is marked read-only. Assuming - // non-pathological filesystem manipulation, though, we'll only be - // subject to the race once. - - if !final_path.exists() { - if let Err(e) = file_create_write(&final_path, |f| f.write_all(&content)) { - return OpenResult::Err(e); - } - - // Now we can make the file readonly. It would be nice to set the - // permissions using the already-open file handle owned by the - // tempfile, but mkstemp doesn't give us access. - let mut perms = match fs::metadata(&final_path) { - Ok(p) => p, - Err(e) => { - return OpenResult::Err(e.into()); - } - } - .permissions(); - perms.set_readonly(true); - - if let Err(e) = fs::set_permissions(&final_path, perms) { - return OpenResult::Err(e.into()); - } - } - - // And finally add a record of this file to our manifest. Note that - // we're opening and closing this file every time we load a new file; - // not so efficient, but whatever. 
- - if let Err(e) = self.record_cache_result(name, length as u64, digest) { - return OpenResult::Err(e); - } - - OpenResult::Ok(final_path) - } -} - -impl IoProvider for CachedITarBundle { - fn input_open_name( - &mut self, - name: &str, - status: &mut dyn StatusBackend, - ) -> OpenResult { - let path = match self.path_for_name(name, status) { - OpenResult::Ok(p) => p, - OpenResult::NotAvailable => return OpenResult::NotAvailable, - OpenResult::Err(e) => return OpenResult::Err(e), - }; - - let f = match File::open(&path) { - Ok(f) => f, - Err(e) => return OpenResult::Err(e.into()), - }; - - OpenResult::Ok(InputHandle::new_read_only( - name, - BufReader::new(f), - InputOrigin::Other, - )) - } -} - -impl Bundle for CachedITarBundle { - fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { - Ok(self.cached_digest) - } -} - -/// A convenience method to provide a better error message when writing to a created file. -fn file_create_write(path: P, write_fn: F) -> Result<()> -where - P: AsRef, - F: FnOnce(&mut File) -> std::result::Result<(), E>, - E: std::error::Error + 'static + Sync + Send, -{ - let path = path.as_ref(); - let mut f = atry!(File::create(path); ["couldn't open {} for writing", - path.display()]); - atry!(write_fn(&mut f); ["couldn't write to {}", path.display()]); - Ok(()) -} - -fn cache_dir(path: &str, custom_cache_root: Option<&Path>) -> Result { - if let Some(root) = custom_cache_root { - if !root.is_dir() { - bail!("Custom cache path {} is not a directory", root.display()); - } - let full_path = root.join(path); - atry!(fs::create_dir_all(&full_path); ["failed to create directory {}", full_path.display()]); - Ok(full_path) - } else { - Ok(app_dirs::user_cache_dir(path).map_err(SyncError::new)?) 
- } -} diff --git a/src/io/dirbundle.rs b/src/io/dirbundle.rs deleted file mode 100644 index 3a898bcde5..0000000000 --- a/src/io/dirbundle.rs +++ /dev/null @@ -1,40 +0,0 @@ -use std::{fs::File, io::BufReader, path::PathBuf}; - -use super::{Bundle, InputHandle, InputOrigin, IoProvider, OpenResult}; -use crate::status::StatusBackend; - -pub struct DirBundle { - dir: PathBuf, -} - -impl DirBundle { - pub fn new(dir: PathBuf) -> DirBundle { - DirBundle { dir } - } -} - -impl IoProvider for DirBundle { - fn input_open_name( - &mut self, - name: &str, - _status: &mut dyn StatusBackend, - ) -> OpenResult { - let mut path = self.dir.clone(); - path.push(name); - - if path.is_file() { - match File::open(path) { - Err(e) => OpenResult::Err(e.into()), - Ok(f) => OpenResult::Ok(InputHandle::new( - name, - BufReader::new(f), - InputOrigin::Filesystem, - )), - } - } else { - OpenResult::NotAvailable - } - } -} - -impl Bundle for DirBundle {} diff --git a/src/io/mod.rs b/src/io/mod.rs index f6a4a6760c..c18997f5dc 100644 --- a/src/io/mod.rs +++ b/src/io/mod.rs @@ -3,15 +3,10 @@ //! Extensions to Tectonic’s pluggable I/O backend. -use std::{io::Read, str::FromStr}; -use tectonic_errors::{anyhow::bail, atry, Result}; use tectonic_status_base::StatusBackend; -pub mod cached_itarbundle; -pub mod dirbundle; pub mod format_cache; pub mod memory; -pub mod zipbundle; // Convenience re-exports. @@ -28,53 +23,6 @@ pub use tectonic_io_base::{ pub use self::memory::MemoryIo; -/// A special IoProvider that can make TeX format files. -/// -/// A “bundle” is expected to contain a large number of TeX support files — -/// for instance, a compilation of a TeXLive distribution. In terms of the -/// software architecture, though, what is special about a bundle is that one -/// can generate one or more TeX format files from its contents without -/// reference to any other I/O resources. -pub trait Bundle: IoProvider { - /// Get a cryptographic digest summarizing this bundle’s contents. 
- /// - /// The digest summarizes the exact contents of every file in the bundle. - /// It is computed from the sorted names and SHA256 digests of the - /// component files [as implemented in the script - /// builder/make-zipfile.py](https://github.com/tectonic-typesetting/tectonic-staging/blob/master/builder/make-zipfile.py#L138) - /// in the `tectonic-staging` module. - /// - /// The default implementation gets the digest from a file name - /// `SHA256SUM`, which is expected to contain the digest in hex-encoded - /// format. - fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { - let digest_text = match self.input_open_name(digest::DIGEST_NAME, status) { - OpenResult::Ok(h) => { - let mut text = String::new(); - h.take(64).read_to_string(&mut text)?; - text - } - - OpenResult::NotAvailable => { - // Broken or un-cacheable backend. - bail!("bundle does not provide needed SHA256SUM file"); - } - - OpenResult::Err(e) => { - return Err(e); - } - }; - - Ok(atry!(DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"])) - } -} - -impl Bundle for Box { - fn get_digest(&mut self, status: &mut dyn StatusBackend) -> Result { - (**self).get_digest(status) - } -} - // Helper for testing. FIXME: I want this to be conditionally compiled with // #[cfg(test)] but things break if I do that. diff --git a/src/io/zipbundle.rs b/src/io/zipbundle.rs deleted file mode 100644 index 6d30b801a6..0000000000 --- a/src/io/zipbundle.rs +++ /dev/null @@ -1,69 +0,0 @@ -// src/io/zipbundle.rs -- I/O on files in a Zipped-up "bundle" -// Copyright 2016-2020 the Tectonic Project -// Licensed under the MIT License. 
- -use std::{ - fs::File, - io::{Cursor, Read, Seek}, - path::Path, -}; -use zip::{result::ZipError, ZipArchive}; - -use super::{Bundle, InputHandle, InputOrigin, IoProvider, OpenResult}; -use crate::errors::Result; -use crate::status::StatusBackend; - -pub struct ZipBundle { - zip: ZipArchive, -} - -impl ZipBundle { - pub fn new(reader: R) -> Result> { - Ok(ZipBundle { - zip: ZipArchive::new(reader)?, - }) - } -} - -impl ZipBundle { - pub fn open>(path: P) -> Result> { - Self::new(File::open(path)?) - } -} - -impl IoProvider for ZipBundle { - fn input_open_name( - &mut self, - name: &str, - _status: &mut dyn StatusBackend, - ) -> OpenResult { - // We need to be able to look at other items in the Zip file while - // reading this one, so the only path forward is to read the entire - // contents into a buffer right now. RAM is cheap these days. - - let mut zipitem = match self.zip.by_name(name) { - Ok(f) => f, - Err(e) => { - return match e { - ZipError::Io(sube) => OpenResult::Err(sube.into()), - ZipError::FileNotFound => OpenResult::NotAvailable, - _ => OpenResult::Err(e.into()), - }; - } - }; - - let mut buf = Vec::with_capacity(zipitem.size() as usize); - - if let Err(e) = zipitem.read_to_end(&mut buf) { - return OpenResult::Err(e.into()); - } - - OpenResult::Ok(InputHandle::new_read_only( - name, - Cursor::new(buf), - InputOrigin::Other, - )) - } -} - -impl Bundle for ZipBundle {} diff --git a/src/test_util.rs b/src/test_util.rs index e2fc2e8525..09c9fa89ec 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -35,12 +35,13 @@ //! That call simultaneously tells this module where to find the test assets, //! and also activates the test mode. 
-use std::{collections::HashSet, env, ffi::OsStr, path::PathBuf}; +use std::{collections::HashSet, env, ffi::OsStr, fs, path::PathBuf}; +use tectonic_bundles::Bundle; use tectonic_errors::Result; use crate::{ digest::DigestData, - io::{Bundle, FilesystemIo, InputHandle, IoProvider, OpenResult}, + io::{FilesystemIo, InputHandle, IoProvider, OpenResult}, status::StatusBackend, }; @@ -129,4 +130,22 @@ impl Bundle for TestBundle { fn get_digest(&mut self, _status: &mut dyn StatusBackend) -> Result { Ok(DigestData::zeros()) } + + fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { + // XXX: this is copy/paste of DirBundle. + let mut files = Vec::new(); + + for entry in fs::read_dir(&self.0.root())? { + let entry = entry?; + + // This catches both regular files and symlinks:` + if !entry.file_type()?.is_dir() { + if let Some(s) = entry.file_name().to_str() { + files.push(s.to_owned()); + } + } + } + + Ok(files) + } } From a55aba9bfe0bad1a1c9e19cfe81bbd2187f4f71d Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:49:25 -0400 Subject: [PATCH 25/30] v2cli: add some new bundle-related commands --- docs/src/SUMMARY.md | 3 + docs/src/v2cli/bundle.md | 73 ++++++++++++ docs/src/v2cli/show.md | 36 ++++++ docs/src/v2cli/watch.md | 29 +++++ src/bin/tectonic/v2cli.rs | 229 ++++++++++++++++++++++++++++++++++++-- 5 files changed, 363 insertions(+), 7 deletions(-) create mode 100644 docs/src/v2cli/bundle.md create mode 100644 docs/src/v2cli/show.md create mode 100644 docs/src/v2cli/watch.md diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 407fce6ff0..47266e17da 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -12,8 +12,11 @@ # “V2” Command-Line Interface - [`tectonic -X build`](v2cli/build.md) +- [`tectonic -X bundle`](v2cli/bundle.md) - [`tectonic -X compile`](v2cli/compile.md) - [`tectonic -X new`](v2cli/new.md) +- [`tectonic -X show`](v2cli/show.md) +- [`tectonic -X watch`](v2cli/watch.md) # Concept Reference diff --git 
a/docs/src/v2cli/bundle.md b/docs/src/v2cli/bundle.md new file mode 100644 index 0000000000..e294c1a7e1 --- /dev/null +++ b/docs/src/v2cli/bundle.md @@ -0,0 +1,73 @@ +# tectonic -X bundle + +Commands relating to Tectonic’s “bundles” of support files. + +***This is a [V2 CLI][v2cli-ref] command. For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +The `bundle` subcommands are: + +- [`tectonic -X bundle cat`](#tectonic--x-bundle-cat) +- [`tectonic -X bundle search`](#tectonic--x-bundle-search) + + +## tectonic -X bundle cat + +Print out a file stored in the current document’s backing bundle. + +#### Usage Synopsis + +```sh +tectonic -X bundle cat +``` + +#### Example + +```sh +$ tectonic -X bundle cat latex.ltx +%% +%% This is file `latex.ltx', +%% generated with the docstrip utility. +... +``` + +#### Remarks + +If this command is run outside of a [document workspace](../ref/workspaces.md), +the system default bundle will be used. + + +## tectonic -X bundle search + +Print out the names of files in the current document’s backing bundle, +potentially with filtering. + +#### Usage Synopsis + +```sh +tectonic -X bundle search [TERM] +``` + +#### Example + +```sh +$ tectonic -X bundle search minted +minted1.sty +tcbminted.code.tex +minted.4ht +minted.sty +``` + +#### Remarks + +If no term is specified, *all* of the files in the bundle are printed. The +ordering of those filenames is unspecified. + +The default search method is to use simple substring matching. Other methods may +be added in the future, activated by additional options. + +If this command is run outside of a [document workspace](../ref/workspaces.md), +the system default bundle will be used. 
diff --git a/docs/src/v2cli/show.md b/docs/src/v2cli/show.md new file mode 100644 index 0000000000..4697513674 --- /dev/null +++ b/docs/src/v2cli/show.md @@ -0,0 +1,36 @@ +# tectonic -X show + +Display various useful pieces of information. + +***This is a [V2 CLI][v2cli-ref] command. For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +The `show` subcommands are: + +- [`tectonic -X show user-cache-dir`](#tectonic--x-show-user-cache-dir) + +## tectonic -X show user-cache-dir + +Print out the location of Tectonic’s default per-user cache directory. + +#### Usage Synopsis + +```sh +tectonic -X show user-cache-dir +``` + +#### Example + +```sh +$ tectonic -X show user-cache-dir +/home/knuth/.cache/Tectonic # Unix + +$ tectonic -X show user-cache-dir +/home/knuth/Library/Caches/Tectonic # macOS + +$ tectonic -X show user-cache-dir +C:\Users\knuth\AppData\Local\TectonicProject\Tectonic # Windows +``` diff --git a/docs/src/v2cli/watch.md b/docs/src/v2cli/watch.md new file mode 100644 index 0000000000..97b3368391 --- /dev/null +++ b/docs/src/v2cli/watch.md @@ -0,0 +1,29 @@ +# tectonic -X watch + +Build the current document and rebuild it as input files change. + +***This is a [V2 CLI][v2cli-ref] command. For information on the original (“V1” +CLI), see [its reference page][v1cli-ref].*** + +[v2cli-ref]: ../ref/v2cli.md +[v1cli-ref]: ../ref/v1cli.md + +#### Usage Synopsis + +```sh +tectonic -X watch + [--exec COMMAND] [-x COMMAND] +``` + +#### Remarks + +This command builds the current document in the same fashion as [`tectonic -X +build`](./build.md), and then stays running and watches for changes to the input +files. It rebuilds the document when changes are detected. + +#### Command-Line Options + +The `--exec` option (or `-x` for short) configures the command that is used to +run the document build. 
The value of this option is appended to `tectonic -X` +and defaults to `build`. If you want to pass options to the build command, this +is the way to do so. diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index e9012432b6..7fe825462d 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -15,6 +15,7 @@ use tectonic::{ status::{termcolor::TermcolorStatusBackend, ChatterLevel, StatusBackend}, tt_error, tt_note, }; +use tectonic_bundles::Bundle; use tectonic_docmodel::workspace::{Workspace, WorkspaceCreator}; use tectonic_errors::Error as NewError; use tectonic_status_base::plain::PlainStatusBackend; @@ -50,6 +51,13 @@ struct V2CliOptions { command: Commands, } +/// A semi-hack to allow command-specific customizations of the centralized app +/// initialization. +#[derive(Debug, Default)] +struct CommandCustomizations { + always_stderr: bool, +} + /// The main function for the Cargo-like, "V2" CLI. This intentionally /// duplicates a lot of the "old" main() function, so that the implementation /// can drift over time as needed. @@ -73,6 +81,13 @@ pub fn v2_main(effective_args: &[OsString]) { let args = V2CliOptions::from_iter(effective_args); + // Command-specific customizations before we do our centralized setup. + // This is a semi-hack so that we can set up certain commands to ensure + // that status info is always printed to stderr. + + let mut customizations = CommandCustomizations::default(); + args.command.customize(&mut customizations); + // Set up colorized output. 
let chatter_level = ChatterLevel::from_str(&args.chatter_level).unwrap(); @@ -84,9 +99,13 @@ pub fn v2_main(effective_args: &[OsString]) { }; let mut status = if use_cli_color { - Box::new(TermcolorStatusBackend::new(chatter_level)) as Box + let mut sb = TermcolorStatusBackend::new(chatter_level); + sb.always_stderr(customizations.always_stderr); + Box::new(sb) as Box } else { - Box::new(PlainStatusBackend::new(chatter_level)) as Box + let mut sb = PlainStatusBackend::new(chatter_level); + sb.always_stderr(customizations.always_stderr); + Box::new(sb) as Box }; // For now ... @@ -111,26 +130,47 @@ enum Commands { /// Build a document Build(BuildCommand), + #[structopt(name = "bundle")] + /// Commands relating to this document’s TeX file bundle + Bundle(BundleCommand), + #[structopt(name = "compile")] /// Run a standalone (La)TeX compilation Compile(crate::compile::CompileOptions), - #[structopt(name = "watch")] - /// Watch input files and execute commands on change - Watch(WatchCommand), - #[structopt(name = "new")] /// Create a new document New(NewCommand), + + #[structopt(name = "show")] + /// Display various useful pieces of information + Show(ShowCommand), + + #[structopt(name = "watch")] + /// Watch input files and execute commands on change + Watch(WatchCommand), } impl Commands { + fn customize(&self, cc: &mut CommandCustomizations) { + match self { + Commands::Build(o) => o.customize(cc), + Commands::Bundle(o) => o.customize(cc), + Commands::Compile(_) => {} // avoid namespacing/etc issues + Commands::New(o) => o.customize(cc), + Commands::Show(o) => o.customize(cc), + Commands::Watch(o) => o.customize(cc), + } + } + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { match self { Commands::Build(o) => o.execute(config, status), + Commands::Bundle(o) => o.execute(config, status), Commands::Compile(o) => o.execute(config, status), - Commands::Watch(o) => o.execute(config, status), Commands::New(o) => o.execute(config, 
status), + Commands::Show(o) => o.execute(config, status), + Commands::Watch(o) => o.execute(config, status), } } } @@ -160,6 +200,8 @@ pub struct BuildCommand { } impl BuildCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let ws = Workspace::open_from_environment()?; let doc = ws.first_document(); @@ -197,6 +239,133 @@ impl BuildCommand { } } +/// `bundle`: Commands relating to Tectonic bundles +#[derive(Debug, PartialEq, StructOpt)] +pub struct BundleCommand { + #[structopt(subcommand)] + command: BundleCommands, +} + +#[derive(Debug, PartialEq, StructOpt)] +enum BundleCommands { + #[structopt(name = "cat")] + /// Dump the contents of a file in the bundle + Cat(BundleCatCommand), + + #[structopt(name = "search")] + /// Filter the list of filenames contained in the bundle + Search(BundleSearchCommand), +} + +impl BundleCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + match &self.command { + BundleCommands::Cat(c) => c.customize(cc), + BundleCommands::Search(c) => c.customize(cc), + } + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + match self.command { + BundleCommands::Cat(c) => c.execute(config, status), + BundleCommands::Search(c) => c.execute(config, status), + } + } +} + +fn get_a_bundle( + _config: PersistentConfig, + only_cached: bool, + status: &mut dyn StatusBackend, +) -> Result> { + use tectonic_docmodel::workspace::NoWorkspaceFoundError; + + match Workspace::open_from_environment() { + Ok(ws) => { + let doc = ws.first_document(); + let mut options = DocumentSetupOptions::new(true); + options.only_cached(only_cached); + doc.bundle(&options, status) + } + + Err(e) => { + if e.downcast_ref::().is_none() { + Err(e.into()) + } else { + tt_note!( + status, + "not in a document workspace; using the built-in default bundle" + ); + Ok(Box::new(tectonic_bundles::get_fallback_bundle( + 
only_cached, + status, + )?)) + } + } + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct BundleCatCommand { + /// Use only resource files cached locally + #[structopt(short = "C", long)] + only_cached: bool, + + #[structopt(help = "The name of the file to dump")] + filename: String, +} + +impl BundleCatCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + let mut bundle = get_a_bundle(config, self.only_cached, status)?; + let mut ih = bundle + .input_open_name(&self.filename, status) + .must_exist()?; + std::io::copy(&mut ih, &mut std::io::stdout())?; + Ok(0) + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct BundleSearchCommand { + /// Use only resource files cached locally + #[structopt(short = "C", long)] + only_cached: bool, + + #[structopt(help = "The search term")] + term: Option, +} + +impl BundleSearchCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + let mut bundle = get_a_bundle(config, self.only_cached, status)?; + let files = bundle.all_files(status)?; + + // Is there a better way to do this? 
+ let filter: Box bool> = if let Some(t) = self.term { + Box::new(move |s: &str| s.contains(&t)) + } else { + Box::new(|_: &str| true) + }; + + for filename in &files { + if filter(filename) { + println!("{}", filename); + } + } + + Ok(0) + } +} + /// `watch`: Watch input files and execute commands on change #[derive(Debug, PartialEq, StructOpt)] pub struct WatchCommand { @@ -206,6 +375,8 @@ pub struct WatchCommand { } impl WatchCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, _config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let exe_name = crate::watch::get_trimmed_exe_name() .into_os_string() @@ -276,6 +447,8 @@ pub struct NewCommand { } impl NewCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { tt_note!( status, @@ -291,3 +464,45 @@ impl NewCommand { Ok(0) } } + +/// `show`: Show various useful pieces of information. +#[derive(Debug, PartialEq, StructOpt)] +pub struct ShowCommand { + #[structopt(subcommand)] + command: ShowCommands, +} + +#[derive(Debug, PartialEq, StructOpt)] +enum ShowCommands { + #[structopt(name = "user-cache-dir")] + /// Print the location of the default per-user cache directory + UserCacheDir(ShowUserCacheDirCommand), +} + +impl ShowCommand { + fn customize(&self, cc: &mut CommandCustomizations) { + match &self.command { + ShowCommands::UserCacheDir(c) => c.customize(cc), + } + } + + fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { + match self.command { + ShowCommands::UserCacheDir(c) => c.execute(config, status), + } + } +} + +#[derive(Debug, PartialEq, StructOpt)] +struct ShowUserCacheDirCommand {} + +impl ShowUserCacheDirCommand { + fn customize(&self, _cc: &mut CommandCustomizations) {} + + fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result { + use tectonic_bundles::cache::Cache; + let cache = 
Cache::get_user_default()?; + println!("{}", cache.root().display()); + Ok(0) + } +} From 562442d22c68dc826aa0706d643fe5b46eaf84e2 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:54:23 -0400 Subject: [PATCH 26/30] src/test_util.rs: use DirBundle here --- src/test_util.rs | 37 +++++++++++-------------------------- 1 file changed, 11 insertions(+), 26 deletions(-) diff --git a/src/test_util.rs b/src/test_util.rs index 09c9fa89ec..1a37b116e8 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -35,13 +35,13 @@ //! That call simultaneously tells this module where to find the test assets, //! and also activates the test mode. -use std::{collections::HashSet, env, ffi::OsStr, fs, path::PathBuf}; -use tectonic_bundles::Bundle; +use std::{env, ffi::OsStr, path::PathBuf}; +use tectonic_bundles::{dir::DirBundle, Bundle}; use tectonic_errors::Result; use crate::{ digest::DigestData, - io::{FilesystemIo, InputHandle, IoProvider, OpenResult}, + io::{InputHandle, IoProvider, OpenResult}, status::StatusBackend, }; @@ -102,16 +102,15 @@ pub fn test_path(parts: &[&str]) -> PathBuf { } /// Utility for being able to treat the "assets/" directory as a bundle. -pub struct TestBundle(FilesystemIo); +/// +/// I think we want to always wrap DirBundle so that we can override +/// `get_digest()`? But once DirBundle implements `get_digest()` for real we +/// could consider just dropping this type altogether. +pub struct TestBundle(DirBundle); impl Default for TestBundle { fn default() -> Self { - TestBundle(FilesystemIo::new( - &test_path(&["assets"]), - false, - false, - HashSet::new(), - )) + TestBundle(DirBundle::new(&test_path(&["assets"]))) } } @@ -131,21 +130,7 @@ impl Bundle for TestBundle { Ok(DigestData::zeros()) } - fn all_files(&mut self, _status: &mut dyn StatusBackend) -> Result> { - // XXX: this is copy/paste of DirBundle. - let mut files = Vec::new(); - - for entry in fs::read_dir(&self.0.root())? 
{ - let entry = entry?; - - // This catches both regular files and symlinks:` - if !entry.file_type()?.is_dir() { - if let Some(s) = entry.file_name().to_str() { - files.push(s.to_owned()); - } - } - } - - Ok(files) + fn all_files(&mut self, status: &mut dyn StatusBackend) -> Result> { + self.0.all_files(status) } } From d12e9964110bbf6a02d93247164135a3180a6640 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 11:56:33 -0400 Subject: [PATCH 27/30] src/bin/tectonic/main.rs: you know what ... get rid of the beta flag --- src/bin/tectonic/main.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/bin/tectonic/main.rs b/src/bin/tectonic/main.rs index 4bc0a56462..436865ae55 100644 --- a/src/bin/tectonic/main.rs +++ b/src/bin/tectonic/main.rs @@ -11,7 +11,6 @@ use tectonic::{ errors::SyncError, status::termcolor::TermcolorStatusBackend, status::{ChatterLevel, StatusBackend}, - tt_note, }; mod compile; @@ -128,13 +127,6 @@ fn main() { Box::new(PlainStatusBackend::new(chatter_level)) as Box }; - // For now ... - - tt_note!( - status, - "this is a BETA release; ask questions and report bugs at https://tectonic.newton.cx/" - ); - // Now that we've got colorized output, pass off to the inner function ... // all so that we can print out the word "error:" in red. This code // parallels various bits of the `error_chain` crate. 
From 4550bc05bf4f636f48456b87a9204bba8e42151c Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Sun, 13 Jun 2021 12:01:51 -0400 Subject: [PATCH 28/30] v2cli: use always-stderr status mode for "show user-cache-dir" --- src/bin/tectonic/v2cli.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index 7fe825462d..96f389b7b9 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -497,7 +497,9 @@ impl ShowCommand { struct ShowUserCacheDirCommand {} impl ShowUserCacheDirCommand { - fn customize(&self, _cc: &mut CommandCustomizations) {} + fn customize(&self, cc: &mut CommandCustomizations) { + cc.always_stderr = true; + } fn execute(self, _config: PersistentConfig, _status: &mut dyn StatusBackend) -> Result { use tectonic_bundles::cache::Cache; From 8ed3270c8119a41e42624b75247ac10c133fba58 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Mon, 14 Jun 2021 18:34:55 -0400 Subject: [PATCH 29/30] bridge_core: add a basic security infrastructure After adding shell-escape I started feeling like we should have a bit more of a systematic approach to ensuring that API users do their due diligence with regards to security. Here, we minorly break the crate behavior to default to disabling all known-insecure features (currently, only shell-escape), but give a new mechanism to override that. The setup is centralized such that whenever you try to create a security policy, it can be overridden in a more conservative direction by setting the environment variable `TECTONIC_UNTRUSTED_MODE`. 
--- crates/bridge_core/README.md | 2 +- crates/bridge_core/src/lib.rs | 144 +++++++++++++++++++++++++++++++--- 2 files changed, 133 insertions(+), 13 deletions(-) diff --git a/crates/bridge_core/README.md b/crates/bridge_core/README.md index 84ae7a8a77..06dbc3f3c1 100644 --- a/crates/bridge_core/README.md +++ b/crates/bridge_core/README.md @@ -28,6 +28,6 @@ use tectonic_bridge_core; ## Cargo features -This crate does not currently provides any [Cargo features][features]. +This crate does not currently provide any [Cargo features][features]. [features]: https://doc.rust-lang.org/cargo/reference/features.html diff --git a/crates/bridge_core/src/lib.rs b/crates/bridge_core/src/lib.rs index 00baf3edc5..79679c49f7 100644 --- a/crates/bridge_core/src/lib.rs +++ b/crates/bridge_core/src/lib.rs @@ -216,14 +216,32 @@ impl std::error::Error for EngineAbortedError {} pub struct CoreBridgeLauncher<'a> { hooks: &'a mut dyn DriverHooks, status: &'a mut dyn StatusBackend, + security: SecuritySettings, } impl<'a> CoreBridgeLauncher<'a> { /// Set up a new context for launching bridged FFI code. + /// + /// This function uses the default security stance, which disallows all + /// known-insecure engine features. Use [`Self::new_with_security`] to + /// provide your own security settings that can attempt to allow the use of + /// such features. pub fn new(hooks: &'a mut dyn DriverHooks, status: &'a mut dyn StatusBackend) -> Self { - CoreBridgeLauncher { hooks, status } + Self::new_with_security(hooks, status, SecuritySettings::default()) } + /// Set up a new context for launching bridged FFI code. + pub fn new_with_security( + hooks: &'a mut dyn DriverHooks, + status: &'a mut dyn StatusBackend, + security: SecuritySettings, + ) -> Self { + CoreBridgeLauncher { + hooks, + status, + security, + } + } /// Invoke a function to launch a bridged FFI engine with a global mutex /// held. 
/// @@ -242,7 +260,7 @@ impl<'a> CoreBridgeLauncher<'a> { F: FnOnce(&mut CoreBridgeState<'_>) -> Result, { let _guard = ENGINE_LOCK.lock().unwrap(); - let mut state = CoreBridgeState::new(self.hooks, self.status); + let mut state = CoreBridgeState::new(self.security.clone(), self.hooks, self.status); let result = callback(&mut state); if let Err(ref e) = result { @@ -262,6 +280,9 @@ impl<'a> CoreBridgeLauncher<'a> { /// these state structures into the C/C++ layer. It is essential that lifetimes /// be properly managed across the Rust/C boundary. pub struct CoreBridgeState<'a> { + /// The security settings for this invocation + security: SecuritySettings, + /// The driver hooks associated with this engine invocation. hooks: &'a mut dyn DriverHooks, @@ -286,10 +307,12 @@ pub struct CoreBridgeState<'a> { impl<'a> CoreBridgeState<'a> { fn new( + security: SecuritySettings, hooks: &'a mut dyn DriverHooks, status: &'a mut dyn StatusBackend, ) -> CoreBridgeState<'a> { CoreBridgeState { + security, hooks, status, output_handles: Vec::new(), @@ -636,22 +659,119 @@ impl<'a> CoreBridgeState<'a> { } fn shell_escape(&mut self, command: &str) -> bool { - match self.hooks.sysrq_shell_escape(command, self.status) { - Ok(_) => false, + if self.security.allow_shell_escape() { + match self.hooks.sysrq_shell_escape(command, self.status) { + Ok(_) => false, - Err(e) => { - tt_error!( - self.status, - "failed to execute the shell-escape command \"{}\": {}", - command, - e - ); - true + Err(e) => { + tt_error!( + self.status, + "failed to execute the shell-escape command \"{}\": {}", + command, + e + ); + true + } } + } else { + tt_error!( + self.status, + "forbidden to execute shell-escape command \"{}\"", + command + ); + true } } } +/// A type for storing settings about potentially insecure engine features. +/// +/// This type encapsulates configuration about which potentially insecure engine +/// features are enabled. 
Methods that configure or instantiate engines require +/// values of this type, and values of this type can only be created through +/// centralized methods that respect standard environment variables, ensuring +/// that there is some level of uniform control over the activation of any +/// known-insecure features. +/// +/// The purpose of this framework is to manage the use of engine features that +/// are known to create security risks with *untrusted* input, but that trusted +/// users may wish to use due to the extra functionalities they bring. (This is +/// why these are settings and not simply security flaws!) The primary example +/// of this is the TeX engine’s shell-escape feature. +/// +/// Of course, this framework is only as good as our understanding of Tectonic’s +/// security profile. Future versions might disable or restrict different pieces +/// of functionality as new risks are discovered. +#[derive(Clone, Debug)] +pub struct SecuritySettings { + /// While we might eventually gain finer-grained enable/disable settings, + /// there should always be a hard "disable everything known to be risky" + /// option that supersedes everything else. + disable_insecures: bool, +} + +/// Different high-level security stances that can be adopted when creating +/// [`SecuritySettings`]. +#[derive(Clone, Debug)] +pub enum SecurityStance { + /// Ensure that all known-insecure features are disabled. + /// + /// Use this stance if you are processing untrusted input. + DisableInsecures, + + /// Request to allow the use of known-insecure features. + /// + /// Use this stance if you are processing trusted input *and* there is some + /// user-level request to use such features. The request to allow insecure + /// features might be overridden if the environment variable + /// `TECTONIC_UNTRUSTED_MODE` is set. + MaybeAllowInsecures, +} + +impl Default for SecurityStance { + fn default() -> Self { + // Obvi, the default is secure!!! 
+ SecurityStance::DisableInsecures + } +} + +impl SecuritySettings { + /// Create a new security configuration. + /// + /// The *stance* argument specifies the high-level security stance. If your + /// program will be run by a trusted user, they should be able to control + /// the setting through a command-line argument or something comparable. + /// Even if there is a request to enable known-insecure features, however, + /// such a request might be overridden by other mechanisms. In particular, + /// if the environment variable `TECTONIC_UNTRUSTED_MODE` is set to any + /// value, insecure features will always be disabled regardless of the + /// user-level setting. Other mechanisms for disable known-insecure features + /// may be added in the future. + pub fn new(stance: SecurityStance) -> Self { + let disable_insecures = if std::env::var_os("TECTONIC_UNTRUSTED_MODE").is_some() { + true + } else { + match stance { + SecurityStance::DisableInsecures => true, + SecurityStance::MaybeAllowInsecures => false, + } + }; + + SecuritySettings { disable_insecures } + } + + /// Query whether the shell-escape TeX engine feature is allowed to be used. + pub fn allow_shell_escape(&self) -> bool { + !self.disable_insecures + } +} + +impl Default for SecuritySettings { + fn default() -> Self { + SecuritySettings::new(SecurityStance::default()) + } +} + // The entry points. /// Issue a warning. From ce4f000b9d71ddecc83c0c3086c4522cd4ab0937 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Mon, 14 Jun 2021 18:38:17 -0400 Subject: [PATCH 30/30] tectonic: use the new security API Building off of the new core bridge API, we can provide a bit of a more refined approach to security controls. `tectonic -X compile` and `tectonic -X build` now take an `--untrusted` option that ensures that `-Z shell-escape`, and any future insecure features, cannot be enabled. 
As with the core bridge work, this changes the ProcessingSessionBuilder to be in untrusted mode by default, and adds a new API to configure in a more-trusted mode if that's what you want to do. --- Cargo.toml | 2 +- docs/src/v2cli/build.md | 13 +++++++ docs/src/v2cli/compile.md | 25 +++++++++++++- src/bin/tectonic/compile.rs | 23 ++++++++++--- src/bin/tectonic/v2cli.rs | 21 ++++++++++-- src/docmodel.rs | 31 ++++++----------- src/driver.rs | 68 +++++++++++++++---------------------- tests/executable.rs | 62 ++++++++++++++++++++++++++++----- 8 files changed, 166 insertions(+), 79 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 102d6ea513..bbf1519630 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -125,7 +125,7 @@ x86_64-unknown-linux-gnu = { install = ["fontconfig","freetype","harfbuzz[icu,gr x86_64-pc-windows-msvc = { triplet = "x64-windows-static", install = ["fontconfig","freetype","harfbuzz[icu,graphite2]"] } [package.metadata.internal_dep_versions] -tectonic_bridge_core = "4e16bf963700aae59772a6fb223981ceaa9b5f57" +tectonic_bridge_core = "thiscommit:2021-06-14:3sp2O1O" tectonic_bridge_flate = "thiscommit:2021-01-01:eer4ahL4" tectonic_bridge_graphite2 = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" tectonic_bridge_harfbuzz = "2c1ffcd702a662c003bd3d7d0ca4d169784cb6ad" diff --git a/docs/src/v2cli/build.md b/docs/src/v2cli/build.md index 74305d7d4c..ab5330a592 100644 --- a/docs/src/v2cli/build.md +++ b/docs/src/v2cli/build.md @@ -17,6 +17,7 @@ tectonic -X build [--only-cached] [--print] [--open] + [--untrusted] ``` #### Remarks @@ -52,3 +53,15 @@ identical to, the contents of the log file. By default, this output is only printed if the engine encounteres a fatal error. The `--open` option will open the built document using the system handler. + +Use the `--untrusted` option if building untrusted content. 
This is not the +default because in most cases you *will* trust the document that you’re +building, probably because you have created it yourself, and it would be very +annoying to have to pass `--trusted` every time you build a document that uses +shell-escape. See the security discussion in the documentation of the +[compile](./compile.md) command for details. In actual usage, it would obviously +be easy to forget to use this option; in cases where untrusted inputs are a +genuine concern, we recommend setting the environment variable +`TECTONIC_UNTRUSTED_MODE` to a non-empty value. This has the same effect as the +`--untrusted` option. Note, however, that a hostile shell user can trivially +clear this variable. \ No newline at end of file diff --git a/docs/src/v2cli/compile.md b/docs/src/v2cli/compile.md index 9378097e2d..5985e1a738 100644 --- a/docs/src/v2cli/compile.md +++ b/docs/src/v2cli/compile.md @@ -36,6 +36,7 @@ tectonic -X compile # full form [--print] [-p] [--reruns COUNT] [-r COUNT] [--synctex] + [--untrusted] [--web-bundle URL] [-w] [-Z UNSTABLE-OPTION] TEXPATH @@ -63,6 +64,27 @@ This will compile the file and create `myfile.pdf` if nothing went wrong. You can use an input filename of `-` to have Tectonic process standard input. (In this case, the output file will be named `texput.pdf`.) +##### Security + +By default, the document is compiled in a “trusted” mode. This means that the +calling user can request to enable certain engine features that could raise +security concerns if used with untrusted input: the classic example of this +being TeX's “shell-escape” functionality. These features are *not* enabled by +default, but they can be enabled on the command line; in the case of +shell-escape, this is done with `-Z shell-escape`. + +If the command-line argument `--untrusted` is provided, these features cannot be +enabled, regardless of other settings such as `-Z shell-escape`. 
So if you are +going to process untrusted input in a command-line script, as long as you make +sure that `--untrusted` is provided, the known-dangerous features will be +disabled. + +Furthermore, if the environment variable `TECTONIC_UNTRUSTED_MODE` is set to a +non-empty value, Tectonic will behave as if `--untrusted` were specified, +regardless of the actual command-line arguments. Setting this variable can +provide a modest extra layer of protection if the Tectonic engine is being run +outside of its CLI form. Keep in mind that untrusted shell scripts and the like +can trivially defeat this by explicitly clearing the environment variable. #### Options @@ -87,6 +109,7 @@ The following are the available flags. | `-p` | `--print` | Print the engine's chatter during processing | | `-r` | `--reruns ` | Rerun the TeX engine exactly this many times after the first | | | `--synctex` | Generate SyncTeX data | +| | `--untrusted` | Input is untrusted: disable all known-insecure features | | `-V` | `--version` | Prints version information | | `-w` | `--web-bundle ` | Use this URL find resource files instead of the default | | `-Z` | `-Z ` | Activate experimental “unstable” options | @@ -102,5 +125,5 @@ the set of unstable options is subject to change at any time. | `-Z continue-on-errors` | Keep compiling even when severe errors occur | | `-Z min-crossrefs=` | Equivalent to bibtex's `-min-crossrefs` flag. Default vaue: 2 | | `-Z paper-size=` | Change the initial paper size. Default: `letter` | -| `-Z shell-escape` | Enable `\write18` | +| `-Z shell-escape` | Enable `\write18` (unless `--untrusted` has been specified) | diff --git a/src/bin/tectonic/compile.rs b/src/bin/tectonic/compile.rs index 8830eaef3d..069914f1c3 100644 --- a/src/bin/tectonic/compile.rs +++ b/src/bin/tectonic/compile.rs @@ -1,18 +1,18 @@ -// Copyright 2016-2020 the Tectonic Project +// Copyright 2016-2021 the Tectonic Project // Licensed under the MIT License. //! 
Standalone compilation of TeX documents. This implements the "classic" / //! "V1" / "rustc-like" Tectonic command-line interface, as well as the //! `compile` subcommand of the "V2" / "cargo-like" interface. -use structopt::StructOpt; - use std::{ env, path::{Path, PathBuf}, str::FromStr, time, }; +use structopt::StructOpt; +use tectonic_bridge_core::{SecuritySettings, SecurityStance}; use tectonic::{ config::PersistentConfig, @@ -87,6 +87,10 @@ pub struct CompileOptions { #[structopt(name = "outdir", short, long, parse(from_os_str))] outdir: Option, + /// Input is untrusted -- disable all known-insecure features + #[structopt(long)] + untrusted: bool, + /// Unstable options. Pass -Zhelp to show a list // TODO we can't pass -Zhelp without also passing #[structopt(name = "option", short = "Z", number_of_values = 1)] @@ -97,7 +101,18 @@ impl CompileOptions { pub fn execute(self, config: PersistentConfig, status: &mut dyn StatusBackend) -> Result { let unstable = UnstableOptions::from_unstable_args(self.unstable.into_iter()); - let mut sess_builder = ProcessingSessionBuilder::default(); + // Default to allowing insecure since it would be super duper annoying + // to have to pass `--trusted` every time to build a personal document + // that uses shell-escape! This default can be overridden by setting the + // environment variable TECTONIC_UNTRUSTED_MODE to a nonempty value. 
+ let stance = if self.untrusted { + SecurityStance::DisableInsecures + } else { + SecurityStance::MaybeAllowInsecures + }; + + let mut sess_builder = + ProcessingSessionBuilder::new_with_security(SecuritySettings::new(stance)); let format_path = self.format; sess_builder .unstables(unstable) diff --git a/src/bin/tectonic/v2cli.rs b/src/bin/tectonic/v2cli.rs index 96f389b7b9..2a52397246 100644 --- a/src/bin/tectonic/v2cli.rs +++ b/src/bin/tectonic/v2cli.rs @@ -15,6 +15,7 @@ use tectonic::{ status::{termcolor::TermcolorStatusBackend, ChatterLevel, StatusBackend}, tt_error, tt_note, }; +use tectonic_bridge_core::{SecuritySettings, SecurityStance}; use tectonic_bundles::Bundle; use tectonic_docmodel::workspace::{Workspace, WorkspaceCreator}; use tectonic_errors::Error as NewError; @@ -178,6 +179,10 @@ impl Commands { /// `build`: Build a document #[derive(Debug, PartialEq, StructOpt)] pub struct BuildCommand { + /// Document is untrusted -- disable all known-insecure features + #[structopt(long)] + untrusted: bool, + /// Use only resource files cached locally #[structopt(short = "C", long)] only_cached: bool, @@ -206,8 +211,18 @@ impl BuildCommand { let ws = Workspace::open_from_environment()?; let doc = ws.first_document(); - // XXX NO WAY TO DISABLE INSECURE FEATURES - let mut setup_options = DocumentSetupOptions::new(false); + // Default to allowing insecure since it would be super duper annoying + // to have to pass `--trusted` every time to build a personal document + // that uses shell-escape! This default can be overridden by setting the + // environment variable TECTONIC_UNTRUSTED_MODE to a nonempty value. 
+ let stance = if self.untrusted { + SecurityStance::DisableInsecures + } else { + SecurityStance::MaybeAllowInsecures + }; + + let mut setup_options = + DocumentSetupOptions::new_with_security(SecuritySettings::new(stance)); setup_options.only_cached(self.only_cached); for output_name in doc.output_names() { @@ -283,7 +298,7 @@ fn get_a_bundle( match Workspace::open_from_environment() { Ok(ws) => { let doc = ws.first_document(); - let mut options = DocumentSetupOptions::new(true); + let mut options: DocumentSetupOptions = Default::default(); options.only_cached(only_cached); doc.bundle(&options, status) } diff --git a/src/docmodel.rs b/src/docmodel.rs index f36dadbc56..0a23b3a214 100644 --- a/src/docmodel.rs +++ b/src/docmodel.rs @@ -12,6 +12,7 @@ use std::{ fs, io, path::{Path, PathBuf}, }; +use tectonic_bridge_core::SecuritySettings; use tectonic_bundles::{ cache::Cache, dir::DirBundle, itar::IndexedTarBackend, zip::ZipBundle, Bundle, }; @@ -31,31 +32,23 @@ use crate::{ }; /// Options for setting up [`Document`] instances with the driver -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Default)] pub struct DocumentSetupOptions { /// Disable requests to the network, if the document’s bundle happens to be /// network-based. only_cached: bool, - /// Disable all known-insecure engine features. - /// - /// This setting should be true if any untrusted input will be handled. - /// However, it is not always activated because sometimes users want the - /// functionality provided by known-insecure features (such as - /// shell-escape). - disable_insecure: bool, + /// Security settings for engine features. + security: SecuritySettings, } impl DocumentSetupOptions { - /// Create a new set of document setup options. - /// - /// This function primarily exists to *force* you to consider whether you - /// ought to disable known-insecure features. As usual, they should be - /// disabled if there is any untrusted input that will be handled. 
- pub fn new(disable_insecure: bool) -> Self { + /// Create a new set of document setup options with custom security + /// settings. + pub fn new_with_security(security: SecuritySettings) -> Self { DocumentSetupOptions { only_cached: false, - disable_insecure, + security, } } @@ -157,12 +150,8 @@ impl DocumentExt for Document { writeln!(input_buffer, "\\input{{{}}}", profile.postamble_file)?; } - let mut sess_builder = ProcessingSessionBuilder::default(); - - // Do this before anything else!!!! - if setup_options.disable_insecure { - sess_builder.disable_insecure(); - } + let mut sess_builder = + ProcessingSessionBuilder::new_with_security(setup_options.security.clone()); sess_builder .output_format(output_format) diff --git a/src/driver.rs b/src/driver.rs index c7db3d2d91..5da8d127b6 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -26,7 +26,7 @@ use std::{ str::FromStr, time::SystemTime, }; -use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SystemRequestError}; +use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError}; use tectonic_bundles::Bundle; use tectonic_io_base::{ digest::DigestData, @@ -638,13 +638,14 @@ impl Default for ShellEscapeMode { /// A builder-style interface for creating a [`ProcessingSession`]. /// -/// This uses standard builder patterns. See especially -/// [`Self::disable_insecure`], which prevents any known-insecure features from -/// being activated in the session. It should always be the first method you -/// call if you are going to process input that is not totally trusted. +/// This uses standard builder patterns. The `Default` implementation defaults +/// to restrictive security settings that disable all known-insecure features +/// that could be abused by untrusted inputs. Use +/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the +/// option to enable potentially-insecure features such as shell-escape. 
#[derive(Default)] pub struct ProcessingSessionBuilder { - disable_insecures: bool, + security: SecuritySettings, primary_input: PrimaryInputMode, tex_input_name: Option, output_dest: OutputDestination, @@ -667,33 +668,12 @@ pub struct ProcessingSessionBuilder { } impl ProcessingSessionBuilder { - /// Disable any known insecure settings. - /// - /// Some session options, like [`Self::shell_escape_with_temp_dir`], are - /// known to create security risks and should not be used with untrusted - /// input. This function disables any such settings. The intended usage is - /// that you can create a session builder, activate this feature, and then - /// hand the session builder off to other initializers confident in the - /// knowledge that they will be prevented from activating any insecure - /// settings. Therefore this operation is idempotent and irreversible. - /// - /// When you know that you are handling trusted input, on the other hand, - /// some of these known-insecure capabilities provide functionality that - /// users empirically want. This is why this setting isn't permanently - /// enabled. - /// - /// Of course, this approach is only as good as our understanding of - /// Tectonic’s security profile. Future versions might disable or restrict - /// different pieces of functionality as new risks are discovered. - pub fn disable_insecure(&mut self) -> &mut Self { - self.disable_insecures = true; - self - } - - /// A very dumb helper to minimize the chances of boolean logic mistakes. - #[inline(always)] - fn allow_insecures(&self) -> bool { - !self.disable_insecures + /// Create a new builder with customized security settings. + pub fn new_with_security(security: SecuritySettings) -> Self { + ProcessingSessionBuilder { + security, + ..Default::default() + } } /// Sets the path to the primary input file. 
@@ -861,7 +841,7 @@ impl ProcessingSessionBuilder { /// disable shell-escape unless the [`UnstableOptions`] say otherwise, /// in which case a driver-managed temporary directory will be used. pub fn shell_escape_with_work_dir>(&mut self, path: P) -> &mut Self { - if self.allow_insecures() { + if self.security.allow_shell_escape() { self.shell_escape_mode = ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned()); } @@ -873,7 +853,7 @@ impl ProcessingSessionBuilder { /// unless the [`UnstableOptions`] say otherwise, in which case a /// driver-managed temporary directory will be used. pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self { - if self.allow_insecures() { + if self.security.allow_shell_escape() { self.shell_escape_mode = ShellEscapeMode::TempDir; } self @@ -987,7 +967,7 @@ impl ProcessingSessionBuilder { let mut pdf_path = aux_path.clone(); pdf_path.set_extension("pdf"); - let shell_escape_mode = if self.disable_insecures { + let shell_escape_mode = if !self.security.allow_shell_escape() { ShellEscapeMode::Disabled } else { match self.shell_escape_mode { @@ -1004,6 +984,7 @@ impl ProcessingSessionBuilder { }; Ok(ProcessingSession { + security: self.security, bs, pass: self.pass, primary_input_path, @@ -1036,6 +1017,9 @@ enum RerunReason { /// processing a file. It understands, for example, the need to re-run the TeX /// engine if the `.aux` file changed. pub struct ProcessingSession { + // Security settings. + security: SecuritySettings, + /// The subset of the session state that's can be mutated while the C/C++ /// engines are running. Importantly, this includes the full I/O stack. 
bs: BridgeState, @@ -1528,7 +1512,8 @@ impl ProcessingSession { let result = { self.bs .enter_format_mode(&format!("tectonic-format-{}.tex", stem)); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let r = TexEngine::default() .halt_on_error_mode(true) .initex_mode(true) @@ -1590,7 +1575,8 @@ impl ProcessingSession { status.note_highlighted("Running ", "TeX", " ..."); } - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); TexEngine::default() .halt_on_error_mode(true) @@ -1623,7 +1609,8 @@ impl ProcessingSession { fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result { let result = { status.note_highlighted("Running ", "BibTeX", " ..."); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let mut engine = BibtexEngine::new(); engine.process(&mut launcher, &self.tex_aux_path, &self.unstables) }; @@ -1655,7 +1642,8 @@ impl ProcessingSession { { status.note_highlighted("Running ", "xdvipdfmx", " ..."); - let mut launcher = CoreBridgeLauncher::new(&mut self.bs, status); + let mut launcher = + CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone()); let mut engine = XdvipdfmxEngine::default(); engine.build_date(self.build_date); diff --git a/tests/executable.rs b/tests/executable.rs index 062d8e2049..339b17c522 100644 --- a/tests/executable.rs +++ b/tests/executable.rs @@ -567,6 +567,16 @@ fn v2_new_build_multiple_outputs() { success_or_panic(output); } +const SHELL_ESCAPE_TEST_DOC: &str = r#"\immediate\write18{mkdir shellwork} +\immediate\write18{echo 123 >shellwork/persist} +\ifnum123=\input{shellwork/persist} +a +\else +\ohnotheshellescapedidntwork +\fi +\bye 
+"#; + /// Test that shell escape actually runs the commands #[test] fn shell_escape() { @@ -576,15 +586,49 @@ fn shell_escape() { let output = run_tectonic_with_stdin( tempdir.path(), &[&fmt_arg, "-", "-Zshell-escape"], - r#"\immediate\write18{mkdir shellwork} - \immediate\write18{echo 123 >shellwork/persist} - \ifnum123=\input{shellwork/persist} - a - \else - \ohnotheshellescapedidntwork - \fi - \bye - "#, + SHELL_ESCAPE_TEST_DOC, ); success_or_panic(output); } + +/// Test that shell-escape can be killed by command-line-option +#[test] +fn shell_escape_cli_override() { + let fmt_arg = get_plain_format_arg(); + let tempdir = setup_and_copy_files(&[]); + + let output = run_tectonic_with_stdin( + tempdir.path(), + &[&fmt_arg, "--untrusted", "-", "-Zshell-escape"], + SHELL_ESCAPE_TEST_DOC, + ); + error_or_panic(output); +} + +/// Test that shell-escape can be killed by environment variable +#[test] +fn shell_escape_env_override() { + let fmt_arg = get_plain_format_arg(); + let tempdir = setup_and_copy_files(&[]); + + // Note that we intentionally set the variable to 0 below -- it takes it + // effect if it has ANY value, not just a "truthy" one. + + let mut command = prep_tectonic(tempdir.path(), &[&fmt_arg, "-", "-Zshell-escape"]); + command + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .env("TECTONIC_UNTRUSTED_MODE", "0"); + + println!("running {:?}", command); + let mut child = command.spawn().expect("tectonic failed to start"); + write!(child.stdin.as_mut().unwrap(), "{}", SHELL_ESCAPE_TEST_DOC) + .expect("failed to send data to tectonic subprocess"); + + let output = child + .wait_with_output() + .expect("failed to wait on tectonic subprocess"); + + error_or_panic(output); +}