From 5a6e1420bcf6bbd8425a97a516c3607edef57a3c Mon Sep 17 00:00:00 2001 From: anderejd Date: Mon, 25 May 2020 12:16:37 +0200 Subject: [PATCH] Added an optional word_delimiter. --- src/lib.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f0887e4..364eb6d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1899,6 +1899,7 @@ pub struct PlainTextOutput { last_y: f64, first_char: bool, flip_ctm: Transform, + word_delimiter: Option, } impl PlainTextOutput { @@ -1909,8 +1910,16 @@ impl PlainTextOutput { first_char: false, last_y: 0., flip_ctm: Transform2D::identity(), + word_delimiter: None, } } + + /// Can be useful when trying to extract table-like PDF text, with a bit of + /// luck some of the output can be interpreted as CSV. + pub fn word_delimiter(&mut self, delim: Option) -> &mut Self { + self.word_delimiter = delim; + self + } } /* There are some structural hints that PDFs can use to signal word and line endings: @@ -1957,7 +1966,15 @@ impl OutputDev for PlainTextOutput { self.first_char = true; Ok(()) } - fn end_word(&mut self) -> Result<(), OutputError> {Ok(())} + + fn end_word(&mut self) -> Result<(), OutputError> { + use std::fmt::Write; + if let Some(s) = &self.word_delimiter { + write!(self.writer, "{}", s)?; + } + Ok(()) + } + fn end_line(&mut self) -> Result<(), OutputError>{ //write!(self.file, "\n"); Ok(())