Skip to content

Commit

Permalink
Merge pull request #308 from mikehaertl/307-is-file-check
Browse files Browse the repository at this point in the history
Issue #307 Refactor check for temp file creation
  • Loading branch information
mikehaertl authored Mar 12, 2019
2 parents 35cb070 + 96c78ad commit dcd1236
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 28 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ composer require mikehaertl/phpwkhtmltopdf
Make sure that you include the composer [autoloader](https://getcomposer.org/doc/01-basic-usage.md#autoloading)
somewhere in your codebase.

## Examples

### Single page PDF

```php
Expand Down Expand Up @@ -186,6 +188,23 @@ $pdf = new Pdf(array(
));
```

### Passing strings

Some options like `header-html` usually expect a URL or a filename. With our
library you can also pass a string. The class will try to detect whether the
argument is a URL, a filename or some HTML or XML content. To make detection
easier you can wrap your content in `<html>` tags.

If this doesn't work correctly you can also pass an instance of our `File`
helper as a last resort:

```php
use mikehaertl\tmp\File;
$options = [
'header-html' => new File('Complex content', '.html'),
];
```

## Error handling

`send()`, `saveAs()` and `toString()` will return `false` on error. In this case the detailed error message is
Expand Down
85 changes: 57 additions & 28 deletions src/Pdf.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,19 @@ class Pdf
// Regular expression to detect XML strings
const REGEX_XML = '/<\??xml/i';

// Regular expression to detect URL strings
const REGEX_URL = '/^(https?:)?\/\//i';

// Regular expression to detect options that expect a URL or a file name,
// so we need to create a tmp file for the content.
const REGEX_OPTS_TMPFILE = '/^((header|footer)-html|(xsl|user)-style-sheet)$/i';

// prefix for tmp files
// Prefix for tmp files
const TMP_PREFIX = 'tmp_wkhtmlto_pdf_';

// Maximum length of a file path if PHP_MAXPATHLEN is not defined
const MAX_PATHLEN = 255;

/**
* @var string the name of the `wkhtmltopdf` binary. Default is
* `wkhtmltopdf`. You can also configure a full path here.
Expand Down Expand Up @@ -122,8 +128,8 @@ public function __construct($options = null)
*/
public function addPage($input, $options = array(), $type = null)
{
$options['inputArg'] = $this->processInput($input, $type);
$this->_objects[] = $this->processOptions($options);
$options['inputArg'] = $this->ensureUrlOrFile($input, $type);
$this->_objects[] = $this->ensureUrlOrFileOptions($options);
return $this;
}

Expand All @@ -139,9 +145,9 @@ public function addPage($input, $options = array(), $type = null)
*/
public function addCover($input, $options = array(), $type = null)
{
$options['input'] = ($this->version9 ? '--' : '').'cover';
$options['inputArg'] = $this->processInput($input, $type);
$this->_objects[] = $this->processOptions($options);
$options['input'] = ($this->version9 ? '--' : '') . 'cover';
$options['inputArg'] = $this->ensureUrlOrFile($input, $type);
$this->_objects[] = $this->ensureUrlOrFileOptions($options);
return $this;
}

Expand All @@ -153,8 +159,8 @@ public function addCover($input, $options = array(), $type = null)
*/
public function addToc($options = array())
{
$options['input'] = ($this->version9 ? '--' : '')."toc";
$this->_objects[] = $this->processOptions($options);
$options['input'] = ($this->version9 ? '--' : '') . 'toc';
$this->_objects[] = $this->ensureUrlOrFileOptions($options);
return $this;
}

Expand Down Expand Up @@ -215,16 +221,16 @@ public function toString()
*/
public function setOptions($options = array())
{
// #264 tmpDir must be set before calling processOptions
// #264 tmpDir must be set before calling ensureUrlOrFileOptions
if (isset($options['tmpDir'])) {
$this->tmpDir = $options['tmpDir'];
unset($options['tmpDir']);
}
$options = $this->processOptions($options);
$options = $this->ensureUrlOrFileOptions($options);
foreach ($options as $key => $val) {
if (is_int($key)) {
$this->_options[] = $val;
} elseif ($key[0]!=='_' && property_exists($this, $key)) {
} elseif ($key[0] !== '_' && property_exists($this, $key)) {
$this->$key = $val;
} else {
$this->_options[$key] = $val;
Expand Down Expand Up @@ -287,7 +293,7 @@ protected function createPdf()
$command->addArg($fileName, null, true); // Always escape filename
if (!$command->execute()) {
$this->_error = $command->getError();
if (!(file_exists($fileName) && filesize($fileName)!==0 && $this->ignoreWarnings)) {
if (!(file_exists($fileName) && filesize($fileName) !== 0 && $this->ignoreWarnings)) {
return false;
}
}
Expand All @@ -296,35 +302,58 @@ protected function createPdf()
}

/**
* @param string $input
* @param string|null $type a type hint if the input is a string of known type. This can either be
* `TYPE_HTML` or `TYPE_XML`. If `null` (default) the type is auto detected from the string content.
* @return \mikehaertl\tmp\File|string a File object if the input is a HTML or XML string. The unchanged input otherwhise.
* This method wraps the passed argument in a temporary file unless it is
* already a File instance, a URL or (with no `$type` hint given) the name of
* an existing file. XML content gets a `.xml` file, anything else `.html`.
*
* @param string|File $input the input argument File to check
* @param string|null $type a type hint if the input is a string of known
* type. This can either be `TYPE_HTML` or `TYPE_XML`. If `null` (default)
* the type is auto detected from the string content.
* @return \mikehaertl\tmp\File|string a File object if the input is a HTML
* or XML string. The unchanged input otherwise.
*/
protected function processInput($input, $type = null)
protected function ensureUrlOrFile($input, $type = null)
{
if ($type === self::TYPE_HTML || $type === null && preg_match(self::REGEX_HTML, $input)) {
return $this->_tmpFiles[] = new File($input, '.html', self::TMP_PREFIX, $this->tmpDir);
} elseif ($type === self::TYPE_XML || preg_match(self::REGEX_XML, $input)) {
return $this->_tmpFiles[] = new File($input, '.xml', self::TMP_PREFIX, $this->tmpDir);
} else {
if ($input instanceof File) {
$this->_tmpFiles[] = $input;
return $input;
} elseif (preg_match(self::REGEX_URL, $input)) {
return $input;
} elseif ($type === self::TYPE_XML || $type === null && preg_match(self::REGEX_XML, $input)) {
$ext = '.xml';
} else {
// First check for obvious HTML content to avoid is_file() as much
// as possible as it can trigger open_basedir restriction warnings
// with long strings.
$isHtml = $type === self::TYPE_HTML || preg_match(self::REGEX_HTML, $input);
if (!$isHtml) {
$maxPathLen = defined('PHP_MAXPATHLEN') ?
constant('PHP_MAXPATHLEN') : self::MAX_PATHLEN;
if (strlen($input) <= $maxPathLen && is_file($input)) {
return $input;
}
}
$ext = '.html';
}
$file = new File($input, $ext, self::TMP_PREFIX, $this->tmpDir);
$this->_tmpFiles[] = $file;
return $file;
}

/**
* @param array $options list of options as name/value pairs
* @return array options with raw content converted to tmp files where neccessary
* @return array options with raw HTML/XML/String content converted to tmp
* files where necessary
*/
protected function processOptions($options = array())
protected function ensureUrlOrFileOptions($options = array())
{
foreach ($options as $key => $val) {
// Some options expect a URL or a file name, so check if we need a temp file
if (is_string($val) && preg_match(self::REGEX_OPTS_TMPFILE, $key) ) {
defined('PHP_MAXPATHLEN') || define('PHP_MAXPATHLEN', 255);
$isFile = (strlen($val) <= PHP_MAXPATHLEN) ? is_file($val) : false;
if (!($isFile || preg_match('/^(https?:)?\/\//i',$val) || $val === strip_tags($val))) {
$options[$key] = new File($val, '.html', self::TMP_PREFIX, $this->tmpDir);
$file = $this->ensureUrlOrFile($val);
if ($file instanceof File) {
$options[$key] = $file;
}
}
}
Expand Down
32 changes: 32 additions & 0 deletions tests/PdfTest.php
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<?php
use mikehaertl\wkhtmlto\Pdf;
use mikehaertl\tmp\File;

class PdfTest extends \PHPUnit\Framework\TestCase
{
Expand Down Expand Up @@ -129,6 +130,18 @@ public function testCanAddPageFromHtmlString()
$this->assertRegexp('/tmp_wkhtmlto_pdf_.*?\.html/', $pdf->getCommand()->getExecCommand());
unlink($outFile);
}
/**
 * A File instance passed to addPage() must end up in the executed command
 * as a file argument.
 *
 * NOTE(review): the `php_tmpfile_` pattern presumably matches the default
 * prefix of mikehaertl\tmp\File when no prefix is passed -- confirm against
 * that class.
 */
public function testCanAddPageFromFileInstance()
{
    $outFile = $this->getOutFile();
    $binary = $this->getBinary();

    $pdf = new Pdf;
    $pdf->binary = $binary;
    $this->assertInstanceOf('mikehaertl\wkhtmlto\Pdf', $pdf->addPage(new File('Some content', '.html')));
    // Fail with a clear assertion if the PDF could not be created, instead
    // of only tripping over a warning from unlink() further down.
    $this->assertTrue($pdf->saveAs($outFile));
    $this->assertFileExists($outFile);

    $this->assertRegExp('/php_tmpfile_.*?\.html/', $pdf->getCommand()->getExecCommand());
    unlink($outFile);
}
public function testCanAddPageFromXmlString()
{
$outFile = $this->getOutFile();
Expand Down Expand Up @@ -353,6 +366,25 @@ public function testCanAddHeaderAndFooterAsHtml()
$this->assertRegExp("#$binary --header-html '/tmp/[^ ]+' --footer-html '/tmp/[^ ]+' '$inFile' '$tmpFile'#", (string) $pdf->getCommand());
unlink($outFile);
}
/**
 * Header and footer given as File instances must appear in the command as
 * `--header-html` / `--footer-html` file arguments, followed by the input
 * page and the file name reported by getPdfFilename().
 */
public function testCanAddHeaderAndFooterAsFile()
{
    $inFile = $this->getHtmlAsset();
    $target = $this->getOutFile();
    $binary = $this->getBinary();

    // Build the constructor options up front to keep the call site short
    $options = array(
        'binary' => $binary,
        'header-html' => new File('Some header content', '.html'),
        'footer-html' => new File('Some footer content', '.html'),
    );
    $pdf = new Pdf($options);

    $returned = $pdf->addPage($inFile);
    $this->assertInstanceOf('mikehaertl\wkhtmlto\Pdf', $returned);

    $saved = $pdf->saveAs($target);
    $this->assertTrue($saved);
    $this->assertFileExists($target);

    $tmpFile = $pdf->getPdfFilename();
    $pattern = "#$binary --header-html '/tmp/[^ ]+' --footer-html '/tmp/[^ ]+' '$inFile' '$tmpFile'#";
    $this->assertRegExp($pattern, (string) $pdf->getCommand());
    unlink($target);
}
public function testCanAddHeaderAndFooterAsHtmlToPagesAndCoverAndToc()
{
$inFile = $this->getHtmlAsset();
Expand Down

0 comments on commit dcd1236

Please sign in to comment.