Skip to content

Commit

Permalink
disabled deprecation warnings for now since there are some each() calls …
Browse files Browse the repository at this point in the history
…in here. Updated the mkdir calls to pass the recursive flag, ensuring parent directories are created. Switched markdown_github to gfm to match recent versions of pandoc. Added a lot of character filtering to the parsed url to fix various issues. Added parsing of [[Category:cat]] links, converting them into tags. Added setting the tags frontmatter. Added an extra mkdir call in case the url is in the format of a path itself, to ensure parents are created.
  • Loading branch information
detain committed Dec 6, 2019
1 parent 385ecda commit 938f5d6
Showing 1 changed file with 30 additions and 11 deletions.
41 changes: 30 additions & 11 deletions convert.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@

<?php

error_reporting(E_ALL & ~E_DEPRECATED);
$arguments = arguments($argv);

require 'vendor/autoload.php';
Expand All @@ -19,7 +18,7 @@

if(!file_exists($output_path)) {
echo "Creating output directory $output_path" . PHP_EOL . PHP_EOL;
mkdir($output_path);
mkdir($output_path,0777,true);
}

} else {
Expand All @@ -29,11 +28,11 @@
if(!empty($arguments['format'])) {
$format = $arguments['format'];
} else {
$format = 'markdown_github';
$format = 'gfm';
}


if(!empty($arguments['fm']) OR (empty($arguments['fm']) && $format == 'markdown_github')) {
if(!empty($arguments['fm']) OR (empty($arguments['fm']) && $format == 'gfm')) {
$add_meta = true;
} else {
$add_meta = false;
Expand All @@ -59,8 +58,11 @@

$title = $node->xpath('title');
$title = $title[0];
$url = str_replace(' ', '_', $title);

$url = str_replace(['/', ' ', '"', '\'', ':', '`', '\`', '!', '(', ')', '@', ',', ';', '$', '\\', '.', '+', '&', '='], ['-', '-', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], $title);
if (substr($url, 0, 1) == '-') {
$url = substr($url , 1);
}
$url = strtolower($url);
if($slash = strpos($url, '/')){
$title = str_replace('/', ' ', $title);
$directory = substr($url, 0, $slash);
Expand All @@ -70,17 +72,29 @@
$directory = '';
$filename = $url;
}
if (preg_match('/^Category:/', $title) || preg_match('/^Category talk:/', $title)) {
echo "Skipping Page $title\n";
continue;
}

$text = $node->xpath('revision/text');
$text = $text[0];
$text = html_entity_decode($text); // decode inline html
$text = preg_replace_callback('/\[\[(.+?)\]\]/', "new_link", $text); // adds leading slash to links, "absolute-path reference"
$tags = ['intwiki'];
if (preg_match_all('/\[\[Category:([^\]]+)\]\]/', $text, $matches)) {
foreach ($matches[1] as $match) {
$tags[] = $match;
$text = str_replace('[[Category:'.$match.']]', '', $text);
}
}

$text = preg_replace_callback('/\[\[(.+?)\]\]/', "new_link", $text); // adds leading slash to links, "absolute-path reference"
// prepare to append page title frontmatter to text
if ($add_meta) {
$frontmatter = "---\n";
$frontmatter .= "title: $title\n";
$frontmatter .= "permalink: /$url/\n";
$frontmatter .= "tags: ".implode(', ', $tags)."\n";
$frontmatter .= "---\n\n";
}

Expand All @@ -89,7 +103,12 @@
"from" => "mediawiki",
"to" => $format
);
$text = $pandoc->runWith($text, $options);
try {
$text = $pandoc->runWith($text, $options);
} catch (\Pandoc\PandocException $e) {
echo "Failed Converting in Pandoc With Error: ".$e->getMessage().PHP_EOL;
continue;
}

$text = str_replace('\_', '_', $text);

Expand All @@ -104,13 +123,14 @@
// create directory if necessary
if(!empty($directory)) {
if(!file_exists($directory)) {
mkdir($directory);
mkdir($directory,0777,true);
}

$directory = $directory . '/';
}

// create file
@mkdir(dirname(normalizePath($directory . $filename . '.md')), 0777, true);
$file = fopen(normalizePath($directory . $filename . '.md'), 'w');
fwrite($file, $text);
fclose($file);
Expand Down Expand Up @@ -202,4 +222,3 @@ function normalizePath($path)
}


?>

0 comments on commit 938f5d6

Please sign in to comment.