-
Notifications
You must be signed in to change notification settings - Fork 0
/
import.rb
134 lines (112 loc) · 3.4 KB
/
import.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
require 'rubygems'

require 'date'
require 'fileutils'
require 'yaml'

require 'sequel'
# Reads published posts from a WordPress MySQL database (via Sequel) and
# writes one Middleman article source file per post.
#
# Typical use:
#   WordpressToMiddlman.new('db_wordpress', 'root', '').output_posts!
class WordpressToMiddlman
  attr_accessor :database, :db_user, :db_password, :table_prefix, :host, :db

  # database     - name of the MySQL database to read from
  # db_user      - MySQL user name
  # db_password  - MySQL password
  # table_prefix - prefix of the WordPress tables, without the trailing
  #                underscore ('wp' selects `wp_posts`)
  # host         - MySQL host name
  def initialize(database, db_user, db_password, table_prefix = 'wp', host = 'localhost')
    @database = database
    @db_user = db_user
    @db_password = db_password
    @table_prefix = table_prefix
    @host = host
  end

  # Connect to the database and remember the connection in @db.
  def connect
    @db = Sequel.mysql(@database,
                       user: @db_user,
                       password: @db_password,
                       host: @host,
                       encoding: 'utf8')
  end

  # Retrieve each published post (not drafts, not revisions).
  # Returns whatever `db[query]` returns for the posts query.
  def retrieve_posts
    query = "SELECT * FROM `#{table_name('posts')}` " \
            "WHERE `post_type` = 'post' AND post_status = 'publish'"
    db[query]
  end

  # Writes one file per post via MiddlemanPost, printing a progress dot for
  # each post and a newline at the end.
  def generate_post_files(posts)
    posts.each do |post_data|
      print '.'
      MiddlemanPost.new(post_data).create_post_file
    end
    print "\n"
  end

  # Remaining work:
  # - generate front matter (directory_index: true)
  # - generate post body
  # - produce file structure
  # - find and download assets included in posts
  # - category indexes?
  # - redirections work
  # - comments?
  # - categories and tags?

  # Prints every row of the redirection plugin's items table to stdout.
  def redirections
    result = db["SELECT * FROM `#{table_name('redirection_items')}`"]
    result.each { |row| puts row.inspect }
  end

  # Connects to the database and exports all published posts.
  def output_posts!
    connect
    generate_post_files(retrieve_posts)
  end

  private

  # Full table name for the configured prefix, e.g. 'posts' -> 'wp_posts'.
  # The prefix is interpolated into SQL as-is, so it must come from trusted
  # configuration, never from user input.
  def table_name(suffix)
    "#{table_prefix}_#{suffix}"
  end
end
# Turns one WordPress post row into a Middleman article source file.
#
# `data` is a hash-like row read with symbol keys: :post_title,
# :post_excerpt, :post_date, :post_name and :post_content.
class MiddlemanPost
  # Opening tag emitted for Ruby code samples.
  RUBY_BLOCK = '<% code("ruby") do %>'
  # Opening tag emitted for every other code sample.
  SHELL_BLOCK = '<% code("shell") do %>'

  # Literal [pattern, replacement] pairs, applied in order, that turn the
  # <pre> markup found in the posts into ERB `code` blocks.
  # NOTE(review): `code` is presumably a helper provided by the target
  # Middleman site -- it is not defined in this file.
  TAG_REPLACEMENTS = [
    ['<pre name="code" class="ruby">', RUBY_BLOCK],
    ['<pre class="ruby" name="code">', RUBY_BLOCK],
    ['<pre class="ruby">', RUBY_BLOCK],
    ['<pre class="code" name="ruby">', RUBY_BLOCK],
    ['<pre name=\'code\' class="ruby">', RUBY_BLOCK],
    ['<pre name="code" lang="vb">', SHELL_BLOCK],
    ['<pre name="code" class="vb:nogutter">', SHELL_BLOCK],
    ['<pre name="code" class="css">', SHELL_BLOCK], # not really css originally!
    ['<pre>', SHELL_BLOCK], # convert everything else to shell
    ['</pre>', '<% end %>']
  ].freeze

  attr_accessor :data

  def initialize(data)
    @data = data
  end

  # Writes the YAML front matter, a closing '---' line and the converted
  # body to #filename, creating the target directory when it is missing.
  def create_post_file
    path = filename
    FileUtils.mkdir_p(File.dirname(path))
    File.open(path, 'w') do |f|
      f.puts front_matter
      f.puts '---'
      f.puts content
    end
  end

  ## where can we put these to generate the correct end url?
  ## source/2011-10-18-middleman.html(.markdown)
  ## /2013/12/25/slug/

  # Relative path of the generated file:
  # source/<YYYY-MM-DD>-<post_name>.html.erb
  def filename
    date = data[:post_date]
    slug = data[:post_name]
    "source/#{date.strftime('%Y-%m-%d')}-#{slug}.html.erb"
  end

  # YAML document (starting with '---') holding the article metadata.
  # Entries whose value is nil or empty are left out.
  # Not exported yet: wordpress_id (:ID), wordpress_url (:guid),
  # categories, tags.
  def front_matter
    {
      'layout' => 'article',
      'title' => utf8_text(data[:post_title]),
      'excerpt' => data[:post_excerpt].to_s,
      'post_date' => data[:post_date].to_date
    }.delete_if { |_key, value| value.nil? || value == '' }.to_yaml
  end

  # Post body with ERB openers neutralised and <pre> markup converted.
  # A nil body yields an empty string.
  def content
    replace_tags(escape_entities(data[:post_content].to_s))
  end

  # Escapes every ERB opener ('<%', which also covers '<%=' and '<%-') so
  # text from the post is not evaluated when the .html.erb file is rendered.
  # Must run before #replace_tags, which inserts real ERB tags.
  def escape_entities(string)
    string.gsub('<%', '&lt;%')
  end

  # Applies TAG_REPLACEMENTS in order, then puts "<br><br>" between
  # paragraphs separated by a blank line.
  def replace_tags(string)
    converted = TAG_REPLACEMENTS.reduce(string) do |text, (pattern, replacement)|
      text.gsub(pattern, replacement)
    end
    converted.gsub(/\n\n/, "\n<br><br>\n")
  end

  private

  # Copy of +value+ tagged as UTF-8. Working on a copy keeps the source row
  # untouched and tolerates nil and frozen strings.
  def utf8_text(value)
    value.to_s.dup.force_encoding('UTF-8')
  end
end
# Run the import only when this file is executed directly, so that loading
# it from another script (e.g. to reuse the classes) does not open a
# database connection or write files as a side effect.
if __FILE__ == $PROGRAM_NAME
  puts 'starting'
  importer = WordpressToMiddlman.new('db_wordpress', 'root', '')
  importer.output_posts!
end