-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathmake4ht-dvireader.lua
182 lines (163 loc) · 5.23 KB
/
make4ht-dvireader.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
-- This is not actually a full DVI reader. It just calculates a hash for each page,
-- so that it can be detected whether a page changed between compilations and needs
-- to be converted to an image using Dvisvgm or Dvipng
--
-- information about DVI format is from here: https://web.archive.org/web/20070403030353/http://www.math.umd.edu/~asnowden/comp-cont/dvi.html
--
-- the file after post_post is filled with bytes 223
local endfill = 223
-- Sizes in bytes of the argument types found in a DVI file. The same numbers
-- serve as the `type` tag of every argument in the `opcodes` table and as the
-- keys of the `readers` table.
-- (The unused `local M` declaration that used to sit here was removed.)
local int = 4
local byte = 1
local sixteen = 2
--- Return the single character stored at position `pos` of `str`.
-- @param str string to read from
-- @param pos 1-based position of the character
-- @return one-character string, or nil when `pos` lies past the end of `str`
local function read_char(str, pos)
  if pos and pos > string.len(str) then return nil end
  -- start and end index must be the same to get exactly one character;
  -- the former end index `pos + 1` returned two characters
  return string.sub(str, pos, pos)
end
--- Numeric value of the byte at position `pos` of `str`.
-- @param str string to read from
-- @param pos 1-based position of the byte
-- @return result of string.byte applied to what read_char returns for `pos`
local function read_byte(str, pos)
  local char = read_char(str, pos)
  return string.byte(char)
end
-- Read the four bytes starting at `pos` as one big-endian number.
-- Integers in a DVI file are signed, but the sign is ignored here, so a
-- negative value comes back as a large positive number (-1 is returned as
-- 2^32-1). That does not matter for the purpose of this library, but it would
-- have to be fixed for general use.
local function read_integer(str, pos)
  local total = 0
  -- the weights are 256^3, 256^2, 256^1 and 256^0, most significant byte first
  for offset = 0, 3 do
    total = total + read_byte(str, pos + offset) * (256 ^ (3 - offset))
  end
  return total
end
--- Read the two bytes starting at `pos` as one unsigned big-endian number.
-- @param str string to read from
-- @param pos 1-based position of the most significant byte
-- @return the decoded number
local function read_sixteen(str, pos)
  local high = read_byte(str, pos) * 256
  return high + read_byte(str, pos + 1)
end
-- reader functions, indexed by the size in bytes of the argument they decode
local readers = {}
readers[byte] = read_byte
readers[sixteen] = read_sixteen
readers[int] = read_integer
-- Layout of the DVI commands used by this module. For each command, `opcode`
-- is the value of the command byte and `args` lists the arguments in the order
-- they are stored in the file; every argument has a `name` and a `type`, which
-- is its size in bytes.
local opcodes = {}

opcodes.post_post = {
  opcode = 249,
  args = {
    { name = "q", type = int }, -- postamble address
    { name = "i", type = byte },
  },
}

opcodes.post = {
  opcode = 248,
  args = {
    { name = "p", type = int }, -- address of the last page
    { name = "num", type = int },
    { name = "den", type = int },
    { name = "mag", type = int },
    { name = "l", type = int },
    { name = "u", type = int },
    { name = "s", type = sixteen },
    { name = "t", type = sixteen },
  },
}

do
  -- bop has ten arguments named c0 .. c9, followed by the pointer to the
  -- previous page
  local bop_args = {}
  for i = 0, 9 do
    bop_args[#bop_args + 1] = { name = "c" .. i, type = int }
  end
  bop_args[#bop_args + 1] = { name = "p", type = int } -- previous page
  opcodes.bop = { opcode = 139, args = bop_args }
end
--- Decode a list of command arguments stored one after another in `str`.
-- @param str string to read from
-- @param pos position of the first argument
-- @param args sequence of argument descriptions with `name` and `type`
--   fields, where `type` is the size of the argument in bytes
-- @return table mapping each argument name to its decoded value
local function read_arguments(str, pos, args)
  local values = {}
  local cursor = pos
  for _, desc in ipairs(args) do
    local size = desc.type
    values[desc.name] = readers[size](str, cursor)
    -- the size of this argument is also the distance to the next one
    cursor = cursor + size
  end
  return values
end
--- Read one command of a known kind from `str`.
-- @param opcode name of the command, a key of the `opcodes` table
-- @param str string to read from
-- @param pos position where the command byte is expected
-- @return table with the decoded arguments, or nil and an error message when
--   the command is unknown, does not fit in `str`, or a different command
--   byte is found at `pos`
local function read_opcode(opcode, str, pos)
  local format = opcodes[opcode]
  if not format then return nil, "Cannot find opcode format: " .. tostring(opcode) end
  -- A truncated or malformed file can point outside of the string. Report that
  -- with the usual nil, message result instead of raising an error while
  -- reading bytes or while building the message below.
  local size = 0
  for _, desc in ipairs(format.args) do size = size + desc.type end
  if type(pos) ~= "number" or pos < 1 or pos + size > string.len(str) then
    return nil, "Command " .. opcode .. " does not fit in the file at position " .. tostring(pos)
  end
  -- check that opcode byte in the current position is the same as required opcode
  local op = read_byte(str, pos)
  if op ~= format.opcode then return nil, "Wrong opcode " .. op .. " at position " .. pos end
  return read_arguments(str, pos + 1, format.args)
end
--- Find the position of the postamble.
-- @param dvicontent string with the contents of the DVI file
-- @return position of the postamble in `dvicontent`, or nil and an error
--   message when the post_post command cannot be read
local function get_postamble_addr(dvicontent)
  -- walk back over the endfill bytes at the end of the file
  local pos = string.len(dvicontent)
  while string.byte(read_char(dvicontent, pos)) == endfill do
    pos = pos - 1
  end
  -- post_post is one command byte plus five bytes of arguments, so when `pos`
  -- is its last byte, the command byte is five bytes earlier
  local post_post, err = read_opcode("post_post", dvicontent, pos - 5)
  if not post_post then return nil, err end
  -- q is the postamble address read from the file; one is added to turn it
  -- into a position in the Lua string
  return post_post.q + 1
end
--- Read one page and calculate the hash of its contents.
-- @param str string with the contents of the DVI file
-- @param start address of the page as stored in the file; the bop command
--   byte is read from string position start + 1
-- @param stop string position where the backward search for the end of the
--   page begins
-- @return table with the fields `number` (value of c0) and `hash`, followed by
--   the address of the previous page and by `start`; nil when there is no
--   page to read
local function read_page(str, start, stop)
  -- opcode of the eop (end of page) command
  local eop = 140
  -- Search backwards from `pos` for the eop byte, but never below `lowest`.
  -- The search used to be unbounded: when no eop preceded `pos`, it continued
  -- into zero and negative positions, where it either matched a byte counted
  -- from the end of the string or never terminated. When nothing is found,
  -- `lowest - 1` is returned, which makes the page contents empty; that is the
  -- same result the old code produced when it found an eop before the page.
  local function get_end_of_page(pos, lowest)
    while pos >= lowest do
      if string.byte(str, pos) == eop then return pos end
      pos = pos - 1
    end
    return lowest - 1
  end
  -- read_integer ignores the sign, so a pointer of -1 is seen as 2^32-1;
  -- it means that there is no further page to read
  if start == 2^32-1 then return nil end
  local current_page = read_opcode("bop", str, start + 1)
  if not current_page then return nil end
  -- get the page contents, but skip all parameters, because they can change
  -- (especially pointer to the previous page): bop takes 45 bytes, the command
  -- byte and eleven four-byte arguments
  local contents_start = start + 46
  local endofpage = get_end_of_page(stop, contents_start)
  local page = str:sub(contents_start, endofpage)
  -- NOTE(review): `md5` is not declared in this file; presumably it is the
  -- global md5 library of the Lua interpreter that runs make4ht -- confirm
  local page_obj = {
    number = current_page.c0, -- the page number
    hash = md5.sumhexa(page) -- hash the page contents
  }
  return page_obj, current_page.p, start
end
--- Calculate a hash for every page of a DVI file.
-- Raises an error with a descriptive message when the postamble of the file
-- cannot be located or read.
-- @param dvicontent string with the contents of the DVI file
-- @return table mapping page numbers (the c0 value of each page) to the hash
--   of the page contents
local function get_pages(dvicontent)
  -- Both calls report failure as nil, message. The results used to be taken
  -- unchecked, so a bad file failed later with "attempt to index a nil value";
  -- assert raises the real message instead.
  local postamble_pos = assert(get_postamble_addr(dvicontent))
  local postamble = assert(read_opcode("post", dvicontent, postamble_pos))
  local pages = {}
  -- the pages are visited from the last one to the first one, each page
  -- pointing to the previous one
  local next_page_pos = postamble.p
  -- the search for the end of the last page starts at the postamble, for the
  -- other pages at the start of the page visited before them
  local previous_page = postamble_pos
  while next_page_pos do
    local page
    page, next_page_pos, previous_page = read_page(dvicontent, next_page_pos, previous_page)
    if page then
      pages[page.number] = page.hash
    end
  end
  return pages
end
-- if arg[1] then
-- local f = io.open(arg[1], "r")
-- local dvicontent = f:read("*all")
-- f:close()
-- local pages = get_pages(dvicontent)
-- for k,v in pairs(pages) do
-- print(k,v)
-- end
-- end
-- public interface of the module
local exports = {}
exports.get_pages = get_pages
return exports