Skip to content

Commit

Permalink
fixed /Encoding in the output of ConvertType1FontsToType1C; this fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Szabo committed Oct 4, 2017
1 parent cd7b8d6 commit cbb88d4
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 6 deletions.
90 changes: 84 additions & 6 deletions lib/pdfsizeopt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5370,8 +5370,34 @@ def GetFonts(self, font_type=None,
@classmethod
def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
pdf_tmp_file_name):
"""Converts objs with Type1 font programs to Type1C font programs.
Uses Ghostscript to do the conversion.
Removes the temporary files it creates.
As a side effect, the /Encoding field in the returned Type1C font
programs is useless (it's not the same as in the input Type1 font
programs). This is a limitation of Ghostscript. As a workaround, the
correct /Encoding values are returned in `encodings'.
Fonts with more than 256 glyphs in their /CharStrings are not converted.
This is a limitation of Ghostscript.
Args:
objs: dict mapping obj numbers to PdfObj objects containing a Type1
font program.
ref_objs: dict containing objs to be used when following references.
ps_tmp_file_name: Temporary .ps filename to create. Will get removed
unless an exception is raised.
pdf_tmp_file_name: Temporary .pdf filename to create. Will get removed
unless an exception is raised.
Returns:
(type1c_objs, encodings), where keys in both type1c_objs and encodings
are the keys in objs (obj numbers).
"""
if not objs:
return {}
return {}, {}
output = ['%!PS-Adobe-3.0\n',
'% Ghostscript helper for converting Type1 fonts to Type1C\n',
'%% autogenerated by %s at %s\n' % ('pdfsizeopt', time.time())]
Expand Down Expand Up @@ -5425,16 +5451,42 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
'info: executing Type1CConverter with Ghostscript: %s' % gs_cmd)
sys.stdout.flush()
p = os.popen(gs_cmd, 'rb')
encoding_prefix = 'obj encoding '
skip_prefix = 'skipping big-CharStrings font obj '
big_charstrings_obj_nums = set()
encodings = {}
try:
for line in iter(p.readline, ''):
if line.startswith(skip_prefix):
obj_num = int(line[len(skip_prefix):])
big_charstrings_obj_nums.add(obj_num)
elif line.startswith(encoding_prefix):
obj_num, data = line[len(encoding_prefix):].split(' ', 1)
obj_num = int(obj_num)
data = data.strip().replace('#', '#23')
## This escapes e.g. * to #2A.
data = PdfObj.PDF_HEXTOKENS_SAFE_HEX_ESCAPE_RE.sub(
lambda match: '#%02X' % ord(match.group()), data)
encoding = PdfObj.ParseArray(data)
for i in xrange(len(encoding)):
char_name = encoding[i]
if char_name is None:
encoding[i] = '/.notdef'
else:
char_name = str(char_name)
assert char_name.startswith('/'), [char_name]
encoding[i] = str(char_name)
encoding.extend('/.notdef' for i in xrange(len(encoding), 256))
if len(encoding) > 256:
raise ValueError('Encoding for obj %d too long.' % obj_num)
encodings[obj_num] = encoding
else:
sys.stdout.write(line)
finally:
try:
p.read()
except IOError:
pass
status = p.close()
sys.stdout.flush()
if status:
Expand Down Expand Up @@ -5468,16 +5520,18 @@ def GenerateType1CFontsFromType1(cls, objs, ref_objs, ps_tmp_file_name,
for obj_num in type1c_objs:
# TODO(pts): Also cross-check /FontFile3 with pdf.GetFonts.
if type1c_objs[obj_num].Get('Subtype') != '/Type1C':
raise ValueError('Could not convert font %s to Type1C.' % obj_num)
raise ValueError('Could not convert font obj %d to Type1C.' % obj_num)
type1c_size += type1c_objs[obj_num].size
if obj_num not in encodings:
raise ValueError('Missing encoding for font obj %d.' % obj_num)
# TODO(pts): Don't remove if command-line flag.
os.remove(pdf_tmp_file_name)
# TODO(pts): Undo if no reduction in size.
print >>sys.stderr, (
'info: optimized total Type1 font size %s to Type1C font size %s '
'(%s)' %
(type1_size, type1c_size, FormatPercent(type1c_size, type1_size)))
return type1c_objs
return type1c_objs, encodings


@classmethod
Expand Down Expand Up @@ -5621,11 +5675,11 @@ def MoveToPrivate(parsed_font, key):
def ConvertType1FontsToType1C(self):
"""Convert all Type1 fonts to Type1C in self, returns self."""
# GenerateType1CFontsFromType1 removes the tmp files it creates.
type1c_objs = self.GenerateType1CFontsFromType1(
type1c_objs, encodings = self.GenerateType1CFontsFromType1(
self.GetFonts('Type1'), self.objs,
TMP_PREFIX + 'conv.tmp.ps', TMP_PREFIX + 'conv.tmp.pdf')
for obj_num in type1c_objs:
obj = self.objs[obj_num]
obj = self.objs[obj_num] # obj.get('Type') == 'FontDescriptor'.
assert str(obj.Get('FontName')).startswith('/')
type1c_obj = type1c_objs[obj_num]
type1c_obj.FixFontNameInType1C(objs=self.objs)
Expand All @@ -5646,10 +5700,33 @@ def ConvertType1FontsToType1C(self):
FormatPercent(new_size, old_size)))
else:
# TODO(pts): How to optimize/unify these?
# TODO(pts): Don't keep, prevents further optimizations.
print >>sys.stderr, (
'info: keeping original Type1 font XObject %s,%s, '
'replacement too large: old size=%s, new size=%s' %
(obj_num, font_file_obj_num, old_size, new_size))
encodings.pop(obj_num, None)

# Update encodings.
if encodings:
for obj_num in sorted(self.objs):
obj = self.objs[obj_num]
head = obj.head
if (head.startswith('<<') and
'/Font' in head and '/Type' in head and
'/Type1' in head and '/Subtype' in head and
'/FontDescriptor' in head and
obj.Get('Type') == '/Font' and
obj.Get('Subtype') == '/Type1'):
match = obj.PDF_REF_AT_EOS_RE.match(str(obj.Get('FontDescriptor')))
if match:
fd_obj_num = int(match.group(1)) # /Type/FontDescriptor.
if (fd_obj_num in encodings and
self.IsFontBuiltInEncodingUsed(
obj.ResolveReferences(obj.Get('Encoding'),
objs=self.objs)[0])):
obj.Set('Encoding', self.FormatEncoding(encodings[fd_obj_num]))

return self

@classmethod
Expand Down Expand Up @@ -6070,7 +6147,8 @@ def _ProcessType1CFonts(self, type1c_objs, do_unify_fonts,
for obj_num in sorted(self.objs):
obj = self.objs[obj_num]
head = obj.head
if ('/Font' in head and '/Type' in head and
if (head.startswith('<<') and
'/Font' in head and '/Type' in head and
'/Type1' in head and '/Subtype' in head and
'/FontDescriptor' in head and
obj.Get('Type') == '/Font' and
Expand Down
3 changes: 3 additions & 0 deletions lib/pdfsizeopt/psproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,9 @@
% As a workaround for `S1' above, we skip a font with too many
% /CharStrings.
dup /CharStrings get length 256 lt {
(obj encoding ) print _ObjNumber ===only ( ) print
dup /Encoding .knownget not {[]} if ===
% Create /Encoding from sorted keys of /CharStrings.
[1 index /CharStrings get {pop} forall] NameSort
% Pad it to size 256.
Expand Down

0 comments on commit cbb88d4

Please sign in to comment.