blob: c76e21caa6d1abbcebc43a10377974fa25f00a27 [file] [log] [blame]
Brad Bishopa34c0302019-09-23 22:34:48 -04001From 0bafbd9c1273fab0dc79fd20db0ffc4443683f96 Mon Sep 17 00:00:00 2001
2From: Ken Sharp <ken.sharp@artifex.com>
3Date: Mon, 29 Apr 2019 11:14:06 +0100
4Subject: [PATCH 1/2] PDF interpreter - Decode ToUnicode entries of the form
5 /Identity-H/V
6
7Bug #701003 "Text searchability broken due to omission of /ToUnicode /Identity-H"
8
9The PDF references from 1.2 to 2.0 all state that the value associated
10with a ToUnicode key in a FontDescriptor must be a stream object. However
11this file (and one case seen previously, bug 687351) have FontDescriptor
12dictionaries where the value associated with a /ToUnicode key is a
13name object, in both cases /Identity-H.
14
15Although this is clearly not legal, Acrobat not only tolerates it, it
16actually uses it for search/copy/paste (see bug 701003 for details).
17Without the key Acrobat is unable to successfully search the output file.
18
19We can't simply preserve the name object as a ToUnicode value; when
20handling ToUnicode we actually decode the CMap and build a
21GlyphNames2Unicode map (an internal representation of the G2U data
22produced by the Microsoft PostScript printer driver). When writing the
23output file we use that information to get a Unicode value for each
24character we write, and build a new ToUnicode CMap using that.
25
26This commit tackles the problem by pre-scanning for a name object and
27then checking to see if it's Identity-H or Identity-V (although we have
28not seen an Identity-V, there seems no reason why it wouldn't be
29equally valid). If we find either of these then we construct a
30GlyphNames2Unicode table for all possible values (0 - 65535) and store
31that with the font as normal. When we write the output file we only
32write the required entries for the subset font, so we write a now
33completely legal ToUnicode CMap, and Acrobat is equally happy with that
34as the original name.
35
36If the ToUnicode value isn't a name object, or isn't one of the
37identities then we proceed as before. This means we will print a
38warning for non-conforming ToUnicode entries and ignore them.
39
40CVE: CVE-2019-14817
41Upstream-Status: Backport [git://git.ghostscript.com/ghostpdl.git]
42
43Signed-off-by: Stefan Ghinea <stefan.ghinea@windriver.com>
44---
45 Resource/Init/pdf_font.ps | 200 ++++++++++++++++++++++++--------------
46 1 file changed, 129 insertions(+), 71 deletions(-)
47
48diff --git a/Resource/Init/pdf_font.ps b/Resource/Init/pdf_font.ps
49index 9fb85f6..2df3303 100644
50--- a/Resource/Init/pdf_font.ps
51+++ b/Resource/Init/pdf_font.ps
52@@ -621,86 +621,144 @@ currentdict end readonly def
53 PDFDEBUG {
54 (.processToUnicode beg) =
55 } if
56- 2 index /ToUnicode knownoget {
57- dup type /dicttype eq { dup /File known not } { //true } ifelse {
58- % We undefine wrong /Length and define /File in stream dictionaries.
59- % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect.
60- ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning
61- pop
62+
63+ 2 index /ToUnicode knownoget
64+ {
65+ dup type /nametype eq {
66+ % This is contrary to the specification but it seems that Acrobat at least will accept
67+ % a ToUnicode with a value of Identity-H *and* will use that for search, copy/paste.
68+ % We can't pass through a name, so the best we can do is build a GlyphNames2Unicode
69+ % map matching that which would have been generated by a full 16-bit Identity CMap
70+ %
71+ % See bug numbers 701003 and 687351
72+ %
73+ dup /Identity-H eq 1 index /Identity-V eq or{
74+ pop
75+ 1 index /FontInfo .knownget not {
76+ currentglobal 2 index dup gcheck setglobal
77+ /FontInfo 5 dict dup 5 1 roll .forceput
78+ setglobal
79+ } if
80+ dup /GlyphNames2Unicode .knownget not {
81+ //true % No existing G2U, make one
82+ } {
83+ dup wcheck {
84+ //false % Existing, writeable G2U, don't make new one
85+ } {
86+ pop //true % Existing read only G2U, make new one
87+ } ifelse
88+ } ifelse
89+ {
90+ currentglobal exch dup gcheck setglobal
91+ dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput
92+ 3 2 roll setglobal
93+ } if % font-res font-dict encoding|null font-info g2u
94+
95+ 0 1 65535{
96+ % g2u index
97+ dup dup 256 mod exch 256 idiv % g2u index lo-byte hi-byte
98+ 2 string dup 0 4 -1 roll % g2u index lo-byte () () 0 hi-byte
99+ put % g2u index lo-byte (x)
100+ dup 1 % g2u index lo-byte (x) (x) 1
101+ 4 -1 roll put % g2u index (x) (x) 1 lo-byte -> dict index (xx)
102+ 2 index % g2u index (xx) dict
103+ 3 1 roll % g2u g2u index (xx)
104+ put % g2u
105+ } for
106+ pop % font-res font-dict encoding|null font-info
107+ pop % font-res font-dict encoding|null
108+ //false % We built a GlyphNames2Unicode table, don't need to process further
109+ }{
110+ //true % name is not Identity-V or H, fail by falling through
111+ }ifelse
112 } {
113- /PDFScanRules .getuserparam dup //null eq {
114- pop //PDFScanRules_null
115- } {
116- 1 dict dup /PDFScanRules 4 -1 roll put
117- } ifelse
118- //PDFScanRules_true setuserparams
119- PDFfile fileposition
120- 3 -1 roll
121- count 1 sub
122- countdictstack
123- { //false resolvestream
124- % Following Acrobat we ignore everything outside
125- % begincodespacerange .. endcmap.
126- dup 0 (begincodespacerange) /SubFileDecode filter flushfile
127- /CIDInit /ProcSet findresource begin
128- //ToUnicodeCMapReader begin
129- 12 dict begin
130- /CMapType 2 def
131- mark exch % emulate 'begincodespacerange'
132- 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn
133- endcmap
134- userdict /.lastToUnicode currentdict put
135- end end end
136- }
137+ //true
138+ } ifelse % not a name, try as a dictionary (as specified)
139
140- PDFSTOPONERROR {
141- { exec } 0 get
142- //false
143- 5 -2 roll
144- 5
145+ % If the ToUnicode isn't a name, or the name isn't Identity-V or -H then follow the specification
146+ % If its not a dictionary type throw an error, otherwise decode it and build a GlyphNames2Unicode
147+ %
148+ {
149+ dup type /dicttype eq { dup /File known not } { //true } ifelse {
150+ % We undefine wrong /Length and define /File in stream dictionaries.
151+ % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect.
152+ ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning
153+ pop
154 } {
155- { stopped } 0 get
156- 4 2 roll
157- 4
158- } ifelse
159- array astore cvx exec
160+ /PDFScanRules .getuserparam dup //null eq {
161+ pop //PDFScanRules_null
162+ } {
163+ 1 dict dup /PDFScanRules 4 -1 roll put
164+ } ifelse
165+ //PDFScanRules_true setuserparams
166+ PDFfile fileposition
167+ 3 -1 roll
168+ count 1 sub
169+ countdictstack
170+ { //false resolvestream
171+ % Following Acrobat we ignore everything outside
172+ % begincodespacerange .. endcmap.
173+ dup 0 (begincodespacerange) /SubFileDecode filter flushfile
174+ /CIDInit /ProcSet findresource begin
175+ //ToUnicodeCMapReader begin
176+ 12 dict begin
177+ /CMapType 2 def
178+ mark exch % emulate 'begincodespacerange'
179+ 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn
180+ endcmap
181+ userdict /.lastToUnicode currentdict put
182+ end end end
183+ }
184
185- countdictstack exch sub 0 .max { end } repeat
186- count exch sub 2 sub 0 .max { exch pop } repeat
187- 3 1 roll % Stach the stop flag.
188- PDFfile exch setfileposition
189- setuserparams
190- {
191- ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning
192- } {
193- 1 index /FontInfo .knownget not {
194- currentglobal 2 index dup gcheck setglobal
195- /FontInfo 5 dict dup 5 1 roll .forceput
196- setglobal
197- } if
198- dup /GlyphNames2Unicode .knownget not {
199- //true % No existing G2U, make one
200+ PDFSTOPONERROR {
201+ { exec } 0 get
202+ //false
203+ 5 -2 roll
204+ 5
205+ } {
206+ { stopped } 0 get
207+ 4 2 roll
208+ 4
209+ } ifelse
210+ array astore cvx exec
211+
212+ countdictstack exch sub 0 .max { end } repeat
213+ count exch sub 2 sub 0 .max { exch pop } repeat
214+ 3 1 roll % Stach the stop flag.
215+ PDFfile exch setfileposition
216+ setuserparams
217+ {
218+ ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning
219 } {
220- dup wcheck {
221- //false % Existing, writeable G2U, don't make new one
222+ 1 index /FontInfo .knownget not {
223+ currentglobal 2 index dup gcheck setglobal
224+ /FontInfo 5 dict dup 5 1 roll .forceput
225+ setglobal
226+ } if
227+ dup /GlyphNames2Unicode .knownget not {
228+ //true % No existing G2U, make one
229 } {
230- pop //true % Existing read only G2U, make new one
231+ dup wcheck {
232+ //false % Existing, writeable G2U, don't make new one
233+ } {
234+ pop //true % Existing read only G2U, make new one
235+ } ifelse
236 } ifelse
237+ {
238+ currentglobal exch dup gcheck setglobal
239+ dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput
240+ 3 2 roll setglobal
241+ } if % font-res font-dict encoding|null font-info g2u
242+ exch pop exch % font-res font-dict g2u encoding|null
243+ userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap
244+ .convert_ToUnicode-into-g2u % font-res font-dict
245+ //null % font-res font-dict //null
246 } ifelse
247- {
248- currentglobal exch dup gcheck setglobal
249- dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput
250- 3 2 roll setglobal
251- } if % font-res font-dict encoding|null font-info g2u
252- exch pop exch % font-res font-dict g2u encoding|null
253- userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap
254- .convert_ToUnicode-into-g2u % font-res font-dict
255- //null % font-res font-dict //null
256 } ifelse
257- } ifelse
258- } if
259- PDFDEBUG {
260- (.processToUnicode end) =
261+ } if
262+ PDFDEBUG {
263+ (.processToUnicode end) =
264+ } if
265 } if
266 } if
267 } stopped
268--
2692.20.1
270