Brad Bishop | a34c030 | 2019-09-23 22:34:48 -0400 | [diff] [blame^] | 1 | From 0bafbd9c1273fab0dc79fd20db0ffc4443683f96 Mon Sep 17 00:00:00 2001 |
| 2 | From: Ken Sharp <ken.sharp@artifex.com> |
| 3 | Date: Mon, 29 Apr 2019 11:14:06 +0100 |
| 4 | Subject: [PATCH 1/2] PDF interpreter - Decode ToUnicode entries of the form |
| 5 | /Identity-H/V |
| 6 | |
| 7 | Bug #701003 "Text searchability broken due to omission of /ToUnicode /Identity-H" |
| 8 | |
| 9 | The PDF references from 1.2 to 2.0 all state that the value associated |
| 10 | with a ToUnicode key in a FontDescriptor must be a stream object. However |
| 11 | this file (and one case seen previously, bug 687351) have FontDescriptor |
| 12 | dictionaries where the value associated with a /ToUnicode key is a |
| 13 | name object, in both cases /Identity-H. |
| 14 | |
| 15 | Although this is clearly not legal, Acrobat not only tolerates it, it |
| 16 | actually uses it for search/copy/paste (see bug 701003 for details). |
| 17 | Without the key Acrobat is unable to successfully search the output file. |
| 18 | |
| 19 | We can't simply preserve the name object as a ToUnicode value; when |
| 20 | handling ToUnicode we actually decode the CMap and build a |
| 21 | GlyphNames2Unicode map (an internal representation of the G2U data |
| 22 | produced by the Microsoft PostScript printer driver). When writing the |
| 23 | output file we use that information to get a Unicode value for each |
| 24 | character we write, and build a new ToUnicode CMap using that. |
| 25 | |
| 26 | This commit tackles the problem by pre-scanning for a name object and |
| 27 | then checking to see if it's Identity-H or Identity-V (although we have |
| 28 | not seen an Identity-V, there seems no reason why it wouldn't be |
| 29 | equally valid). If we find either of these then we construct a |
| 30 | GlyphNames2Unicode table for all possible values (0 - 65535) and store |
| 31 | that with the font as normal. When we write the output file we only |
| 32 | write the required entries for the subset font, so we write a now |
| 33 | completely legal ToUnicode CMap, and Acrobat is equally happy with that |
| 34 | as the original name. |
| 35 | |
| 36 | If the ToUnicode value isn't a name object, or isn't one of the |
| 37 | identities then we proceed as before. This means we will print a |
| 38 | warning for non-conforming ToUnicode entries and ignore them. |
| 39 | |
| 40 | CVE: CVE-2019-14817 |
| 41 | Upstream-Status: Backport [git://git.ghostscript.com/ghostpdl.git] |
| 42 | |
| 43 | Signed-off-by: Stefan Ghinea <stefan.ghinea@windriver.com> |
| 44 | --- |
| 45 | Resource/Init/pdf_font.ps | 200 ++++++++++++++++++++++++-------------- |
| 46 | 1 file changed, 129 insertions(+), 71 deletions(-) |
| 47 | |
| 48 | diff --git a/Resource/Init/pdf_font.ps b/Resource/Init/pdf_font.ps |
| 49 | index 9fb85f6..2df3303 100644 |
| 50 | --- a/Resource/Init/pdf_font.ps |
| 51 | +++ b/Resource/Init/pdf_font.ps |
| 52 | @@ -621,86 +621,144 @@ currentdict end readonly def |
| 53 | PDFDEBUG { |
| 54 | (.processToUnicode beg) = |
| 55 | } if |
| 56 | - 2 index /ToUnicode knownoget { |
| 57 | - dup type /dicttype eq { dup /File known not } { //true } ifelse { |
| 58 | - % We undefine wrong /Length and define /File in stream dictionaries. |
| 59 | - % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect. |
| 60 | - ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning |
| 61 | - pop |
| 62 | + |
| 63 | + 2 index /ToUnicode knownoget |
| 64 | + { |
| 65 | + dup type /nametype eq { |
| 66 | + % This is contrary to the specification but it seems that Acrobat at least will accept |
| 67 | + % a ToUnicode with a value of Identity-H *and* will use that for search, copy/paste. |
| 68 | + % We can't pass through a name, so the best we can do is build a GlyphNames2Unicode |
| 69 | + % map matching that which would have been generated by a full 16-bit Identity CMap |
| 70 | + % |
| 71 | + % See bug numbers 701003 and 687351 |
| 72 | + % |
| 73 | + dup /Identity-H eq 1 index /Identity-V eq or{ |
| 74 | + pop |
| 75 | + 1 index /FontInfo .knownget not { |
| 76 | + currentglobal 2 index dup gcheck setglobal |
| 77 | + /FontInfo 5 dict dup 5 1 roll .forceput |
| 78 | + setglobal |
| 79 | + } if |
| 80 | + dup /GlyphNames2Unicode .knownget not { |
| 81 | + //true % No existing G2U, make one |
| 82 | + } { |
| 83 | + dup wcheck { |
| 84 | + //false % Existing, writeable G2U, don't make new one |
| 85 | + } { |
| 86 | + pop //true % Existing read only G2U, make new one |
| 87 | + } ifelse |
| 88 | + } ifelse |
| 89 | + { |
| 90 | + currentglobal exch dup gcheck setglobal |
| 91 | + dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| 92 | + 3 2 roll setglobal |
| 93 | + } if % font-res font-dict encoding|null font-info g2u |
| 94 | + |
| 95 | + 0 1 65535{ |
| 96 | + % g2u index |
| 97 | + dup dup 256 mod exch 256 idiv % g2u index lo-byte hi-byte |
| 98 | + 2 string dup 0 4 -1 roll % g2u index lo-byte () () 0 hi-byte |
| 99 | + put % g2u index lo-byte (x) |
| 100 | + dup 1 % g2u index lo-byte (x) (x) 1 |
| 101 | + 4 -1 roll put % g2u index (x) (x) 1 lo-byte -> dict index (xx) |
| 102 | + 2 index % g2u index (xx) dict |
| 103 | + 3 1 roll % g2u g2u index (xx) |
| 104 | + put % g2u |
| 105 | + } for |
| 106 | + pop % font-res font-dict encoding|null font-info |
| 107 | + pop % font-res font-dict encoding|null |
| 108 | + //false % We built a GlyphNames2Unicode table, don't need to process further |
| 109 | + }{ |
| 110 | + //true % name is not Identity-V or H, fail by falling through |
| 111 | + }ifelse |
| 112 | } { |
| 113 | - /PDFScanRules .getuserparam dup //null eq { |
| 114 | - pop //PDFScanRules_null |
| 115 | - } { |
| 116 | - 1 dict dup /PDFScanRules 4 -1 roll put |
| 117 | - } ifelse |
| 118 | - //PDFScanRules_true setuserparams |
| 119 | - PDFfile fileposition |
| 120 | - 3 -1 roll |
| 121 | - count 1 sub |
| 122 | - countdictstack |
| 123 | - { //false resolvestream |
| 124 | - % Following Acrobat we ignore everything outside |
| 125 | - % begincodespacerange .. endcmap. |
| 126 | - dup 0 (begincodespacerange) /SubFileDecode filter flushfile |
| 127 | - /CIDInit /ProcSet findresource begin |
| 128 | - //ToUnicodeCMapReader begin |
| 129 | - 12 dict begin |
| 130 | - /CMapType 2 def |
| 131 | - mark exch % emulate 'begincodespacerange' |
| 132 | - 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn |
| 133 | - endcmap |
| 134 | - userdict /.lastToUnicode currentdict put |
| 135 | - end end end |
| 136 | - } |
| 137 | + //true |
| 138 | + } ifelse % not a name, try as a dictionary (as specified) |
| 139 | |
| 140 | - PDFSTOPONERROR { |
| 141 | - { exec } 0 get |
| 142 | - //false |
| 143 | - 5 -2 roll |
| 144 | - 5 |
| 145 | + % If the ToUnicode isn't a name, or the name isn't Identity-V or -H then follow the specification |
| 146 | + % If its not a dictionary type throw an error, otherwise decode it and build a GlyphNames2Unicode |
| 147 | + % |
| 148 | + { |
| 149 | + dup type /dicttype eq { dup /File known not } { //true } ifelse { |
| 150 | + % We undefine wrong /Length and define /File in stream dictionaries. |
| 151 | + % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect. |
| 152 | + ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning |
| 153 | + pop |
| 154 | } { |
| 155 | - { stopped } 0 get |
| 156 | - 4 2 roll |
| 157 | - 4 |
| 158 | - } ifelse |
| 159 | - array astore cvx exec |
| 160 | + /PDFScanRules .getuserparam dup //null eq { |
| 161 | + pop //PDFScanRules_null |
| 162 | + } { |
| 163 | + 1 dict dup /PDFScanRules 4 -1 roll put |
| 164 | + } ifelse |
| 165 | + //PDFScanRules_true setuserparams |
| 166 | + PDFfile fileposition |
| 167 | + 3 -1 roll |
| 168 | + count 1 sub |
| 169 | + countdictstack |
| 170 | + { //false resolvestream |
| 171 | + % Following Acrobat we ignore everything outside |
| 172 | + % begincodespacerange .. endcmap. |
| 173 | + dup 0 (begincodespacerange) /SubFileDecode filter flushfile |
| 174 | + /CIDInit /ProcSet findresource begin |
| 175 | + //ToUnicodeCMapReader begin |
| 176 | + 12 dict begin |
| 177 | + /CMapType 2 def |
| 178 | + mark exch % emulate 'begincodespacerange' |
| 179 | + 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn |
| 180 | + endcmap |
| 181 | + userdict /.lastToUnicode currentdict put |
| 182 | + end end end |
| 183 | + } |
| 184 | |
| 185 | - countdictstack exch sub 0 .max { end } repeat |
| 186 | - count exch sub 2 sub 0 .max { exch pop } repeat |
| 187 | - 3 1 roll % Stach the stop flag. |
| 188 | - PDFfile exch setfileposition |
| 189 | - setuserparams |
| 190 | - { |
| 191 | - ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning |
| 192 | - } { |
| 193 | - 1 index /FontInfo .knownget not { |
| 194 | - currentglobal 2 index dup gcheck setglobal |
| 195 | - /FontInfo 5 dict dup 5 1 roll .forceput |
| 196 | - setglobal |
| 197 | - } if |
| 198 | - dup /GlyphNames2Unicode .knownget not { |
| 199 | - //true % No existing G2U, make one |
| 200 | + PDFSTOPONERROR { |
| 201 | + { exec } 0 get |
| 202 | + //false |
| 203 | + 5 -2 roll |
| 204 | + 5 |
| 205 | + } { |
| 206 | + { stopped } 0 get |
| 207 | + 4 2 roll |
| 208 | + 4 |
| 209 | + } ifelse |
| 210 | + array astore cvx exec |
| 211 | + |
| 212 | + countdictstack exch sub 0 .max { end } repeat |
| 213 | + count exch sub 2 sub 0 .max { exch pop } repeat |
| 214 | + 3 1 roll % Stach the stop flag. |
| 215 | + PDFfile exch setfileposition |
| 216 | + setuserparams |
| 217 | + { |
| 218 | + ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning |
| 219 | } { |
| 220 | - dup wcheck { |
| 221 | - //false % Existing, writeable G2U, don't make new one |
| 222 | + 1 index /FontInfo .knownget not { |
| 223 | + currentglobal 2 index dup gcheck setglobal |
| 224 | + /FontInfo 5 dict dup 5 1 roll .forceput |
| 225 | + setglobal |
| 226 | + } if |
| 227 | + dup /GlyphNames2Unicode .knownget not { |
| 228 | + //true % No existing G2U, make one |
| 229 | } { |
| 230 | - pop //true % Existing read only G2U, make new one |
| 231 | + dup wcheck { |
| 232 | + //false % Existing, writeable G2U, don't make new one |
| 233 | + } { |
| 234 | + pop //true % Existing read only G2U, make new one |
| 235 | + } ifelse |
| 236 | } ifelse |
| 237 | + { |
| 238 | + currentglobal exch dup gcheck setglobal |
| 239 | + dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| 240 | + 3 2 roll setglobal |
| 241 | + } if % font-res font-dict encoding|null font-info g2u |
| 242 | + exch pop exch % font-res font-dict g2u encoding|null |
| 243 | + userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap |
| 244 | + .convert_ToUnicode-into-g2u % font-res font-dict |
| 245 | + //null % font-res font-dict //null |
| 246 | } ifelse |
| 247 | - { |
| 248 | - currentglobal exch dup gcheck setglobal |
| 249 | - dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| 250 | - 3 2 roll setglobal |
| 251 | - } if % font-res font-dict encoding|null font-info g2u |
| 252 | - exch pop exch % font-res font-dict g2u encoding|null |
| 253 | - userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap |
| 254 | - .convert_ToUnicode-into-g2u % font-res font-dict |
| 255 | - //null % font-res font-dict //null |
| 256 | } ifelse |
| 257 | - } ifelse |
| 258 | - } if |
| 259 | - PDFDEBUG { |
| 260 | - (.processToUnicode end) = |
| 261 | + } if |
| 262 | + PDFDEBUG { |
| 263 | + (.processToUnicode end) = |
| 264 | + } if |
| 265 | } if |
| 266 | } if |
| 267 | } stopped |
| 268 | -- |
| 269 | 2.20.1 |
| 270 | |