| From 0bafbd9c1273fab0dc79fd20db0ffc4443683f96 Mon Sep 17 00:00:00 2001 |
| From: Ken Sharp <ken.sharp@artifex.com> |
| Date: Mon, 29 Apr 2019 11:14:06 +0100 |
| Subject: [PATCH 1/2] PDF interpreter - Decode ToUnicode entries of the form |
| /Identity-H/V |
| |
| Bug #701003 "Text searchability broken due to omission of /ToUnicode /Identity-H" |
| |
| The PDF references from 1.2 to 2.0 all state that the value associated |
| with a ToUnicode key in a FontDescriptor must be a stream object. However |
| this file (and one case seen previously, bug 687351) have FontDescriptor |
| dictionaries where the value associated with a /ToUnicode key is a |
| name object, in both cases /Identity-H. |
| |
| Although this is clearly not legal, Acrobat not only tolerates it, it |
| actually uses it for search/copy/paste (see bug 701003 for details). |
| Without the key Acrobat is unable to successfully search the output file. |
| |
| We can't simply preserve the name object as a ToUnicode value; when |
| handling ToUnicode we actually decode the CMap and build a |
| GlyphNames2Unicode map (an internal representation of the G2U data |
| produced by the Microsoft PostScript printer driver). When writing the |
| output file we use that information to get a Unicode value for each |
| character we write, and build a new ToUnicode CMap using that. |
| |
| This commit tackles the problem by pre-scanning for a name object and |
| then checking to see if it's Identity-H or Identity-V (although we have |
| not seen an Identity-V, there seems no reason why it wouldn't be |
| equally valid). If we find either of these then we construct a |
| GlyphNames2Unicode table for all possible values (0 - 65535) and store |
| that with the font as normal. When we write the output file we only |
| write the required entries for the subset font, so we write a now |
| completely legal ToUnicode CMap, and Acrobat is equally happy with that |
| as the original name. |
| |
| If the ToUnicode value isn't a name object, or isn't one of the |
| identities then we proceed as before. This means we will print a |
| warning for non-conforming ToUnicode entries and ignore them. |
| |
| CVE: CVE-2019-14817 |
| Upstream-Status: Backport [git://git.ghostscript.com/ghostpdl.git] |
| |
| Signed-off-by: Stefan Ghinea <stefan.ghinea@windriver.com> |
| --- |
| Resource/Init/pdf_font.ps | 200 ++++++++++++++++++++++++-------------- |
| 1 file changed, 129 insertions(+), 71 deletions(-) |
| |
| diff --git a/Resource/Init/pdf_font.ps b/Resource/Init/pdf_font.ps |
| index 9fb85f6..2df3303 100644 |
| --- a/Resource/Init/pdf_font.ps |
| +++ b/Resource/Init/pdf_font.ps |
| @@ -621,86 +621,144 @@ currentdict end readonly def |
| PDFDEBUG { |
| (.processToUnicode beg) = |
| } if |
| - 2 index /ToUnicode knownoget { |
| - dup type /dicttype eq { dup /File known not } { //true } ifelse { |
| - % We undefine wrong /Length and define /File in stream dictionaries. |
| - % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect. |
| - ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning |
| - pop |
| + |
| + 2 index /ToUnicode knownoget |
| + { |
| + dup type /nametype eq { |
| + % This is contrary to the specification but it seems that Acrobat at least will accept |
| + % a ToUnicode with a value of Identity-H *and* will use that for search, copy/paste. |
| + % We can't pass through a name, so the best we can do is build a GlyphNames2Unicode |
| + % map matching that which would have been generated by a full 16-bit Identity CMap |
| + % |
| + % See bug numbers 701003 and 687351 |
| + % |
| + dup /Identity-H eq 1 index /Identity-V eq or{ |
| + pop |
| + 1 index /FontInfo .knownget not { |
| + currentglobal 2 index dup gcheck setglobal |
| + /FontInfo 5 dict dup 5 1 roll .forceput |
| + setglobal |
| + } if |
| + dup /GlyphNames2Unicode .knownget not { |
| + //true % No existing G2U, make one |
| + } { |
| + dup wcheck { |
| + //false % Existing, writeable G2U, don't make new one |
| + } { |
| + pop //true % Existing read only G2U, make new one |
| + } ifelse |
| + } ifelse |
| + { |
| + currentglobal exch dup gcheck setglobal |
| + dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| + 3 2 roll setglobal |
| + } if % font-res font-dict encoding|null font-info g2u |
| + |
| + 0 1 65535{ |
| + % g2u index |
| + dup dup 256 mod exch 256 idiv % g2u index lo-byte hi-byte |
| + 2 string dup 0 4 -1 roll % g2u index lo-byte () () 0 hi-byte |
| + put % g2u index lo-byte (x) |
| + dup 1 % g2u index lo-byte (x) (x) 1 |
| + 4 -1 roll put % g2u index (x) (x) 1 lo-byte -> dict index (xx) |
| + 2 index % g2u index (xx) dict |
| + 3 1 roll % g2u g2u index (xx) |
| + put % g2u |
| + } for |
| + pop % font-res font-dict encoding|null font-info |
| + pop % font-res font-dict encoding|null |
| + //false % We built a GlyphNames2Unicode table, don't need to process further |
| + }{ |
| + //true % name is not Identity-V or H, fail by falling through |
| + }ifelse |
| } { |
| - /PDFScanRules .getuserparam dup //null eq { |
| - pop //PDFScanRules_null |
| - } { |
| - 1 dict dup /PDFScanRules 4 -1 roll put |
| - } ifelse |
| - //PDFScanRules_true setuserparams |
| - PDFfile fileposition |
| - 3 -1 roll |
| - count 1 sub |
| - countdictstack |
| - { //false resolvestream |
| - % Following Acrobat we ignore everything outside |
| - % begincodespacerange .. endcmap. |
| - dup 0 (begincodespacerange) /SubFileDecode filter flushfile |
| - /CIDInit /ProcSet findresource begin |
| - //ToUnicodeCMapReader begin |
| - 12 dict begin |
| - /CMapType 2 def |
| - mark exch % emulate 'begincodespacerange' |
| - 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn |
| - endcmap |
| - userdict /.lastToUnicode currentdict put |
| - end end end |
| - } |
| + //true |
| + } ifelse % not a name, try as a dictionary (as specified) |
| |
| - PDFSTOPONERROR { |
| - { exec } 0 get |
| - //false |
| - 5 -2 roll |
| - 5 |
| + % If the ToUnicode isn't a name, or the name isn't Identity-V or -H then follow the specification |
| + % If its not a dictionary type throw an error, otherwise decode it and build a GlyphNames2Unicode |
| + % |
| + { |
| + dup type /dicttype eq { dup /File known not } { //true } ifelse { |
| + % We undefine wrong /Length and define /File in stream dictionaries. |
| + % Bug687351.pdf defines /ToUnicode /Identity-H, what is incorrect. |
| + ( **** Warning: Ignoring bad ToUnicode CMap.\n) pdfformatwarning |
| + pop |
| } { |
| - { stopped } 0 get |
| - 4 2 roll |
| - 4 |
| - } ifelse |
| - array astore cvx exec |
| + /PDFScanRules .getuserparam dup //null eq { |
| + pop //PDFScanRules_null |
| + } { |
| + 1 dict dup /PDFScanRules 4 -1 roll put |
| + } ifelse |
| + //PDFScanRules_true setuserparams |
| + PDFfile fileposition |
| + 3 -1 roll |
| + count 1 sub |
| + countdictstack |
| + { //false resolvestream |
| + % Following Acrobat we ignore everything outside |
| + % begincodespacerange .. endcmap. |
| + dup 0 (begincodespacerange) /SubFileDecode filter flushfile |
| + /CIDInit /ProcSet findresource begin |
| + //ToUnicodeCMapReader begin |
| + 12 dict begin |
| + /CMapType 2 def |
| + mark exch % emulate 'begincodespacerange' |
| + 0 (endcmap) /SubFileDecode filter cvx /begincmap cvx exch 2 .execn |
| + endcmap |
| + userdict /.lastToUnicode currentdict put |
| + end end end |
| + } |
| |
| - countdictstack exch sub 0 .max { end } repeat |
| - count exch sub 2 sub 0 .max { exch pop } repeat |
| - 3 1 roll % Stach the stop flag. |
| - PDFfile exch setfileposition |
| - setuserparams |
| - { |
| - ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning |
| - } { |
| - 1 index /FontInfo .knownget not { |
| - currentglobal 2 index dup gcheck setglobal |
| - /FontInfo 5 dict dup 5 1 roll .forceput |
| - setglobal |
| - } if |
| - dup /GlyphNames2Unicode .knownget not { |
| - //true % No existing G2U, make one |
| + PDFSTOPONERROR { |
| + { exec } 0 get |
| + //false |
| + 5 -2 roll |
| + 5 |
| + } { |
| + { stopped } 0 get |
| + 4 2 roll |
| + 4 |
| + } ifelse |
| + array astore cvx exec |
| + |
| + countdictstack exch sub 0 .max { end } repeat |
| + count exch sub 2 sub 0 .max { exch pop } repeat |
| + 3 1 roll % Stach the stop flag. |
| + PDFfile exch setfileposition |
| + setuserparams |
| + { |
| + ( **** Warning: Failed to read ToUnicode CMap.\n) pdfformatwarning |
| } { |
| - dup wcheck { |
| - //false % Existing, writeable G2U, don't make new one |
| + 1 index /FontInfo .knownget not { |
| + currentglobal 2 index dup gcheck setglobal |
| + /FontInfo 5 dict dup 5 1 roll .forceput |
| + setglobal |
| + } if |
| + dup /GlyphNames2Unicode .knownget not { |
| + //true % No existing G2U, make one |
| } { |
| - pop //true % Existing read only G2U, make new one |
| + dup wcheck { |
| + //false % Existing, writeable G2U, don't make new one |
| + } { |
| + pop //true % Existing read only G2U, make new one |
| + } ifelse |
| } ifelse |
| + { |
| + currentglobal exch dup gcheck setglobal |
| + dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| + 3 2 roll setglobal |
| + } if % font-res font-dict encoding|null font-info g2u |
| + exch pop exch % font-res font-dict g2u encoding|null |
| + userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap |
| + .convert_ToUnicode-into-g2u % font-res font-dict |
| + //null % font-res font-dict //null |
| } ifelse |
| - { |
| - currentglobal exch dup gcheck setglobal |
| - dup /GlyphNames2Unicode 100 dict dup 4 1 roll .forceput |
| - 3 2 roll setglobal |
| - } if % font-res font-dict encoding|null font-info g2u |
| - exch pop exch % font-res font-dict g2u encoding|null |
| - userdict /.lastToUnicode get % font-res font-dict g2u Encoding|null CMap |
| - .convert_ToUnicode-into-g2u % font-res font-dict |
| - //null % font-res font-dict //null |
| } ifelse |
| - } ifelse |
| - } if |
| - PDFDEBUG { |
| - (.processToUnicode end) = |
| + } if |
| + PDFDEBUG { |
| + (.processToUnicode end) = |
| + } if |
| } if |
| } if |
| } stopped |
| -- |
| 2.20.1 |
| |