Brad Bishop | 220d553 | 2018-08-14 00:59:39 +0100 | [diff] [blame] | 1 | From 0abf1e8d89aecd32dbdabda5da4d52a2d57a7cff Mon Sep 17 00:00:00 2001 |
| 2 | From: Karl Williamson <khw@cpan.org> |
| 3 | Date: Tue, 6 Feb 2018 14:50:48 -0700 |
| 4 | Subject: [PATCH] [perl #132063]: Heap buffer overflow |
| 5 | |
| 6 | The proximal cause is several places in regexec.c where the code |
| 7 | assumed the input was valid UTF-8, when in fact the input was shorter |
| 8 | than the length its start byte claimed. |
| 9 | |
| 10 | I grepped through the core for any other similar uses, and did not find |
| 11 | any. |
| 12 | |
| 13 | (cherry picked from commit fe7d8ba0a1bf567af8fa8fea128e2b9f4c553e84) |
| 14 | |
| 15 | CVE: CVE-2018-6798 |
| 16 | Upstream-Status: Backport [https://perl5.git.perl.org/perl.git/patch/0abf1e8d89aecd32dbdabda5da4d52a2d57a7cff] |
| 17 | |
| 18 | Signed-off-by: Jagadeesh Krishnanjanappa <jkrishnanjanappa@mvista.com> |
| 19 | --- |
| 20 | regexec.c | 29 ++++++++++++++++------------- |
| 21 | t/lib/warnings/regexec | 7 +++++++ |
| 22 | 2 files changed, 23 insertions(+), 13 deletions(-) |
| 23 | |
| 24 | diff --git a/regexec.c b/regexec.c |
| 25 | index 5735b997fd..ea432c39d3 100644 |
| 26 | --- a/regexec.c |
| 27 | +++ b/regexec.c |
| 28 | @@ -1466,7 +1466,9 @@ Perl_re_intuit_start(pTHX_ |
| 29 | ? trie_utf8_fold \ |
| 30 | : trie_latin_utf8_fold))) |
| 31 | |
| 32 | -#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \ |
| 33 | +/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is |
| 34 | + * 'foldbuf+sizeof(foldbuf)' */ |
| 35 | +#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \ |
| 36 | STMT_START { \ |
| 37 | STRLEN skiplen; \ |
| 38 | U8 flags = FOLD_FLAGS_FULL; \ |
| 39 | @@ -1474,7 +1476,7 @@ STMT_START { |
| 40 | case trie_flu8: \ |
| 41 | _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \ |
| 42 | if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \ |
| 43 | - _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \ |
| 44 | + _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc); \ |
| 45 | } \ |
| 46 | goto do_trie_utf8_fold; \ |
| 47 | case trie_utf8_exactfa_fold: \ |
| 48 | @@ -1483,7 +1485,7 @@ STMT_START { |
| 49 | case trie_utf8_fold: \ |
| 50 | do_trie_utf8_fold: \ |
| 51 | if ( foldlen>0 ) { \ |
| 52 | - uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ |
| 53 | + uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \ |
| 54 | foldlen -= len; \ |
| 55 | uscan += len; \ |
| 56 | len=0; \ |
| 57 | @@ -1500,7 +1502,7 @@ STMT_START { |
| 58 | /* FALLTHROUGH */ \ |
| 59 | case trie_latin_utf8_fold: \ |
| 60 | if ( foldlen>0 ) { \ |
| 61 | - uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \ |
| 62 | + uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \ |
| 63 | foldlen -= len; \ |
| 64 | uscan += len; \ |
| 65 | len=0; \ |
| 66 | @@ -1519,7 +1521,7 @@ STMT_START { |
| 67 | } \ |
| 68 | /* FALLTHROUGH */ \ |
| 69 | case trie_utf8: \ |
| 70 | - uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \ |
| 71 | + uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags ); \ |
| 72 | break; \ |
| 73 | case trie_plain: \ |
| 74 | uvc = (UV)*uc; \ |
| 75 | @@ -2599,10 +2601,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, |
| 76 | } |
| 77 | points[pointpos++ % maxlen]= uc; |
| 78 | if (foldlen || uc < (U8*)strend) { |
| 79 | - REXEC_TRIE_READ_CHAR(trie_type, trie, |
| 80 | - widecharmap, uc, |
| 81 | - uscan, len, uvc, charid, foldlen, |
| 82 | - foldbuf, uniflags); |
| 83 | + REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, |
| 84 | + (U8 *) strend, uscan, len, uvc, |
| 85 | + charid, foldlen, foldbuf, |
| 86 | + uniflags); |
| 87 | DEBUG_TRIE_EXECUTE_r({ |
| 88 | dump_exec_pos( (char *)uc, c, strend, |
| 89 | real_start, s, utf8_target, 0); |
| 90 | @@ -5511,8 +5513,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) |
| 91 | if ( base && (foldlen || uc < (U8*)(reginfo->strend))) { |
| 92 | I32 offset; |
| 93 | REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, |
| 94 | - uscan, len, uvc, charid, foldlen, |
| 95 | - foldbuf, uniflags); |
| 96 | + (U8 *) reginfo->strend, uscan, |
| 97 | + len, uvc, charid, foldlen, |
| 98 | + foldbuf, uniflags); |
| 99 | charcount++; |
| 100 | if (foldlen>0) |
| 101 | ST.longfold = TRUE; |
| 102 | @@ -5642,8 +5645,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) |
| 103 | while (foldlen) { |
| 104 | if (!--chars) |
| 105 | break; |
| 106 | - uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len, |
| 107 | - uniflags); |
| 108 | + uvc = utf8n_to_uvchr(uscan, foldlen, &len, |
| 109 | + uniflags); |
| 110 | uscan += len; |
| 111 | foldlen -= len; |
| 112 | } |
| 113 | diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec |
| 114 | index 900dd6ee7f..6635142dea 100644 |
| 115 | --- a/t/lib/warnings/regexec |
| 116 | +++ b/t/lib/warnings/regexec |
| 117 | @@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale); |
| 118 | "k" =~ /(?[ \N{KELVIN SIGN} ])/i; |
| 119 | ":" =~ /(?[ \: ])/; |
| 120 | EXPECT |
| 121 | +######## |
| 122 | +# NAME perl #132063, read beyond buffer end |
| 123 | +# OPTION fatal |
| 124 | +"\xff" =~ /(?il)\x{100}|\x{100}/; |
| 125 | +EXPECT |
| 126 | +Malformed UTF-8 character: \xff (too short; 1 byte available, need 13) in pattern match (m//) at - line 2. |
| 127 | +Malformed UTF-8 character (fatal) at - line 2. |
| 128 | -- |
| 129 | 2.15.1-424-g9478a660812 |
| 130 | |