blob: 34771624f7b4de32767af5d7261ca98303af3294 [file] [log] [blame]
Brad Bishop 220d553 2018-08-14 00:59:39 +0100
From 0abf1e8d89aecd32dbdabda5da4d52a2d57a7cff Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Tue, 6 Feb 2018 14:50:48 -0700
Subject: [PATCH] [perl #132063]: Heap buffer overflow

The proximal cause is several instances in regexec.c of the code
assuming that the input was valid UTF-8, whereas the input was too short
for what the start byte claimed it would be.

I grepped through the core for any other similar uses, and did not find
any.

(cherry picked from commit fe7d8ba0a1bf567af8fa8fea128e2b9f4c553e84)

CVE: CVE-2018-6798
Upstream-Status: Backport [https://perl5.git.perl.org/perl.git/patch/0abf1e8d89aecd32dbdabda5da4d52a2d57a7cff]

Signed-off-by: Jagadeesh Krishnanjanappa <jkrishnanjanappa@mvista.com>
---
 regexec.c              | 29 ++++++++++++++++-------------
 t/lib/warnings/regexec |  7 +++++++
 2 files changed, 23 insertions(+), 13 deletions(-)

24diff --git a/regexec.c b/regexec.c
25index 5735b997fd..ea432c39d3 100644
26--- a/regexec.c
27+++ b/regexec.c
28@@ -1466,7 +1466,9 @@ Perl_re_intuit_start(pTHX_
29 ? trie_utf8_fold \
30 : trie_latin_utf8_fold)))
31
32-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
33+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
34+ * 'foldbuf+sizeof(foldbuf)' */
35+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
36 STMT_START { \
37 STRLEN skiplen; \
38 U8 flags = FOLD_FLAGS_FULL; \
39@@ -1474,7 +1476,7 @@ STMT_START {
40 case trie_flu8: \
41 _CHECK_AND_WARN_PROBLEMATIC_LOCALE; \
42 if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) { \
43- _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc)); \
44+ _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc); \
45 } \
46 goto do_trie_utf8_fold; \
47 case trie_utf8_exactfa_fold: \
48@@ -1483,7 +1485,7 @@ STMT_START {
49 case trie_utf8_fold: \
50 do_trie_utf8_fold: \
51 if ( foldlen>0 ) { \
52- uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
53+ uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
54 foldlen -= len; \
55 uscan += len; \
56 len=0; \
57@@ -1500,7 +1502,7 @@ STMT_START {
58 /* FALLTHROUGH */ \
59 case trie_latin_utf8_fold: \
60 if ( foldlen>0 ) { \
61- uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, uniflags ); \
62+ uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
63 foldlen -= len; \
64 uscan += len; \
65 len=0; \
66@@ -1519,7 +1521,7 @@ STMT_START {
67 } \
68 /* FALLTHROUGH */ \
69 case trie_utf8: \
70- uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags ); \
71+ uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags ); \
72 break; \
73 case trie_plain: \
74 uvc = (UV)*uc; \
75@@ -2599,10 +2601,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
76 }
77 points[pointpos++ % maxlen]= uc;
78 if (foldlen || uc < (U8*)strend) {
79- REXEC_TRIE_READ_CHAR(trie_type, trie,
80- widecharmap, uc,
81- uscan, len, uvc, charid, foldlen,
82- foldbuf, uniflags);
83+ REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
84+ (U8 *) strend, uscan, len, uvc,
85+ charid, foldlen, foldbuf,
86+ uniflags);
87 DEBUG_TRIE_EXECUTE_r({
88 dump_exec_pos( (char *)uc, c, strend,
89 real_start, s, utf8_target, 0);
90@@ -5511,8 +5513,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
91 if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
92 I32 offset;
93 REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
94- uscan, len, uvc, charid, foldlen,
95- foldbuf, uniflags);
96+ (U8 *) reginfo->strend, uscan,
97+ len, uvc, charid, foldlen,
98+ foldbuf, uniflags);
99 charcount++;
100 if (foldlen>0)
101 ST.longfold = TRUE;
102@@ -5642,8 +5645,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
103 while (foldlen) {
104 if (!--chars)
105 break;
106- uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
107- uniflags);
108+ uvc = utf8n_to_uvchr(uscan, foldlen, &len,
109+ uniflags);
110 uscan += len;
111 foldlen -= len;
112 }
113diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec
114index 900dd6ee7f..6635142dea 100644
115--- a/t/lib/warnings/regexec
116+++ b/t/lib/warnings/regexec
117@@ -260,3 +260,10 @@ setlocale(&POSIX::LC_CTYPE, $utf8_locale);
118 "k" =~ /(?[ \N{KELVIN SIGN} ])/i;
119 ":" =~ /(?[ \: ])/;
120 EXPECT
121+########
122+# NAME perl #132063, read beyond buffer end
123+# OPTION fatal
124+"\xff" =~ /(?il)\x{100}|\x{100}/;
125+EXPECT
126+Malformed UTF-8 character: \xff (too short; 1 byte available, need 13) in pattern match (m//) at - line 2.
127+Malformed UTF-8 character (fatal) at - line 2.
128--
1292.15.1-424-g9478a660812
130