Patrick Williams | b48b7b4 | 2016-08-17 15:04:38 -0500 | [diff] [blame^] | 1 | From 3bb3f42f3749d40b8d4de65871e8d828b18d4a45 Mon Sep 17 00:00:00 2001 |
| 2 | From: Tom Lane <tgl@sss.pgh.pa.us> |
| 3 | Date: Mon, 8 Feb 2016 10:25:40 -0500 |
| 4 | Subject: [PATCH] Fix some regex issues with out-of-range characters and large |
| 5 | char ranges. |
| 6 | |
| 7 | Previously, our regex code defined CHR_MAX as 0xfffffffe, which is a |
| 8 | bad choice because it is outside the range of type "celt" (int32). |
| 9 | Characters approaching that limit could lead to infinite loops in logic |
| 10 | such as "for (c = a; c <= b; c++)" where c is of type celt but the |
| 11 | range bounds are chr. Such loops will work safely only if CHR_MAX+1 |
| 12 | is representable in celt, since c must advance to beyond b before the |
| 13 | loop will exit. |
| 14 | |
| 15 | Fortunately, there seems no reason not to restrict CHR_MAX to 0x7ffffffe. |
| 16 | It's highly unlikely that Unicode will ever assign codes that high, and |
| 17 | none of our other backend encodings need characters beyond that either. |
| 18 | |
| 19 | In addition to modifying the macro, we have to explicitly enforce character |
| 20 | range restrictions on the values of \u, \U, and \x escape sequences, else |
| 21 | the limit is trivially bypassed. |
| 22 | |
| 23 | Also, the code for expanding case-independent character ranges in bracket |
| 24 | expressions had a potential integer overflow in its calculation of the |
| 25 | number of characters it could generate, which could lead to allocating too |
| 26 | small a character vector and then overwriting memory. An attacker with the |
| 27 | ability to supply arbitrary regex patterns could easily cause transient DoS |
| 28 | via server crashes, and the possibility for privilege escalation has not |
| 29 | been ruled out. |
| 30 | |
| 31 | Quite aside from the integer-overflow problem, the range expansion code was |
| 32 | unnecessarily inefficient in that it always produced a result consisting of |
| 33 | individual characters, abandoning the knowledge that we had a range to |
| 34 | start with. If the input range is large, this requires excessive memory. |
| 35 | Change it so that the original range is reported as-is, and then we add on |
| 36 | any case-equivalent characters that are outside that range. With this |
| 37 | approach, we can bound the number of individual characters allowed without |
| 38 | sacrificing much. This patch allows at most 100000 individual characters, |
| 39 | which I believe to be more than the number of case pairs existing in |
| 40 | Unicode, so that the restriction will never be hit in practice. |
| 41 | |
| 42 | It's still possible for range() to take a while given a large character code |
| 43 | range, so also add statement-cancel detection to its loop. The downstream |
| 44 | function dovec() also lacked cancel detection, and could take a long time |
| 45 | given a large output from range(). |
| 46 | |
| 47 | Per fuzz testing by Greg Stark. Back-patch to all supported branches. |
| 48 | |
| 49 | Security: CVE-2016-0773 |
| 50 | |
| 51 | Upstream-Status: Backport |
| 52 | |
| 53 | Signed-off-by: Tom Lane <tgl@sss.pgh.pa.us> |
| 54 | Signed-off-by: Zhixiong Chi <zhixiong.chi@windriver.com> |
| 55 | |
| 56 | Index: postgresql-9.4.5/src/backend/regex/regc_lex.c |
| 57 | =================================================================== |
| 58 | --- postgresql-9.4.5.orig/src/backend/regex/regc_lex.c 2015-10-06 03:12:06.000000000 +0800 |
| 59 | +++ postgresql-9.4.5/src/backend/regex/regc_lex.c 2016-03-10 10:29:57.045784317 +0800 |
| 60 | @@ -792,13 +792,13 @@ |
| 61 | break; |
| 62 | case CHR('u'): |
| 63 | c = lexdigits(v, 16, 4, 4); |
| 64 | - if (ISERR()) |
| 65 | + if (ISERR() || c < CHR_MIN || c > CHR_MAX) |
| 66 | FAILW(REG_EESCAPE); |
| 67 | RETV(PLAIN, c); |
| 68 | break; |
| 69 | case CHR('U'): |
| 70 | c = lexdigits(v, 16, 8, 8); |
| 71 | - if (ISERR()) |
| 72 | + if (ISERR() || c < CHR_MIN || c > CHR_MAX) |
| 73 | FAILW(REG_EESCAPE); |
| 74 | RETV(PLAIN, c); |
| 75 | break; |
| 76 | @@ -816,7 +816,7 @@ |
| 77 | case CHR('x'): |
| 78 | NOTE(REG_UUNPORT); |
| 79 | c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */ |
| 80 | - if (ISERR()) |
| 81 | + if (ISERR() || c < CHR_MIN || c > CHR_MAX) |
| 82 | FAILW(REG_EESCAPE); |
| 83 | RETV(PLAIN, c); |
| 84 | break; |
| 85 | @@ -872,6 +872,9 @@ |
| 86 | |
| 87 | /* |
| 88 | * lexdigits - slurp up digits and return chr value |
| 89 | + * |
| 90 | + * This does not account for overflow; callers should range-check the result |
| 91 | + * if maxlen is large enough to make that possible. |
| 92 | */ |
| 93 | static chr /* chr value; errors signalled via ERR */ |
| 94 | lexdigits(struct vars * v, |
| 95 | Index: postgresql-9.4.5/src/backend/regex/regc_locale.c |
| 96 | =================================================================== |
| 97 | --- postgresql-9.4.5.orig/src/backend/regex/regc_locale.c 2015-10-06 03:12:06.000000000 +0800 |
| 98 | +++ postgresql-9.4.5/src/backend/regex/regc_locale.c 2016-03-10 10:34:28.757781726 +0800 |
| 99 | @@ -408,8 +408,7 @@ |
| 100 | int nchrs; |
| 101 | struct cvec *cv; |
| 102 | celt c, |
| 103 | - lc, |
| 104 | - uc; |
| 105 | + cc; |
| 106 | |
| 107 | if (a != b && !before(a, b)) |
| 108 | { |
| 109 | @@ -427,24 +426,52 @@ |
| 110 | |
| 111 | /* |
| 112 | * When case-independent, it's hard to decide when cvec ranges are usable, |
| 113 | - * so for now at least, we won't try. We allocate enough space for two |
| 114 | - * case variants plus a little extra for the two title case variants. |
| 115 | + * so for now at least, we won't try. We use a range for the originally |
| 116 | + * specified chrs and then add on any case-equivalents that are outside |
| 117 | + * that range as individual chrs. |
| 118 | + * |
| 119 | + * To ensure sane behavior if someone specifies a very large range, limit |
| 120 | + * the allocation size to 100000 chrs (arbitrary) and check for overrun |
| 121 | + * inside the loop below. |
| 122 | */ |
| 123 | |
| 124 | - nchrs = (b - a + 1) * 2 + 4; |
| 125 | - |
| 126 | - cv = getcvec(v, nchrs, 0); |
| 127 | + nchrs = b - a + 1; |
| 128 | + if (nchrs <= 0 || nchrs > 100000) |
| 129 | + nchrs = 100000; |
| 130 | + |
| 131 | + cv = getcvec(v, nchrs, 1); |
| 132 | NOERRN(); |
| 133 | + addrange(cv, a, b); |
| 134 | |
| 135 | for (c = a; c <= b; c++) |
| 136 | { |
| 137 | - addchr(cv, c); |
| 138 | - lc = pg_wc_tolower((chr) c); |
| 139 | - if (c != lc) |
| 140 | - addchr(cv, lc); |
| 141 | - uc = pg_wc_toupper((chr) c); |
| 142 | - if (c != uc) |
| 143 | - addchr(cv, uc); |
| 144 | + cc = pg_wc_tolower((chr) c); |
| 145 | + if (cc != c && |
| 146 | + (before(cc, a) || before(b, cc))) |
| 147 | + { |
| 148 | + if (cv->nchrs >= cv->chrspace) |
| 149 | + { |
| 150 | + ERR(REG_ETOOBIG); |
| 151 | + return NULL; |
| 152 | + } |
| 153 | + addchr(cv, cc); |
| 154 | + } |
| 155 | + cc = pg_wc_toupper((chr) c); |
| 156 | + if (cc != c && |
| 157 | + (before(cc, a) || before(b, cc))) |
| 158 | + { |
| 159 | + if (cv->nchrs >= cv->chrspace) |
| 160 | + { |
| 161 | + ERR(REG_ETOOBIG); |
| 162 | + return NULL; |
| 163 | + } |
| 164 | + addchr(cv, cc); |
| 165 | + } |
| 166 | + if (CANCEL_REQUESTED(v->re)) |
| 167 | + { |
| 168 | + ERR(REG_CANCEL); |
| 169 | + return NULL; |
| 170 | + } |
| 171 | } |
| 172 | |
| 173 | return cv; |
| 170 | Index: postgresql-9.4.5/src/backend/regex/regcomp.c |
| 171 | =================================================================== |
| 172 | --- postgresql-9.4.5.orig/src/backend/regex/regcomp.c 2015-10-06 03:12:06.000000000 +0800 |
| 173 | +++ postgresql-9.4.5/src/backend/regex/regcomp.c 2016-03-10 10:35:25.397781185 +0800 |
| 174 | @@ -1569,6 +1569,7 @@ |
| 175 | { |
| 176 | ch = *p; |
| 177 | newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp); |
| 178 | + NOERR(); |
| 179 | } |
| 180 | |
| 181 | /* and the ranges */ |
| 182 | @@ -1578,6 +1579,7 @@ |
| 183 | to = *(p + 1); |
| 184 | if (from <= to) |
| 185 | subrange(v, from, to, lp, rp); |
| 186 | + NOERR(); |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | Index: postgresql-9.4.5/src/include/regex/regcustom.h |
| 191 | =================================================================== |
| 192 | --- postgresql-9.4.5.orig/src/include/regex/regcustom.h 2015-10-06 03:12:06.000000000 +0800 |
| 193 | +++ postgresql-9.4.5/src/include/regex/regcustom.h 2016-03-10 10:37:09.989780188 +0800 |
| 194 | @@ -65,7 +65,8 @@ |
| 195 | #define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */ |
| 196 | #define CHRBITS 32 /* bits in a chr; must not use sizeof */ |
| 197 | #define CHR_MIN 0x00000000 /* smallest and largest chr; the value */ |
| 198 | -#define CHR_MAX 0xfffffffe /* CHR_MAX-CHR_MIN+1 should fit in uchr */ |
| 199 | +#define CHR_MAX 0x7ffffffe /* CHR_MAX-CHR_MIN+1 must fit in an int, and |
| 200 | + * CHR_MAX+1 must fit in both chr and celt */ |
| 201 | |
| 202 | /* functions operating on chr */ |
| 203 | #define iscalnum(x) pg_wc_isalnum(x) |
| 204 | Index: postgresql-9.4.5/src/test/regress/expected/regex.out |
| 205 | =================================================================== |
| 206 | --- postgresql-9.4.5.orig/src/test/regress/expected/regex.out 2015-10-06 03:12:06.000000000 +0800 |
| 207 | +++ postgresql-9.4.5/src/test/regress/expected/regex.out 2016-03-10 10:38:28.821779436 +0800 |
| 208 | @@ -222,3 +222,5 @@ |
| 209 | t |
| 210 | (1 row) |
| 211 | |
| 212 | +select 'a' ~ '\x7fffffff'; -- invalid chr code |
| 213 | +ERROR: invalid regular expression: invalid escape \ sequence |
| 214 | Index: postgresql-9.4.5/src/test/regress/sql/regex.sql |
| 215 | =================================================================== |
| 216 | --- postgresql-9.4.5.orig/src/test/regress/sql/regex.sql 2015-10-06 03:12:06.000000000 +0800 |
| 217 | +++ postgresql-9.4.5/src/test/regress/sql/regex.sql 2016-03-10 10:38:57.845779159 +0800 |
| 218 | @@ -57,3 +57,4 @@ |
| 219 | select 'a' ~ '.. ()|\1'; |
| 220 | select 'a' ~ '()*\1'; |
| 221 | select 'a' ~ '()+\1'; |
| 222 | +select 'a' ~ '\x7fffffff'; -- invalid chr code |