| From 0a320d753fe7fca03df259a4dfd8e641e51edaa8 Mon Sep 17 00:00:00 2001 |
| From: Hugo van der Sanden <hv@crypt.org> |
| Date: Tue, 18 Feb 2020 13:51:16 +0000 |
| Subject: [PATCH] study_chunk: extract rck_elide_nothing |
| |
| (CVE-2020-10878) |
| |
| (cherry picked from commit 93dee06613d4e1428fb10905ce1c3c96f53113dc) |
| |
| Upstream-Status: Backport [https://github.com/perl/perl5/commit/0a320d753fe7fca03df259a4dfd8e641e51edaa8] |
| CVE: CVE-2020-10878 |
| Signed-off-by: Lee Chee Yang <chee.yang.lee@intel.com> |
| --- |
| embed.fnc | 1 + |
| embed.h | 1 + |
| proto.h | 3 +++ |
| regcomp.c | 70 ++++++++++++++++++++++++++++++++++--------------------- |
| 4 files changed, 48 insertions(+), 27 deletions(-) |
| |
| diff --git a/embed.fnc b/embed.fnc |
| index aedb4baef19..d7cd04d3fc3 100644 |
| --- a/embed.fnc |
| +++ b/embed.fnc |
| @@ -2481,6 +2481,7 @@ Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \ |
| |I32 stopparen|U32 recursed_depth \ |
| |NULLOK regnode_ssc *and_withp \ |
| |U32 flags|U32 depth |
| +Es |void |rck_elide_nothing|NN regnode *node |
| EsR |SV * |get_ANYOFM_contents|NN const regnode * n |
| EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \ |
| |NN const char* const s|const U32 n |
| diff --git a/embed.h b/embed.h |
| index 75c91f77f45..356a8b98d96 100644 |
| --- a/embed.h |
| +++ b/embed.h |
| @@ -1208,6 +1208,7 @@ |
| #define parse_lparen_question_flags(a) S_parse_lparen_question_flags(aTHX_ a) |
| #define parse_uniprop_string(a,b,c,d,e,f,g,h,i) Perl_parse_uniprop_string(aTHX_ a,b,c,d,e,f,g,h,i) |
| #define populate_ANYOF_from_invlist(a,b) S_populate_ANYOF_from_invlist(aTHX_ a,b) |
| +#define rck_elide_nothing(a) S_rck_elide_nothing(aTHX_ a) |
| #define reg(a,b,c,d) S_reg(aTHX_ a,b,c,d) |
| #define reg2Lanode(a,b,c,d) S_reg2Lanode(aTHX_ a,b,c,d) |
| #define reg_node(a,b) S_reg_node(aTHX_ a,b) |
| diff --git a/proto.h b/proto.h |
| index 141ddbaee6d..f316fe134e1 100644 |
| --- a/proto.h |
| +++ b/proto.h |
| @@ -5543,6 +5543,9 @@ PERL_CALLCONV SV * Perl_parse_uniprop_string(pTHX_ const char * const name, cons |
| STATIC void S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr); |
| #define PERL_ARGS_ASSERT_POPULATE_ANYOF_FROM_INVLIST \ |
| assert(node); assert(invlist_ptr) |
| +STATIC void S_rck_elide_nothing(pTHX_ regnode *node); |
| +#define PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING \ |
| + assert(node) |
| PERL_STATIC_NO_RET void S_re_croak2(pTHX_ bool utf8, const char* pat1, const char* pat2, ...) |
| __attribute__noreturn__; |
| #define PERL_ARGS_ASSERT_RE_CROAK2 \ |
| diff --git a/regcomp.c b/regcomp.c |
| index 5f86be8086d..4ba2980db66 100644 |
| --- a/regcomp.c |
| +++ b/regcomp.c |
| @@ -4450,6 +4450,44 @@ S_unwind_scan_frames(pTHX_ const void *p) |
| } while (f); |
| } |
| |
| +/* Walk the next-chain from 'node' and fold the offsets of any NOTHING-kind or |
| + LONGJMP nodes with a non-zero offset into node's own ARG/NEXT_OFF field, |
| + optimizing them away; CURLYX is left alone and the sum stays below a cap. */ |
| +STATIC void |
| +S_rck_elide_nothing(pTHX_ regnode *node) |
| +{ |
| + dVAR; |
| + |
| + PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING; |
| + |
| + if (OP(node) != CURLYX) { |
| + const int max = (reg_off_by_arg[OP(node)] /* cap on the combined offset */ |
| + ? I32_MAX |
| + /* I32 may be smaller than U16 on CRAYs! */ |
| + : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); |
| + int off = (reg_off_by_arg[OP(node)] ? ARG(node) : NEXT_OFF(node)); |
| + int noff; /* always assigned in the loop condition before it is read */ |
| + regnode *n = node; |
| + |
| + /* Skip NOTHING and LONGJMP nodes that carry a non-zero offset. */ |
| + while ( |
| + (n = regnext(n)) |
| + && ( |
| + (PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) |
| + || ((OP(n) == LONGJMP) && (noff = ARG(n))) |
| + ) |
| + && off + noff < max /* stop once the sum would reach the cap */ |
| + ) { |
| + off += noff; |
| + } |
| + if (reg_off_by_arg[OP(node)]) /* store via the field 'off' was read from */ |
| + ARG(node) = off; |
| + else |
| + NEXT_OFF(node) = off; |
| + } |
| + return; |
| +} |
| + |
| /* the return from this sub is the minimum length that could possibly match */ |
| STATIC SSize_t |
| S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, |
| @@ -4550,28 +4588,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, |
| */ |
| JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0); |
| |
| - /* Follow the next-chain of the current node and optimize |
| - away all the NOTHINGs from it. */ |
| - if (OP(scan) != CURLYX) { |
| - const int max = (reg_off_by_arg[OP(scan)] |
| - ? I32_MAX |
| - /* I32 may be smaller than U16 on CRAYs! */ |
| - : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX)); |
| - int off = (reg_off_by_arg[OP(scan)] ? ARG(scan) : NEXT_OFF(scan)); |
| - int noff; |
| - regnode *n = scan; |
| - |
| - /* Skip NOTHING and LONGJMP. */ |
| - while ((n = regnext(n)) |
| - && ((PL_regkind[OP(n)] == NOTHING && (noff = NEXT_OFF(n))) |
| - || ((OP(n) == LONGJMP) && (noff = ARG(n)))) |
| - && off + noff < max) |
| - off += noff; |
| - if (reg_off_by_arg[OP(scan)]) |
| - ARG(scan) = off; |
| - else |
| - NEXT_OFF(scan) = off; |
| - } |
| + /* Follow the next-chain of the current node and optimize |
| + away all the NOTHINGs from it. |
| + */ |
| + rck_elide_nothing(scan); |
| |
| /* The principal pseudo-switch. Cannot be a switch, since we |
| look into several different things. */ |
| @@ -5745,11 +5765,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", |
| if (data && (fl & SF_HAS_EVAL)) |
| data->flags |= SF_HAS_EVAL; |
| optimize_curly_tail: |
| - if (OP(oscan) != CURLYX) { |
| - while (PL_regkind[OP(next = regnext(oscan))] == NOTHING |
| - && NEXT_OFF(next)) |
| - NEXT_OFF(oscan) += NEXT_OFF(next); |
| - } |
| + rck_elide_nothing(oscan); |
| continue; |
| |
| default: |