| From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001 |
| From: Marek Polacek <polacek@redhat.com> |
| Date: Wed, 6 Oct 2021 14:33:59 -0400 |
| Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] |
| |
| From a link below: |
| "An issue was discovered in the Bidirectional Algorithm in the Unicode |
| Specification through 14.0. It permits the visual reordering of |
| characters via control sequences, which can be used to craft source code |
| that renders different logic than the logical ordering of tokens |
| ingested by compilers and interpreters. Adversaries can leverage this to |
| encode source code for compilers accepting Unicode such that targeted |
| vulnerabilities are introduced invisibly to human reviewers." |
| |
| More info: |
| https://nvd.nist.gov/vuln/detail/CVE-2021-42574 |
| https://trojansource.codes/ |
| |
| This is not a compiler bug. However, to mitigate the problem, this patch |
| implements -Wbidi-chars=[none|unpaired|any] to warn about possibly |
| misleading Unicode bidirectional control characters the preprocessor may |
| encounter. |
| |
| The default is =unpaired, which warns about improperly terminated |
| bidirectional control characters; e.g. a LRE without its corresponding PDF. |
| The level =any warns about any use of bidirectional control characters. |
| |
| This patch handles both UCNs and UTF-8 characters. UCNs designating |
| bidi characters in identifiers have been accepted since r204886. Then r217144 |
| enabled -fextended-identifiers by default. Extended characters in C/C++ |
| identifiers have been accepted since r275979. However, this patch still |
| warns about mixing UTF-8 and UCN bidi characters; there seems to be no |
| good reason to allow mixing them. |
| |
| We warn in different contexts: comments (both C and C++-style), string |
| literals, character constants, and identifiers. As expected, UCNs are ignored |
| in comments and raw string literals. The bidirectional control characters |
| can nest so this patch handles that as well. |
| |
| I have neither included nor tested this with Fortran (which also has |
| string literals and line comments). |
| |
| Dave M. posted patches improving diagnostics involving Unicode characters. |
| This patch does not make use of this new infrastructure yet. |
| |
| PR preprocessor/103026 |
| |
| gcc/c-family/ChangeLog: |
| |
| * c.opt (Wbidi-chars, Wbidi-chars=): New option. |
| |
| gcc/ChangeLog: |
| |
| * doc/invoke.texi: Document -Wbidi-chars. |
| |
| libcpp/ChangeLog: |
| |
| * include/cpplib.h (enum cpp_bidirectional_level): New. |
| (struct cpp_options): Add cpp_warn_bidirectional. |
| (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. |
| * internal.h (struct cpp_reader): Add warn_bidi_p member |
| function. |
| * init.c (cpp_create_reader): Set cpp_warn_bidirectional. |
| * lex.c (bidi): New namespace. |
| (get_bidi_utf8): New function. |
| (get_bidi_ucn): Likewise. |
| (maybe_warn_bidi_on_close): Likewise. |
| (maybe_warn_bidi_on_char): Likewise. |
| (_cpp_skip_block_comment): Implement warning about bidirectional |
| control characters. |
| (skip_line_comment): Likewise. |
| (forms_identifier_p): Likewise. |
| (lex_identifier): Likewise. |
| (lex_string): Likewise. |
| (lex_raw_string): Likewise. |
| |
| gcc/testsuite/ChangeLog: |
| |
| * c-c++-common/Wbidi-chars-1.c: New test. |
| * c-c++-common/Wbidi-chars-2.c: New test. |
| * c-c++-common/Wbidi-chars-3.c: New test. |
| * c-c++-common/Wbidi-chars-4.c: New test. |
| * c-c++-common/Wbidi-chars-5.c: New test. |
| * c-c++-common/Wbidi-chars-6.c: New test. |
| * c-c++-common/Wbidi-chars-7.c: New test. |
| * c-c++-common/Wbidi-chars-8.c: New test. |
| * c-c++-common/Wbidi-chars-9.c: New test. |
| * c-c++-common/Wbidi-chars-10.c: New test. |
| * c-c++-common/Wbidi-chars-11.c: New test. |
| * c-c++-common/Wbidi-chars-12.c: New test. |
| * c-c++-common/Wbidi-chars-13.c: New test. |
| * c-c++-common/Wbidi-chars-14.c: New test. |
| * c-c++-common/Wbidi-chars-15.c: New test. |
| * c-c++-common/Wbidi-chars-16.c: New test. |
| * c-c++-common/Wbidi-chars-17.c: New test. |
| |
| CVE: CVE-2021-42574 |
| Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628] |
| Signed-off-by: Pgowda <pgowda.cve@gmail.com> |
| |
| --- |
| gcc/c-family/c.opt | 24 ++ |
| gcc/doc/invoke.texi | 21 +- |
| gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++ |
| gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 + |
| gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++ |
| libcpp/include/cpplib.h | 18 +- |
| libcpp/init.c | 1 + |
| libcpp/internal.h | 7 + |
| libcpp/lex.c | 408 +++++++++++++++++++- |
| 23 files changed, 1315 insertions(+), 7 deletions(-) |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| |
| diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt |
| index 8a4cd634f77..3976fc368db 100644 |
| --- a/gcc/c-family/c.opt |
| +++ b/gcc/c-family/c.opt |
| @@ -370,6 +370,30 @@ Wbad-function-cast |
| C ObjC Var(warn_bad_function_cast) Warning |
| Warn about casting functions to incompatible types. |
| |
| +Wbidi-chars |
| +C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) |
| +; |
| + |
| +Wbidi-chars= |
| +C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) |
| +-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. |
| + |
| +; Required for these enum values. |
| +SourceInclude |
| +cpplib.h |
| + |
| +Enum |
| +Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) |
| + |
| +EnumValue |
| +Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) |
| + |
| +EnumValue |
| +Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) |
| + |
| +EnumValue |
| +Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) |
| + |
| Wbool-compare |
| C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) |
| Warn about boolean expression compared with an integer value different from true/false. |
| diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi |
| index 6070288856c..a22758d18ee 100644 |
| --- a/gcc/doc/invoke.texi |
| +++ b/gcc/doc/invoke.texi |
| @@ -326,7 +326,9 @@ Objective-C and Objective-C++ Dialects}. |
| -Warith-conversion @gol |
| -Warray-bounds -Warray-bounds=@var{n} @gol |
| -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol |
| --Wno-attribute-warning -Wbool-compare -Wbool-operation @gol |
| +-Wno-attribute-warning @gol |
| +-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol |
| +-Wbool-compare -Wbool-operation @gol |
| -Wno-builtin-declaration-mismatch @gol |
| -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol |
| -Wc11-c2x-compat @gol |
| @@ -7559,6 +7561,23 @@ Attributes considered include @code{allo |
| This is the default. You can disable these warnings with either |
| @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. |
| |
| +@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} |
| +@opindex Wbidi-chars= |
| +@opindex Wbidi-chars |
| +@opindex Wno-bidi-chars |
| +Warn about possibly misleading UTF-8 bidirectional control characters in |
| +comments, string literals, character constants, and identifiers. Such |
| +characters can change left-to-right writing direction into right-to-left |
| +(and vice versa), which can cause confusion between the logical order and |
| +visual order. This may be dangerous; for instance, it may seem that a piece |
| +of code is not commented out, whereas it in fact is. |
| + |
| +There are three levels of warning supported by GCC@. The default is |
| +@option{-Wbidi-chars=unpaired}, which warns about improperly terminated |
| +bidi contexts. @option{-Wbidi-chars=none} turns the warning off. |
| +@option{-Wbidi-chars=any} warns about any use of bidirectional control |
| +characters. |
| + |
| @item -Wbool-compare |
| @opindex Wno-bool-compare |
| @opindex Wbool-compare |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| new file mode 100644 |
| index 00000000000..34f5ac19271 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| @@ -0,0 +1,27 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* More nesting testing. */ |
| + |
| +/* RLEâ« LRI⦠PDF⬠PDIâ©*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int LRE_\u202a_PDF_\u202c; |
| +int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; |
| +int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; |
| +int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; |
| +int FSI_\u2068; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int FSI_\u2068_PDI_\u2069; |
| +int FSI_\u2068_FSI_\u2068_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| new file mode 100644 |
| index 00000000000..270ce2368a9 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| @@ -0,0 +1,13 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test that we warn when mixing UCN and UTF-8. */ |
| + |
| +int LRE_âª_PDF_\u202c; |
| +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| +int LRE_\u202a_PDF_â¬_; |
| +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| +const char *s1 = "LRE_âª_PDF_\u202c"; |
| +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| +const char *s2 = "LRE_\u202a_PDF_â¬"; |
| +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| new file mode 100644 |
| index 00000000000..b07eec1da91 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| @@ -0,0 +1,19 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile { target { c || c++11 } } } */ |
| +/* { dg-options "-Wbidi-chars=any" } */ |
| +/* Test raw strings. */ |
| + |
| +const char *s1 = R"(a b c LRE⪠1 2 3 PDF⬠x y z)"; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +const char *s2 = R"(a b c RLE⫠1 2 3 PDF⬠x y z)"; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +const char *s3 = R"(a b c LROâ 1 2 3 PDF⬠x y z)"; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +const char *s4 = R"(a b c RLO⮠1 2 3 PDF⬠x y z)"; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +const char *s7 = R"(a b c FSI⨠1 2 3 PDI⩠x y) z"; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| +const char *s8 = R"(a b c PDIâ© x y )z"; |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| +const char *s9 = R"(a b c PDF⬠x y z)"; |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| diff -uprN '-x*.orig' '-x*.rej' del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c |
| --- del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800 |
| +++ gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-13 23:11:22.328439287 -0800 |
| @@ -0,0 +1,17 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile { target { c || c++11 } } } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test raw strings. */ |
| + |
| +const char *s1 = R"(a b c LRE⪠1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s2 = R"(a b c RLEâ« 1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s3 = R"(a b c LROâ 1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s4 = R"(a b c FSI⨠1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s5 = R"(a b c LRI⦠1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s6 = R"(a b c RLI⧠1 2 3)"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| new file mode 100644 |
| index 00000000000..ba5f75d9553 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| @@ -0,0 +1,38 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, |
| + or RLOs. */ |
| + |
| +/* LRI_â¦_LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â©*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â© |
| +// LRI_â¦_RLO_â®_RLE_â«_RLE_â«_PDI_â© |
| +// LRI_â¦_RLO_â®_RLE_â«_PDI_â© |
| +// FSI_â¨_RLO_â®_PDI_â© |
| +// FSI_â¨_FSI_â¨_RLO_â®_PDI_â© |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; |
| +int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int PDI_\u2069; |
| +int LRI_\u2066_PDI_\u2069; |
| +int RLI_\u2067_PDI_\u2069; |
| +int LRE_\u202a_LRI_\u2066_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| +int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| +int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLO_\u202e_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int RLI_\u2067_PDI_\u2069_RLI_\u2067; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int FSI_\u2068_PDF_\u202c_PDI_\u2069; |
| +int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| new file mode 100644 |
| index 00000000000..a0ce8ff5e2c |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| @@ -0,0 +1,59 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test unpaired bidi control chars in multiline comments. */ |
| + |
| +/* |
| + * LRE⪠end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * RLEâ« end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * LROâ end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * RLOâ® end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * LRI⦠end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * RLI⧠end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* |
| + * FSI⨠end |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* LRE⪠|
| + PDF⬠*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +/* FSI⨠|
| + PDIâ© */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| + |
| +/* LRE<âª> |
| + * |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ |
| + |
| +/* |
| + * LRE<âª> |
| + */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| + |
| +/* |
| + * |
| + * LRE<âª> */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +/* RLI<â§> */ /* PDI<â©> */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* LRE<âª> */ /* PDF<â¬> */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| new file mode 100644 |
| index 00000000000..baa0159861c |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| @@ -0,0 +1,26 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=any" } */ |
| +/* Test LTR/RTL chars. */ |
| + |
| +/* LTR<â> */ |
| +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| +// LTR<â> |
| +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| +/* RTL<â> */ |
| +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| +// RTL<â> |
| +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| + |
| +const char *s1 = "LTR<â>"; |
| +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| +const char *s2 = "LTR\u200e"; |
| +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| +const char *s3 = "LTR\u200E"; |
| +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| +const char *s4 = "RTL<â>"; |
| +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| +const char *s5 = "RTL\u200f"; |
| +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| +const char *s6 = "RTL\u200F"; |
| +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| new file mode 100644 |
| index 00000000000..07cb4321f96 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| @@ -0,0 +1,30 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test LTR/RTL chars. */ |
| + |
| +/* LTR<â> */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// LTR<â> |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* RTL<â> */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// RTL<â> |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int ltr_\u200e; |
| +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ |
| +int rtl_\u200f; |
| +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ |
| + |
| +const char *s1 = "LTR<â>"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s2 = "LTR\u200e"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s3 = "LTR\u200E"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s4 = "RTL<â>"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s5 = "RTL\u200f"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s6 = "RTL\u200F"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| new file mode 100644 |
| index 00000000000..2340374f276 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| @@ -0,0 +1,12 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| + |
| +int main() { |
| + int isAdmin = 0; |
| + /*â® } â¦if (isAdmin)⩠⦠begin admins only */ |
| +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| + __builtin_printf("You are an admin.\n"); |
| + /* end admins only â® { â¦*/ |
| +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| + return 0; |
| +} |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| new file mode 100644 |
| index 00000000000..2340374f276 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| @@ -0,0 +1,9 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| + |
| +int main() { |
| + /* Say hello; newlineâ§/*/ return 0 ; |
| +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| + __builtin_printf("Hello world.\n"); |
| + return 0; |
| +} |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| new file mode 100644 |
| index 00000000000..9dc7edb6e64 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| @@ -0,0 +1,11 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| + |
| +int main() { |
| + const char* access_level = "user"; |
| + if (__builtin_strcmp(access_level, "userâ® â¦// Check if adminâ© â¦")) { |
| +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| + __builtin_printf("You are an admin.\n"); |
| + } |
| + return 0; |
| +} |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| new file mode 100644 |
| index 00000000000..639e5c62e88 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| @@ -0,0 +1,188 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ |
| +/* Test all bidi chars in various contexts (identifiers, comments, |
| + string literals, character constants), both UCN and UTF-8. The bidi |
| + chars here are properly terminated, except for the character constants. */ |
| + |
| +/* a b c LRE⪠1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +/* a b c RLE⫠1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +/* a b c LROâ 1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +/* a b c RLO⮠1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +/* a b c LRI⦠1 2 3 PDI⩠x y z */ |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| +/* a b c RLI⧠1 2 3 PDI⩠x y */ |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| +/* a b c FSI⨠1 2 3 PDI⩠x y z */ |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| + |
| +/* Same but C++ comments instead. */ |
| +// a b c LRE⪠1 2 3 PDF⬠x y z |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +// a b c RLE⫠1 2 3 PDF⬠x y z |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +// a b c LROâ 1 2 3 PDF⬠x y z |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +// a b c RLO⮠1 2 3 PDF⬠x y z |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +// a b c LRI⦠1 2 3 PDI⩠x y z |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| +// a b c RLI⧠1 2 3 PDI⩠x y |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| +// a b c FSI⨠1 2 3 PDI⩠x y z |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| + |
| +/* Here we're closing an unopened context, warn when =any. */ |
| +/* a b c PDIâ© x y z */ |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| +/* a b c PDF⬠x y z */ |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| +// a b c PDIâ© x y z |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| +// a b c PDF⬠x y z |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| + |
| +/* Multiline comments. */ |
| +/* a b c PDIâ© x y z |
| + */ |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ |
| +/* a b c PDF⬠x y z |
| + */ |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDIâ© x y z |
| + */ |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDF⬠x y z |
| + */ |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDIâ© x y z */ |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| +/* first |
| + a b c PDF⬠x y z */ |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| + |
| +void |
| +g1 () |
| +{ |
| + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| + const char *s8 = "a b c PDIâ© x y z"; |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| + const char *s9 = "a b c PDF⬠x y z"; |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| + |
| + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| +} |
| + |
| +void |
| +g2 () |
| +{ |
| + const char c1 = '\u202a'; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| + const char c2 = '\u202A'; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| + const char c3 = '\u202b'; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| + const char c4 = '\u202B'; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| + const char c5 = '\u202d'; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| + const char c6 = '\u202D'; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| + const char c7 = '\u202e'; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| + const char c8 = '\u202E'; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| + const char c9 = '\u2066'; |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| + const char c10 = '\u2067'; |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| + const char c11 = '\u2068'; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| +} |
| + |
| +int aâªbâ¬c; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int aâ«bâ¬c; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +int aâÂbâ¬c; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +int aâ®bâ¬c; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +int aâ¦bâ©c; |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| +int aâ§bâ©c; |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| +int aâ¨bâ©c; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| +int Aâ¬X; |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| +int A\u202cY; |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| +int A\u202CY2; |
| +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| + |
| +int d\u202ae\u202cf; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int d\u202Ae\u202cf2; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int d\u202be\u202cf; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +int d\u202Be\u202cf2; |
| +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| +int d\u202de\u202cf; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +int d\u202De\u202cf2; |
| +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| +int d\u202ee\u202cf; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +int d\u202Ee\u202cf2; |
| +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| +int d\u2066e\u2069f; |
| +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| +int d\u2067e\u2069f; |
| +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| +int d\u2068e\u2069f; |
| +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| +int X\u2069; |
| +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| new file mode 100644 |
| index 00000000000..68cb053144b |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| @@ -0,0 +1,188 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ |
| +/* Test all bidi chars in various contexts (identifiers, comments, |
| + string literals, character constants), both UCN and UTF-8. The bidi |
| + chars here are properly terminated, except for the character constants. */ |
| + |
| +/* a b c LRE⪠1 2 3 PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLE⫠1 2 3 PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c LROâ 1 2 3 PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLO⮠1 2 3 PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c LRI⦠1 2 3 PDI⩠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLI⧠1 2 3 PDI⩠x y */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c FSI⨠1 2 3 PDI⩠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +/* Same but C++ comments instead. */ |
| +// a b c LRE⪠1 2 3 PDF⬠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLE⫠1 2 3 PDF⬠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c LROâ 1 2 3 PDF⬠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLO⮠1 2 3 PDF⬠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c LRI⦠1 2 3 PDI⩠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLI⧠1 2 3 PDI⩠x y |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c FSI⨠1 2 3 PDI⩠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +/* Here we're closing an unopened context, warn when =any. */ |
| +/* a b c PDIâ© x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c PDIâ© x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c PDF⬠x y z |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +/* Multiline comments. */ |
| +/* a b c PDIâ© x y z |
| + */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| +/* a b c PDF⬠x y z |
| + */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDIâ© x y z |
| + */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDF⬠x y z |
| + */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| +/* first |
| + a b c PDIâ© x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +/* first |
| + a b c PDF⬠x y z */ |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +void |
| +g1 () |
| +{ |
| + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s8 = "a b c PDIâ© x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s9 = "a b c PDF⬠x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +} |
| + |
| +void |
| +g2 () |
| +{ |
| + const char c1 = '\u202a'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c2 = '\u202A'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c3 = '\u202b'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c4 = '\u202B'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c5 = '\u202d'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c6 = '\u202D'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c7 = '\u202e'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c8 = '\u202E'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c9 = '\u2066'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c10 = '\u2067'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char c11 = '\u2068'; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +} |
| + |
| +int aâªbâ¬c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâ«bâ¬c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâÂbâ¬c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâ®bâ¬c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâ¦bâ©c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâ§bâ©c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int aâ¨bâ©c; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int Aâ¬X; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int A\u202cY; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int A\u202CY2; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +int d\u202ae\u202cf; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202Ae\u202cf2; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202be\u202cf; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202Be\u202cf2; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202de\u202cf; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202De\u202cf2; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202ee\u202cf; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u202Ee\u202cf2; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u2066e\u2069f; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u2067e\u2069f; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int d\u2068e\u2069f; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| +int X\u2069; |
| +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| new file mode 100644 |
| index 00000000000..0ce6fff2dee |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| @@ -0,0 +1,155 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test nesting of bidi chars in various contexts. */ |
| + |
| +/* Terminated by the wrong char: */ |
| +/* a b c LRE⪠1 2 3 PDI⩠x y z */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLEâ« 1 2 3 PDIâ© x y z*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c LROâ 1 2 3 PDIâ© x y z */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLOâ® 1 2 3 PDIâ© x y z */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c LRI⦠1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c RLI⧠1 2 3 PDF⬠x y z */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* a b c FSI⨠1 2 3 PDF⬠x y z*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +/* LRE⪠PDF⬠*/ |
| +/* LRE⪠LRE⪠PDF⬠PDF⬠*/ |
| +/* PDF⬠LRE⪠PDF⬠*/ |
| +/* LRE⪠PDF⬠LRE⪠PDF⬠*/ |
| +/* LRE⪠LRE⪠PDF⬠*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* PDF⬠LRE⪠*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +// a b c LRE⪠1 2 3 PDI⩠x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLEâ« 1 2 3 PDIâ© x y z*/ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c LROâ 1 2 3 PDIâ© x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLOâ® 1 2 3 PDIâ© x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c LRI⦠1 2 3 PDF⬠x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c RLI⧠1 2 3 PDF⬠x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// a b c FSI⨠1 2 3 PDF⬠x y z |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +// LRE⪠PDF⬠|
| +// LRE⪠LRE⪠PDF⬠PDF⬠|
| +// PDF⬠LRE⪠PDF⬠|
| +// LRE⪠PDF⬠LRE⪠PDF⬠|
| +// LRE⪠LRE⪠PDF⬠|
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +// PDF⬠LRE⪠|
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +void |
| +g1 () |
| +{ |
| + const char *s1 = "a b c LRE⪠1 2 3 PDI⩠x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s3 = "a b c RLEâ« 1 2 3 PDIâ© x y "; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s5 = "a b c LROâ 1 2 3 PDIâ© x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s7 = "a b c RLOâ® 1 2 3 PDIâ© x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s9 = "a b c LRI⦠1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s11 = "a b c RLI⧠1 2 3 PDF⬠x y z\ |
| + "; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| + const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s13 = "a b c FSI⨠1 2 3 PDF⬠x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s15 = "PDF⬠LREâª"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s16 = "PDF\u202c LRE\u202a"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s17 = "LRE⪠PDFâ¬"; |
| + const char *s18 = "LRE\u202a PDF\u202c"; |
| + const char *s19 = "LRE⪠LRE⪠PDF⬠PDFâ¬"; |
| + const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; |
| + const char *s21 = "PDF⬠LRE⪠PDFâ¬"; |
| + const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; |
| + const char *s23 = "LRE⪠LRE⪠PDFâ¬"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s25 = "PDF⬠LREâª"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s26 = "PDF\u202c LRE\u202a"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s27 = "PDF⬠LRE\u202a"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + const char *s28 = "PDF\u202c LREâª"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +} |
| + |
| +int aLREâªbPDIâ©; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int A\u202aB\u2069C; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aRLEâ«bPDIâ©; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a\u202bB\u2069c; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aLROâÂbPDIâ©; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a\u202db\u2069c2; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aRLOâ®bPDIâ©; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a\u202eb\u2069; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aLRIâ¦bPDFâ¬; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a\u2066b\u202c; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aRLIâ§bPDFâ¬c |
| +; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| +int a\u2067b\u202c; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aFSIâ¨bPDFâ¬; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a\u2068b\u202c; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aFSIâ¨bPD\u202C; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aFSI\u2068bPDFâ¬_; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int aLREâªbPDFâ¬b; |
| +int A\u202aB\u202c; |
| +int a_LREâª_LREâª_b_PDFâ¬_PDFâ¬; |
| +int A\u202aA\u202aB\u202cB\u202c; |
| +int aPDFâ¬bLREadPDFâ¬; |
| +int a_\u202C_\u202a_\u202c; |
| +int a_LREâª_b_PDFâ¬_c_LREâª_PDFâ¬; |
| +int a_\u202a_\u202c_\u202a_\u202c_; |
| +int a_LREâª_b_PDFâ¬_c_LREâª; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int a_\u202a_\u202c_\u202a_; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| new file mode 100644 |
| index 00000000000..d012d420ec0 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| @@ -0,0 +1,9 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=any" } */ |
| +/* Test we ignore UCNs in comments. */ |
| + |
| +// a b c \u202a 1 2 3 |
| +// a b c \u202A 1 2 3 |
| +/* a b c \u202a 1 2 3 */ |
| +/* a b c \u202A 1 2 3 */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| new file mode 100644 |
| index 00000000000..4f54c5092ec |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| @@ -0,0 +1,13 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=any" } */ |
| +/* Test \u vs \U. */ |
| + |
| +int a_\u202A; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int a_\u202a_2; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int a_\U0000202A_3; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| +int a_\U0000202a_4; |
| +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| new file mode 100644 |
| index 00000000000..e2af1b1ca97 |
| --- /dev/null |
| +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| @@ -0,0 +1,29 @@ |
| +/* PR preprocessor/103026 */ |
| +/* { dg-do compile } */ |
| +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| +/* Test that we properly separate bidi contexts (comment/identifier/character |
| + constant/string literal). */ |
| + |
| +/* LRE ->âª<- */ int pdf_\u202c_1; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* RLE ->â«<- */ int pdf_\u202c_2; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* LRO ->âÂ<- */ int pdf_\u202c_3; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* RLO ->â®<- */ int pdf_\u202c_4; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* LRI ->â¦<-*/ int pdi_\u2069_1; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* RLI ->â§<- */ int pdi_\u2069_12; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* FSI ->â¨<- */ int pdi_\u2069_3; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| + |
| +const char *s1 = "LRE\u202a"; /* PDF ->â¬<- */ |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +/* LRE ->âª<- */ const char *s2 = "PDF\u202c"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +const char *s3 = "LRE\u202a"; int pdf_\u202c_5; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| +int lre_\u202a; const char *s4 = "PDF\u202c"; |
| +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h |
| index 176f8c5bbce..112b9c24751 100644 |
| --- a/libcpp/include/cpplib.h |
| +++ b/libcpp/include/cpplib.h |
| @@ -318,6 +318,17 @@ enum cpp_main_search |
| CMS_system, /* Search the system INCLUDE path. */ |
| }; |
| |
| +/* The possible bidirectional control characters checking levels, from least |
| + restrictive to most. */ |
| +enum cpp_bidirectional_level { |
| + /* No checking. */ |
| + bidirectional_none, |
| + /* Only detect unpaired uses of bidirectional control characters. */ |
| + bidirectional_unpaired, |
| + /* Detect any use of bidirectional control characters. */ |
| + bidirectional_any |
| +}; |
| + |
| /* This structure is nested inside struct cpp_reader, and |
| carries all the options visible to the command line. */ |
| struct cpp_options |
| @@ -531,6 +542,10 @@ struct cpp_options |
| /* True if warn about differences between C++98 and C++11. */ |
| bool cpp_warn_cxx11_compat; |
| |
| + /* Nonzero if bidirectional control characters checking is on. See enum |
| + cpp_bidirectional_level. */ |
| + unsigned char cpp_warn_bidirectional; |
| + |
| /* Dependency generation. */ |
| struct |
| { |
| @@ -635,7 +650,8 @@ enum cpp_warning_reason { |
| CPP_W_C90_C99_COMPAT, |
| CPP_W_C11_C2X_COMPAT, |
| CPP_W_CXX11_COMPAT, |
| - CPP_W_EXPANSION_TO_DEFINED |
| + CPP_W_EXPANSION_TO_DEFINED, |
| + CPP_W_BIDIRECTIONAL |
| }; |
| |
| /* Callback for header lookup for HEADER, which is the name of a |
| diff --git a/libcpp/init.c b/libcpp/init.c |
| index 5a424e23553..f9a8f5f088f 100644 |
| --- a/libcpp/init.c |
| +++ b/libcpp/init.c |
| @@ -219,6 +219,7 @@ cpp_create_reader (enum c_lang lang, cpp |
| = ENABLE_CANONICAL_SYSTEM_HEADERS; |
| CPP_OPTION (pfile, ext_numeric_literals) = 1; |
| CPP_OPTION (pfile, warn_date_time) = 0; |
| + CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; |
| |
| /* Default CPP arithmetic to something sensible for the host for the |
| benefit of dumb users like fix-header. */ |
| diff --git a/libcpp/internal.h b/libcpp/internal.h |
| index 8577cab6c83..0ce0246c5a2 100644 |
| --- a/libcpp/internal.h |
| +++ b/libcpp/internal.h |
| @@ -597,6 +597,13 @@ struct cpp_reader |
| /* Location identifying the main source file -- intended to be line |
| zero of said file. */ |
| location_t main_loc; |
| + |
| + /* Returns true iff checking of bidirectional control characters |
| + (whether written as UTF-8 or as UCNs) is enabled at any level. */ |
| + bool warn_bidi_p () const |
| + { |
| + return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; |
| + } |
| }; |
| |
| /* Character classes. Based on the more primitive macros in safe-ctype.h. |
| diff --git a/libcpp/lex.c b/libcpp/lex.c |
| index fa2253d41c3..6a4fbce6030 100644 |
| --- a/libcpp/lex.c |
| +++ b/libcpp/lex.c |
| @@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi |
| } |
| } |
| |
| +namespace bidi { |
| + enum class kind { |
| + NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL |
| + }; |
| + |
| + /* All the UTF-8 encodings of bidi characters start with E2. */ |
| + constexpr uchar utf8_start = 0xe2; |
| + |
| + /* A vector holding currently open bidi contexts. We use a char for |
| + each context, its LSB is 1 if it represents a PDF context, 0 if it |
| + represents a PDI context. The next bit is 1 if this context was open |
| + by a bidi character written as a UCN, and 0 when it was UTF-8. */ |
| + semi_embedded_vec <unsigned char, 16> vec; |
| + |
| + /* Close the whole comment/identifier/string literal/character constant |
| + context. */ |
| + void on_close () |
| + { |
| + vec.truncate (0); |
| + } |
| + |
| + /* Pop the last element in the vector. */ |
| + void pop () |
| + { |
| + unsigned int len = vec.count (); |
| + gcc_checking_assert (len > 0); |
| + vec.truncate (len - 1); |
| + } |
| + |
| + /* Return the context of the Ith element. */ |
| + kind ctx_at (unsigned int i) |
| + { |
| + return (vec[i] & 1) ? kind::PDF : kind::PDI; |
| + } |
| + |
| + /* Return which context is currently opened. */ |
| + kind current_ctx () |
| + { |
| + unsigned int len = vec.count (); |
| + if (len == 0) |
| + return kind::NONE; |
| + return ctx_at (len - 1); |
| + } |
| + |
| + /* Return true if the current context comes from a UCN origin, that is, |
| + the bidi char which started this bidi context was written as a UCN. */ |
| + bool current_ctx_ucn_p () |
| + { |
| + unsigned int len = vec.count (); |
| + gcc_checking_assert (len > 0); |
| + return (vec[len - 1] >> 1) & 1; |
| + } |
| + |
| + /* We've read a bidi char, update the current vector as necessary. */ |
| + void on_char (kind k, bool ucn_p) |
| + { |
| + switch (k) |
| + { |
| + case kind::LRE: |
| + case kind::RLE: |
| + case kind::LRO: |
| + case kind::RLO: |
| + vec.push (ucn_p ? 3u : 1u); |
| + break; |
| + case kind::LRI: |
| + case kind::RLI: |
| + case kind::FSI: |
| + vec.push (ucn_p ? 2u : 0u); |
| + break; |
| + /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO |
| + whose scope has not yet been terminated. */ |
| + case kind::PDF: |
| + if (current_ctx () == kind::PDF) |
| + pop (); |
| + break; |
| + /* PDI terminates the scope of the last LRI, RLI, or FSI whose |
| + scope has not yet been terminated, as well as the scopes of |
| + any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not |
| + yet been terminated. */ |
| + case kind::PDI: |
| + for (int i = vec.count () - 1; i >= 0; --i) |
| + if (ctx_at (i) == kind::PDI) |
| + { |
| + vec.truncate (i); |
| + break; |
| + } |
| + break; |
| + case kind::LTR: |
| + case kind::RTL: |
| + /* These aren't popped by a PDF/PDI. */ |
| + break; |
| + [[likely]] case kind::NONE: |
| + break; |
| + default: |
| + abort (); |
| + } |
| + } |
| + |
| + /* Return a descriptive string for K. */ |
| + const char *to_str (kind k) |
| + { |
| + switch (k) |
| + { |
| + case kind::LRE: |
| + return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; |
| + case kind::RLE: |
| + return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; |
| + case kind::LRO: |
| + return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; |
| + case kind::RLO: |
| + return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; |
| + case kind::LRI: |
| + return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; |
| + case kind::RLI: |
| + return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; |
| + case kind::FSI: |
| + return "U+2068 (FIRST STRONG ISOLATE)"; |
| + case kind::PDF: |
| + return "U+202C (POP DIRECTIONAL FORMATTING)"; |
| + case kind::PDI: |
| + return "U+2069 (POP DIRECTIONAL ISOLATE)"; |
| + case kind::LTR: |
| + return "U+200E (LEFT-TO-RIGHT MARK)"; |
| + case kind::RTL: |
| + return "U+200F (RIGHT-TO-LEFT MARK)"; |
| + default: |
| + abort (); |
| + } |
| + } |
| +} |
| + |
| +/* Return the bidi kind of the 3-byte UTF-8 sequence at P, or NONE if it is not a bidi char. */ |
| + |
| +static bidi::kind |
| +get_bidi_utf8 (const unsigned char *const p) |
| +{ |
| + gcc_checking_assert (p[0] == bidi::utf8_start); |
| + |
| + if (p[1] == 0x80) /* E2 80 xx encodes U+2000..U+203F. */ |
| + switch (p[2]) |
| + { |
| + case 0xaa: /* U+202A */ |
| + return bidi::kind::LRE; |
| + case 0xab: /* U+202B */ |
| + return bidi::kind::RLE; |
| + case 0xac: /* U+202C */ |
| + return bidi::kind::PDF; |
| + case 0xad: /* U+202D */ |
| + return bidi::kind::LRO; |
| + case 0xae: /* U+202E */ |
| + return bidi::kind::RLO; |
| + case 0x8e: /* U+200E */ |
| + return bidi::kind::LTR; |
| + case 0x8f: /* U+200F */ |
| + return bidi::kind::RTL; |
| + default: |
| + break; |
| + } |
| + else if (p[1] == 0x81) /* E2 81 xx encodes U+2040..U+207F. */ |
| + switch (p[2]) |
| + { |
| + case 0xa6: /* U+2066 */ |
| + return bidi::kind::LRI; |
| + case 0xa7: /* U+2067 */ |
| + return bidi::kind::RLI; |
| + case 0xa8: /* U+2068 */ |
| + return bidi::kind::FSI; |
| + case 0xa9: /* U+2069 */ |
| + return bidi::kind::PDI; |
| + default: |
| + break; |
| + } |
| + |
| + return bidi::kind::NONE; |
| +} |
| + |
| +/* Parse a UCN where P points just past \u or \U and return its bidi code. */ |
| + |
| +static bidi::kind |
| +get_bidi_ucn (const unsigned char *p, bool is_U) |
| +{ |
| + /* 6.4.3 Universal Character Names |
| + \u hex-quad |
| + \U hex-quad hex-quad |
| + where \unnnn means \U0000nnnn. */ |
| + |
| + if (is_U) |
| + { |
| + if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') |
| + return bidi::kind::NONE; |
| + /* Skip 4B so we can treat \u and \U the same below. */ |
| + p += 4; |
| + } |
| + |
| + /* All code points we are looking for start with 20xx. */ |
| + if (p[0] != '2' || p[1] != '0') |
| + return bidi::kind::NONE; |
| + else if (p[2] == '2') |
| + switch (p[3]) |
| + { |
| + case 'a': |
| + case 'A': |
| + return bidi::kind::LRE; |
| + case 'b': |
| + case 'B': |
| + return bidi::kind::RLE; |
| + case 'c': |
| + case 'C': |
| + return bidi::kind::PDF; |
| + case 'd': |
| + case 'D': |
| + return bidi::kind::LRO; |
| + case 'e': |
| + case 'E': |
| + return bidi::kind::RLO; |
| + default: |
| + break; |
| + } |
| + else if (p[2] == '6') |
| + switch (p[3]) |
| + { |
| + case '6': |
| + return bidi::kind::LRI; |
| + case '7': |
| + return bidi::kind::RLI; |
| + case '8': |
| + return bidi::kind::FSI; |
| + case '9': |
| + return bidi::kind::PDI; |
| + default: |
| + break; |
| + } |
| + else if (p[2] == '0') |
| + switch (p[3]) |
| + { |
| + case 'e': |
| + case 'E': |
| + return bidi::kind::LTR; |
| + case 'f': |
| + case 'F': |
| + return bidi::kind::RTL; |
| + default: |
| + break; |
| + } |
| + |
| + return bidi::kind::NONE; |
| +} |
| + |
| +/* We're closing a bidi context, that is, we've encountered a newline, |
| + are closing a C-style comment, or are at the end of a string literal, |
| + character constant, or identifier. Warn if this context was not |
| + properly terminated by a PDI or PDF. P points to the last character |
| + in this context. */ |
| + |
| +static void |
| +maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) |
| +{ |
| + if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired |
| + && bidi::vec.count () > 0) |
| + { |
| + const location_t loc |
| + = linemap_position_for_column (pfile->line_table, |
| + CPP_BUF_COLUMN (pfile->buffer, p)); |
| + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| + "unpaired UTF-8 bidirectional control character " |
| + "detected"); |
| + } |
| + /* We're done with this context. */ |
| + bidi::on_close (); |
| +} |
| + |
| +/* We're at the beginning or in the middle of an identifier/comment/string |
| + literal/character constant. Warn if we've encountered a bidi character. |
| + KIND says which bidi character it was; P points to it in the character |
| + stream. UCN_P is true iff this bidi character was written as a UCN. */ |
| + |
| +static void |
| +maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, |
| + bool ucn_p) |
| +{ |
| + if (__builtin_expect (kind == bidi::kind::NONE, 1)) |
| + return; |
| + |
| + const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); |
| + |
| + if (warn_bidi != bidirectional_none) |
| + { |
| + const location_t loc |
| + = linemap_position_for_column (pfile->line_table, |
| + CPP_BUF_COLUMN (pfile->buffer, p)); |
| + /* A PDI/PDF that closes the currently open context gets no |
| + warning of its own; at level "any" the opening character was |
| + already reported. The exception is level "unpaired", which |
| + warns when the opener and closer mix UCN and UTF-8 spellings. */ |
| + if (kind == bidi::current_ctx ()) |
| + { |
| + if (warn_bidi == bidirectional_unpaired |
| + && bidi::current_ctx_ucn_p () != ucn_p) |
| + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| + "UTF-8 vs UCN mismatch when closing " |
| + "a context by \"%s\"", bidi::to_str (kind)); |
| + } |
| + else if (warn_bidi == bidirectional_any) |
| + { |
| + if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) |
| + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| + "\"%s\" is closing an unopened context", |
| + bidi::to_str (kind)); |
| + else |
| + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| + "found problematic Unicode character \"%s\"", |
| + bidi::to_str (kind)); |
| + } |
| + } |
| + /* Record this character: push the context it opens or pop what it closes. */ |
| + bidi::on_char (kind, ucn_p); |
| +} |
| + |
| /* Skip a C-style block comment. We find the end of the comment by |
| seeing if an asterisk is before every '/' we encounter. Returns |
| nonzero if comment terminated by EOF, zero otherwise. |
| @@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| cpp_buffer *buffer = pfile->buffer; |
| const uchar *cur = buffer->cur; |
| uchar c; |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| |
| cur++; |
| if (*cur == '/') |
| @@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| if (c == '/') |
| { |
| if (cur[-2] == '*') |
| - break; |
| + { |
| + if (warn_bidi_p) |
| + maybe_warn_bidi_on_close (pfile, cur); |
| + break; |
| + } |
| |
| /* Warn about potential nested comments, but not if the '/' |
| comes immediately before the true comment delimiter. |
| @@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| { |
| unsigned int cols; |
| buffer->cur = cur - 1; |
| + if (warn_bidi_p) |
| + maybe_warn_bidi_on_close (pfile, cur); |
| _cpp_process_line_notes (pfile, true); |
| if (buffer->next_line >= buffer->rlimit) |
| return true; |
| @@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| |
| cur = buffer->cur; |
| } |
| + /* If this is the first byte of a UTF-8 sequence that may encode |
| + a bidirectional control character, classify and report it. */ |
| + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| + { |
| + bidi::kind kind = get_bidi_utf8 (cur - 1); |
| + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); |
| + } |
| } |
| |
| buffer->cur = cur; |
| @@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) |
| { |
| cpp_buffer *buffer = pfile->buffer; |
| location_t orig_line = pfile->line_table->highest_line; |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| |
| - while (*buffer->cur != '\n') |
| - buffer->cur++; |
| + if (!warn_bidi_p) |
| + while (*buffer->cur != '\n') |
| + buffer->cur++; |
| + else |
| + { |
| + while (*buffer->cur != '\n' |
| + && *buffer->cur != bidi::utf8_start) |
| + buffer->cur++; |
| + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) |
| + { |
| + while (*buffer->cur != '\n') |
| + { |
| + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) |
| + { |
| + bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| + /*ucn_p=*/false); |
| + } |
| + buffer->cur++; |
| + } |
| + maybe_warn_bidi_on_close (pfile, buffer->cur); |
| + } |
| + } |
| |
| _cpp_process_line_notes (pfile, true); |
| return orig_line != pfile->line_table->highest_line; |
| @@ -1317,11 +1671,13 @@ static const cppchar_t utf8_signifier = |
| |
| /* Returns TRUE if the sequence starting at buffer->cur is valid in |
| an identifier. FIRST is TRUE if this starts an identifier. */ |
| + |
| static bool |
| forms_identifier_p (cpp_reader *pfile, int first, |
| struct normalize_state *state) |
| { |
| cpp_buffer *buffer = pfile->buffer; |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| |
| if (*buffer->cur == '$') |
| { |
| @@ -1344,6 +1700,13 @@ forms_identifier_p (cpp_reader *pfile, i |
| cppchar_t s; |
| if (*buffer->cur >= utf8_signifier) |
| { |
| + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) |
| + && warn_bidi_p) |
| + { |
| + bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| + /*ucn_p=*/false); |
| + } |
| if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| state, &s)) |
| return true; |
| @@ -1352,6 +1715,13 @@ forms_identifier_p (cpp_reader *pfile, i |
| && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) |
| { |
| buffer->cur += 2; |
| + if (warn_bidi_p) |
| + { |
| + bidi::kind kind = get_bidi_ucn (buffer->cur, |
| + buffer->cur[-1] == 'U'); |
| + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| + /*ucn_p=*/true); |
| + } |
| if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| state, &s, NULL, NULL)) |
| return true; |
| @@ -1460,6 +1830,7 @@ lex_identifier (cpp_reader *pfile, const |
| const uchar *cur; |
| unsigned int len; |
| unsigned int hash = HT_HASHSTEP (0, *base); |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| |
| cur = pfile->buffer->cur; |
| if (! starts_ucn) |
| @@ -1483,6 +1854,8 @@ lex_identifier (cpp_reader *pfile, const |
| pfile->buffer->cur++; |
| } |
| } while (forms_identifier_p (pfile, false, nst)); |
| + if (warn_bidi_p) |
| + maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); |
| result = _cpp_interpret_identifier (pfile, base, |
| pfile->buffer->cur - base); |
| *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); |
| @@ -1719,6 +2092,7 @@ static void |
| lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) |
| { |
| const uchar *pos = base; |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| |
| /* 'tis a pity this information isn't passed down from the lexer's |
| initial categorization of the token. */ |
| @@ -1955,8 +2329,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t |
| pos = base = pfile->buffer->cur; |
| note = &pfile->buffer->notes[pfile->buffer->cur_note]; |
| } |
| + else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) |
| + && warn_bidi_p) |
| + maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), |
| + /*ucn_p=*/false); |
| } |
| |
| + if (warn_bidi_p) |
| + maybe_warn_bidi_on_close (pfile, pos); |
| + |
| if (CPP_OPTION (pfile, user_literals)) |
| { |
| /* If a string format macro, say from inttypes.h, is placed touching |
| @@ -2051,15 +2432,27 @@ lex_string (cpp_reader *pfile, cpp_token |
| else |
| terminator = '>', type = CPP_HEADER_NAME; |
| |
| + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| for (;;) |
| { |
| cppchar_t c = *cur++; |
| |
| /* In #include-style directives, terminators are not escapable. */ |
| if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') |
| - cur++; |
| + { |
| + if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) |
| + { |
| + bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); |
| + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); |
| + } |
| + cur++; |
| + } |
| else if (c == terminator) |
| - break; |
| + { |
| + if (warn_bidi_p) |
| + maybe_warn_bidi_on_close (pfile, cur - 1); |
| + break; |
| + } |
| else if (c == '\n') |
| { |
| cur--; |
| @@ -2076,6 +2469,11 @@ lex_string (cpp_reader *pfile, cpp_token |
| } |
| else if (c == '\0') |
| saw_NUL = true; |
| + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| + { |
| + bidi::kind kind = get_bidi_utf8 (cur - 1); |
| + maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); |
| + } |
| } |
| |
| if (saw_NUL && !pfile->state.skipping) |