Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 1 | From 51c500269bf53749b107807d84271385fad35628 Mon Sep 17 00:00:00 2001 |
| 2 | From: Marek Polacek <polacek@redhat.com> |
| 3 | Date: Wed, 6 Oct 2021 14:33:59 -0400 |
| 4 | Subject: [PATCH] libcpp: Implement -Wbidi-chars for CVE-2021-42574 [PR103026] |
| 5 | |
| 6 | From a link below: |
| 7 | "An issue was discovered in the Bidirectional Algorithm in the Unicode |
| 8 | Specification through 14.0. It permits the visual reordering of |
| 9 | characters via control sequences, which can be used to craft source code |
| 10 | that renders different logic than the logical ordering of tokens |
| 11 | ingested by compilers and interpreters. Adversaries can leverage this to |
| 12 | encode source code for compilers accepting Unicode such that targeted |
| 13 | vulnerabilities are introduced invisibly to human reviewers." |
| 14 | |
| 15 | More info: |
| 16 | https://nvd.nist.gov/vuln/detail/CVE-2021-42574 |
| 17 | https://trojansource.codes/ |
| 18 | |
| 19 | This is not a compiler bug. However, to mitigate the problem, this patch |
| 20 | implements -Wbidi-chars=[none|unpaired|any] to warn about possibly |
| 21 | misleading Unicode bidirectional control characters the preprocessor may |
| 22 | encounter. |
| 23 | |
| 24 | The default is =unpaired, which warns about improperly terminated |
| 25 | bidirectional control characters; e.g. an LRE without its corresponding PDF. |
| 26 | The level =any warns about any use of bidirectional control characters. |
| 27 | |
| 28 | This patch handles both UCNs and UTF-8 characters. UCNs designating |
| 29 | bidi characters in identifiers have been accepted since r204886. Then r217144 |
| 30 | enabled -fextended-identifiers by default. Extended characters in C/C++ |
| 31 | identifiers have been accepted since r275979. However, this patch still |
| 32 | warns about mixing UTF-8 and UCN bidi characters; there seems to be no |
| 33 | good reason to allow mixing them. |
| 34 | |
| 35 | We warn in different contexts: comments (both C and C++-style), string |
| 36 | literals, character constants, and identifiers. Expectedly, UCNs are ignored |
| 37 | in comments and raw string literals. The bidirectional control characters |
| 38 | can nest so this patch handles that as well. |
| 39 | |
| 40 | I have neither included nor tested this at all with Fortran (which also has |
| 41 | string literals and line comments). |
| 42 | |
| 43 | Dave M. posted patches improving diagnostics involving Unicode characters. |
| 44 | This patch does not make use of this new infrastructure yet. |
| 45 | |
| 46 | PR preprocessor/103026 |
| 47 | |
| 48 | gcc/c-family/ChangeLog: |
| 49 | |
| 50 | * c.opt (Wbidi-chars, Wbidi-chars=): New option. |
| 51 | |
| 52 | gcc/ChangeLog: |
| 53 | |
| 54 | * doc/invoke.texi: Document -Wbidi-chars. |
| 55 | |
| 56 | libcpp/ChangeLog: |
| 57 | |
| 58 | * include/cpplib.h (enum cpp_bidirectional_level): New. |
| 59 | (struct cpp_options): Add cpp_warn_bidirectional. |
| 60 | (enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL. |
| 61 | * internal.h (struct cpp_reader): Add warn_bidi_p member |
| 62 | function. |
| 63 | * init.c (cpp_create_reader): Set cpp_warn_bidirectional. |
| 64 | * lex.c (bidi): New namespace. |
| 65 | (get_bidi_utf8): New function. |
| 66 | (get_bidi_ucn): Likewise. |
| 67 | (maybe_warn_bidi_on_close): Likewise. |
| 68 | (maybe_warn_bidi_on_char): Likewise. |
| 69 | (_cpp_skip_block_comment): Implement warning about bidirectional |
| 70 | control characters. |
| 71 | (skip_line_comment): Likewise. |
| 72 | (forms_identifier_p): Likewise. |
| 73 | (lex_identifier): Likewise. |
| 74 | (lex_string): Likewise. |
| 75 | (lex_raw_string): Likewise. |
| 76 | |
| 77 | gcc/testsuite/ChangeLog: |
| 78 | |
| 79 | * c-c++-common/Wbidi-chars-1.c: New test. |
| 80 | * c-c++-common/Wbidi-chars-2.c: New test. |
| 81 | * c-c++-common/Wbidi-chars-3.c: New test. |
| 82 | * c-c++-common/Wbidi-chars-4.c: New test. |
| 83 | * c-c++-common/Wbidi-chars-5.c: New test. |
| 84 | * c-c++-common/Wbidi-chars-6.c: New test. |
| 85 | * c-c++-common/Wbidi-chars-7.c: New test. |
| 86 | * c-c++-common/Wbidi-chars-8.c: New test. |
| 87 | * c-c++-common/Wbidi-chars-9.c: New test. |
| 88 | * c-c++-common/Wbidi-chars-10.c: New test. |
| 89 | * c-c++-common/Wbidi-chars-11.c: New test. |
| 90 | * c-c++-common/Wbidi-chars-12.c: New test. |
| 91 | * c-c++-common/Wbidi-chars-13.c: New test. |
| 92 | * c-c++-common/Wbidi-chars-14.c: New test. |
| 93 | * c-c++-common/Wbidi-chars-15.c: New test. |
| 94 | * c-c++-common/Wbidi-chars-16.c: New test. |
| 95 | * c-c++-common/Wbidi-chars-17.c: New test. |
| 96 | |
| 97 | CVE: CVE-2021-42574 |
| 98 | Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=51c500269bf53749b107807d84271385fad35628] |
| 99 | Signed-off-by: Pgowda <pgowda.cve@gmail.com> |
| 100 | |
| 101 | --- |
| 102 | gcc/c-family/c.opt | 24 ++ |
| 103 | gcc/doc/invoke.texi | 21 +- |
| 104 | gcc/testsuite/c-c++-common/Wbidi-chars-1.c | 12 + |
| 105 | gcc/testsuite/c-c++-common/Wbidi-chars-10.c | 27 ++ |
| 106 | gcc/testsuite/c-c++-common/Wbidi-chars-11.c | 13 + |
| 107 | gcc/testsuite/c-c++-common/Wbidi-chars-12.c | 19 + |
| 108 | gcc/testsuite/c-c++-common/Wbidi-chars-13.c | 17 + |
| 109 | gcc/testsuite/c-c++-common/Wbidi-chars-14.c | 38 ++ |
| 110 | gcc/testsuite/c-c++-common/Wbidi-chars-15.c | 59 +++ |
| 111 | gcc/testsuite/c-c++-common/Wbidi-chars-16.c | 26 ++ |
| 112 | gcc/testsuite/c-c++-common/Wbidi-chars-17.c | 30 ++ |
| 113 | gcc/testsuite/c-c++-common/Wbidi-chars-2.c | 9 + |
| 114 | gcc/testsuite/c-c++-common/Wbidi-chars-3.c | 11 + |
| 115 | gcc/testsuite/c-c++-common/Wbidi-chars-4.c | 188 +++++++++ |
| 116 | gcc/testsuite/c-c++-common/Wbidi-chars-5.c | 188 +++++++++ |
| 117 | gcc/testsuite/c-c++-common/Wbidi-chars-6.c | 155 ++++++++ |
| 118 | gcc/testsuite/c-c++-common/Wbidi-chars-7.c | 9 + |
| 119 | gcc/testsuite/c-c++-common/Wbidi-chars-8.c | 13 + |
| 120 | gcc/testsuite/c-c++-common/Wbidi-chars-9.c | 29 ++ |
| 121 | libcpp/include/cpplib.h | 18 +- |
| 122 | libcpp/init.c | 1 + |
| 123 | libcpp/internal.h | 7 + |
| 124 | libcpp/lex.c | 408 +++++++++++++++++++- |
| 125 | 23 files changed, 1315 insertions(+), 7 deletions(-) |
| 126 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| 127 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| 128 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| 129 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| 130 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-13.c |
| 131 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| 132 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| 133 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| 134 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| 135 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| 136 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| 137 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| 138 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| 139 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| 140 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| 141 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| 142 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| 143 | |
| 144 | diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt |
| 145 | index 8a4cd634f77..3976fc368db 100644 |
| 146 | --- a/gcc/c-family/c.opt |
| 147 | +++ b/gcc/c-family/c.opt |
| 148 | @@ -370,6 +370,30 @@ Wbad-function-cast |
| 149 | C ObjC Var(warn_bad_function_cast) Warning |
| 150 | Warn about casting functions to incompatible types. |
| 151 | |
| 152 | +Wbidi-chars |
| 153 | +C ObjC C++ ObjC++ Warning Alias(Wbidi-chars=,any,none) |
| 154 | +; |
| 155 | + |
| 156 | +Wbidi-chars= |
| 157 | +C ObjC C++ ObjC++ RejectNegative Joined Warning CPP(cpp_warn_bidirectional) CppReason(CPP_W_BIDIRECTIONAL) Var(warn_bidirectional) Init(bidirectional_unpaired) Enum(cpp_bidirectional_level) |
| 158 | +-Wbidi-chars=[none|unpaired|any] Warn about UTF-8 bidirectional control characters. |
| 159 | + |
| 160 | +; Required for these enum values. |
| 161 | +SourceInclude |
| 162 | +cpplib.h |
| 163 | + |
| 164 | +Enum |
| 165 | +Name(cpp_bidirectional_level) Type(int) UnknownError(argument %qs to %<-Wbidi-chars%> not recognized) |
| 166 | + |
| 167 | +EnumValue |
| 168 | +Enum(cpp_bidirectional_level) String(none) Value(bidirectional_none) |
| 169 | + |
| 170 | +EnumValue |
| 171 | +Enum(cpp_bidirectional_level) String(unpaired) Value(bidirectional_unpaired) |
| 172 | + |
| 173 | +EnumValue |
| 174 | +Enum(cpp_bidirectional_level) String(any) Value(bidirectional_any) |
| 175 | + |
| 176 | Wbool-compare |
| 177 | C ObjC C++ ObjC++ Var(warn_bool_compare) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall) |
| 178 | Warn about boolean expression compared with an integer value different from true/false. |
| 179 | diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi |
| 180 | index 6070288856c..a22758d18ee 100644 |
| 181 | --- a/gcc/doc/invoke.texi |
| 182 | +++ b/gcc/doc/invoke.texi |
| 183 | @@ -326,7 +326,9 @@ Objective-C and Objective-C++ Dialects}. |
| 184 | -Warith-conversion @gol |
| 185 | -Warray-bounds -Warray-bounds=@var{n} @gol |
| 186 | -Wno-attributes -Wattribute-alias=@var{n} -Wno-attribute-alias @gol |
| 187 | --Wno-attribute-warning -Wbool-compare -Wbool-operation @gol |
| 188 | +-Wno-attribute-warning @gol |
| 189 | +-Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol |
| 190 | +-Wbool-compare -Wbool-operation @gol |
| 191 | -Wno-builtin-declaration-mismatch @gol |
| 192 | -Wno-builtin-macro-redefined -Wc90-c99-compat -Wc99-c11-compat @gol |
| 193 | -Wc11-c2x-compat @gol |
| 194 | @@ -7559,6 +7561,23 @@ Attributes considered include @code{allo |
| 195 | This is the default. You can disable these warnings with either |
| 196 | @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}. |
| 197 | |
| 198 | +@item -Wbidi-chars=@r{[}none@r{|}unpaired@r{|}any@r{]} |
| 199 | +@opindex Wbidi-chars= |
| 200 | +@opindex Wbidi-chars |
| 201 | +@opindex Wno-bidi-chars |
| 202 | +Warn about possibly misleading UTF-8 bidirectional control characters in |
| 203 | +comments, string literals, character constants, and identifiers. Such |
| 204 | +characters can change left-to-right writing direction into right-to-left |
| 205 | +(and vice versa), which can cause confusion between the logical order and |
| 206 | +visual order. This may be dangerous; for instance, it may seem that a piece |
| 207 | +of code is not commented out, whereas it in fact is. |
| 208 | + |
| 209 | +There are three levels of warning supported by GCC@. The default is |
| 210 | +@option{-Wbidi-chars=unpaired}, which warns about improperly terminated |
| 211 | +bidi contexts. @option{-Wbidi-chars=none} turns the warning off. |
| 212 | +@option{-Wbidi-chars=any} warns about any use of bidirectional control |
| 213 | +characters. |
| 214 | + |
| 215 | @item -Wbool-compare |
| 216 | @opindex Wno-bool-compare |
| 217 | @opindex Wbool-compare |
| 218 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-10.c b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| 219 | new file mode 100644 |
| 220 | index 00000000000..34f5ac19271 |
| 221 | --- /dev/null |
| 222 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-10.c |
| 223 | @@ -0,0 +1,27 @@ |
| 224 | +/* PR preprocessor/103026 */ |
| 225 | +/* { dg-do compile } */ |
| 226 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 227 | +/* More nesting testing. */ |
| 228 | + |
| 229 | +/* RLEâ« LRI⦠PDF⬠PDIâ©*/ |
| 230 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 231 | +int LRE_\u202a_PDF_\u202c; |
| 232 | +int LRE_\u202a_PDF_\u202c_LRE_\u202a_PDF_\u202c; |
| 233 | +int LRE_\u202a_LRI_\u2066_PDF_\u202c_PDI_\u2069; |
| 234 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 235 | +int RLE_\u202b_RLI_\u2067_PDF_\u202c_PDI_\u2069; |
| 236 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 237 | +int RLE_\u202b_RLI_\u2067_PDI_\u2069_PDF_\u202c; |
| 238 | +int FSI_\u2068_LRO_\u202d_PDI_\u2069_PDF_\u202c; |
| 239 | +int FSI_\u2068; |
| 240 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 241 | +int FSI_\u2068_PDI_\u2069; |
| 242 | +int FSI_\u2068_FSI_\u2068_PDI_\u2069; |
| 243 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 244 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| 245 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| 246 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 247 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDF_\u202c; |
| 248 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 249 | +int RLI_\u2067_RLI_\u2067_RLI_\u2067_RLI_\u2067_FSI_\u2068_PDI_\u2069_PDI_\u2069_PDI_\u2069_PDI_\u2069; |
| 250 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 251 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-11.c b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| 252 | new file mode 100644 |
| 253 | index 00000000000..270ce2368a9 |
| 254 | --- /dev/null |
| 255 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-11.c |
| 256 | @@ -0,0 +1,13 @@ |
| 257 | +/* PR preprocessor/103026 */ |
| 258 | +/* { dg-do compile } */ |
| 259 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 260 | +/* Test that we warn when mixing UCN and UTF-8. */ |
| 261 | + |
| 262 | +int LRE_âª_PDF_\u202c; |
| 263 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 264 | +int LRE_\u202a_PDF_â¬_; |
| 265 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 266 | +const char *s1 = "LRE_âª_PDF_\u202c"; |
| 267 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 268 | +const char *s2 = "LRE_\u202a_PDF_â¬"; |
| 269 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 270 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-12.c b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| 271 | new file mode 100644 |
| 272 | index 00000000000..b07eec1da91 |
| 273 | --- /dev/null |
| 274 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-12.c |
| 275 | @@ -0,0 +1,19 @@ |
| 276 | +/* PR preprocessor/103026 */ |
| 277 | +/* { dg-do compile { target { c || c++11 } } } */ |
| 278 | +/* { dg-options "-Wbidi-chars=any" } */ |
| 279 | +/* Test raw strings. */ |
| 280 | + |
| 281 | +const char *s1 = R"(a b c LRE⪠1 2 3 PDF⬠x y z)"; |
| 282 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 283 | +const char *s2 = R"(a b c RLE⫠1 2 3 PDF⬠x y z)"; |
| 284 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 285 | +const char *s3 = R"(a b c LROâ 1 2 3 PDF⬠x y z)"; |
| 286 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 287 | +const char *s4 = R"(a b c RLO⮠1 2 3 PDF⬠x y z)"; |
| 288 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 289 | +const char *s7 = R"(a b c FSI⨠1 2 3 PDI⩠x y) z"; |
| 290 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 291 | +const char *s8 = R"(a b c PDIâ© x y )z"; |
| 292 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 293 | +const char *s9 = R"(a b c PDF⬠x y z)"; |
| 294 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 295 | diff -uprN '-x*.orig' '-x*.rej' del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c |
| 296 | --- del/gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 1969-12-31 16:00:00.000000000 -0800 |
| 297 | +++ gcc-11.2.0/gcc/testsuite/c-c++-common/Wbidi-chars-13.c 2021-12-13 23:11:22.328439287 -0800 |
| 298 | @@ -0,0 +1,17 @@ |
| 299 | +/* PR preprocessor/103026 */ |
| 300 | +/* { dg-do compile { target { c || c++11 } } } */ |
| 301 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 302 | +/* Test raw strings. */ |
| 303 | + |
| 304 | +const char *s1 = R"(a b c LRE⪠1 2 3)"; |
| 305 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 306 | +const char *s2 = R"(a b c RLEâ« 1 2 3)"; |
| 307 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 308 | +const char *s3 = R"(a b c LROâ 1 2 3)"; |
| 309 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 310 | +const char *s4 = R"(a b c FSI⨠1 2 3)"; |
| 311 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 312 | +const char *s5 = R"(a b c LRI⦠1 2 3)"; |
| 313 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 314 | +const char *s6 = R"(a b c RLI⧠1 2 3)"; |
| 315 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 316 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-14.c b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| 317 | new file mode 100644 |
| 318 | index 00000000000..ba5f75d9553 |
| 319 | --- /dev/null |
| 320 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-14.c |
| 321 | @@ -0,0 +1,38 @@ |
| 322 | +/* PR preprocessor/103026 */ |
| 323 | +/* { dg-do compile } */ |
| 324 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 325 | +/* Test PDI handling, which also pops any subsequent LREs, RLEs, LROs, |
| 326 | + or RLOs. */ |
| 327 | + |
| 328 | +/* LRI_â¦_LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â©*/ |
| 329 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 330 | +// LRI_â¦_RLE_â«_RLE_â«_RLE_â«_PDI_â© |
| 331 | +// LRI_â¦_RLO_â®_RLE_â«_RLE_â«_PDI_â© |
| 332 | +// LRI_â¦_RLO_â®_RLE_â«_PDI_â© |
| 333 | +// FSI_â¨_RLO_â®_PDI_â© |
| 334 | +// FSI_â¨_FSI_â¨_RLO_â®_PDI_â© |
| 335 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 336 | + |
| 337 | +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069; |
| 338 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 339 | +int LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; |
| 340 | +int LRI_\u2066_LRI_\u2066_LRI_\u2066_LRE_\u202a_LRE_\u202a_LRE_\u202a_PDI_\u2069_PDI_\u2069; |
| 341 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 342 | +int PDI_\u2069; |
| 343 | +int LRI_\u2066_PDI_\u2069; |
| 344 | +int RLI_\u2067_PDI_\u2069; |
| 345 | +int LRE_\u202a_LRI_\u2066_PDI_\u2069; |
| 346 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 347 | +int LRI_\u2066_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| 348 | +int LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| 349 | +int RLI_\u2067_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| 350 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 351 | +int FSI_\u2068_LRI_\u2066_LRE_\u202a_LRE_\u202a_PDF_\u202c_PDI_\u2069; |
| 352 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 353 | +int RLO_\u202e_PDI_\u2069; |
| 354 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 355 | +int RLI_\u2067_PDI_\u2069_RLI_\u2067; |
| 356 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 357 | +int FSI_\u2068_PDF_\u202c_PDI_\u2069; |
| 358 | +int FSI_\u2068_FSI_\u2068_PDF_\u202c_PDI_\u2069; |
| 359 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 360 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-15.c b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| 361 | new file mode 100644 |
| 362 | index 00000000000..a0ce8ff5e2c |
| 363 | --- /dev/null |
| 364 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-15.c |
| 365 | @@ -0,0 +1,59 @@ |
| 366 | +/* PR preprocessor/103026 */ |
| 367 | +/* { dg-do compile } */ |
| 368 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 369 | +/* Test unpaired bidi control chars in multiline comments. */ |
| 370 | + |
| 371 | +/* |
| 372 | + * LRE⪠end |
| 373 | + */ |
| 374 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 375 | +/* |
| 376 | + * RLEâ« end |
| 377 | + */ |
| 378 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 379 | +/* |
| 380 | + * LROâ end |
| 381 | + */ |
| 382 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 383 | +/* |
| 384 | + * RLOâ® end |
| 385 | + */ |
| 386 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 387 | +/* |
| 388 | + * LRI⦠end |
| 389 | + */ |
| 390 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 391 | +/* |
| 392 | + * RLI⧠end |
| 393 | + */ |
| 394 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 395 | +/* |
| 396 | + * FSI⨠end |
| 397 | + */ |
| 398 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 399 | +/* LRE⪠|
| 400 | + PDF⬠*/ |
| 401 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 402 | +/* FSI⨠|
| 403 | + PDIâ© */ |
| 404 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 405 | + |
| 406 | +/* LRE<âª> |
| 407 | + * |
| 408 | + */ |
| 409 | +/* { dg-warning "unpaired" "" { target *-*-* } .-3 } */ |
| 410 | + |
| 411 | +/* |
| 412 | + * LRE<âª> |
| 413 | + */ |
| 414 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 415 | + |
| 416 | +/* |
| 417 | + * |
| 418 | + * LRE<âª> */ |
| 419 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 420 | + |
| 421 | +/* RLI<â§> */ /* PDI<â©> */ |
| 422 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 423 | +/* LRE<âª> */ /* PDF<â¬> */ |
| 424 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 425 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-16.c b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| 426 | new file mode 100644 |
| 427 | index 00000000000..baa0159861c |
| 428 | --- /dev/null |
| 429 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-16.c |
| 430 | @@ -0,0 +1,26 @@ |
| 431 | +/* PR preprocessor/103026 */ |
| 432 | +/* { dg-do compile } */ |
| 433 | +/* { dg-options "-Wbidi-chars=any" } */ |
| 434 | +/* Test LTR/RTL chars. */ |
| 435 | + |
| 436 | +/* LTR<â> */ |
| 437 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| 438 | +// LTR<â> |
| 439 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| 440 | +/* RTL<â> */ |
| 441 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| 442 | +// RTL<â> |
| 443 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| 444 | + |
| 445 | +const char *s1 = "LTR<â>"; |
| 446 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| 447 | +const char *s2 = "LTR\u200e"; |
| 448 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| 449 | +const char *s3 = "LTR\u200E"; |
| 450 | +/* { dg-warning "U\\+200E" "" { target *-*-* } .-1 } */ |
| 451 | +const char *s4 = "RTL<â>"; |
| 452 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| 453 | +const char *s5 = "RTL\u200f"; |
| 454 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| 455 | +const char *s6 = "RTL\u200F"; |
| 456 | +/* { dg-warning "U\\+200F" "" { target *-*-* } .-1 } */ |
| 457 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-17.c b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| 458 | new file mode 100644 |
| 459 | index 00000000000..07cb4321f96 |
| 460 | --- /dev/null |
| 461 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-17.c |
| 462 | @@ -0,0 +1,30 @@ |
| 463 | +/* PR preprocessor/103026 */ |
| 464 | +/* { dg-do compile } */ |
| 465 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 466 | +/* Test LTR/RTL chars. */ |
| 467 | + |
| 468 | +/* LTR<â> */ |
| 469 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 470 | +// LTR<â> |
| 471 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 472 | +/* RTL<â> */ |
| 473 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 474 | +// RTL<â> |
| 475 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 476 | +int ltr_\u200e; |
| 477 | +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ |
| 478 | +int rtl_\u200f; |
| 479 | +/* { dg-error "universal character " "" { target *-*-* } .-1 } */ |
| 480 | + |
| 481 | +const char *s1 = "LTR<â>"; |
| 482 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 483 | +const char *s2 = "LTR\u200e"; |
| 484 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 485 | +const char *s3 = "LTR\u200E"; |
| 486 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 487 | +const char *s4 = "RTL<â>"; |
| 488 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 489 | +const char *s5 = "RTL\u200f"; |
| 490 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 491 | +const char *s6 = "RTL\u200F"; |
| 492 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 493 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-1.c b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| 494 | new file mode 100644 |
| 495 | index 00000000000..2340374f276 |
| 496 | --- /dev/null |
| 497 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-1.c |
| 498 | @@ -0,0 +1,12 @@ |
| 499 | +/* PR preprocessor/103026 */ |
| 500 | +/* { dg-do compile } */ |
| 501 | + |
| 502 | +int main() { |
| 503 | + int isAdmin = 0; |
| 504 | + /*â® } â¦if (isAdmin)⩠⦠begin admins only */ |
| 505 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 506 | + __builtin_printf("You are an admin.\n"); |
| 507 | + /* end admins only â® { â¦*/ |
| 508 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 509 | + return 0; |
| 510 | +} |
| 511 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-2.c b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| 512 | new file mode 100644 |
| 513 | index 00000000000..2340374f276 |
| 514 | --- /dev/null |
| 515 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-2.c |
| 516 | @@ -0,0 +1,9 @@ |
| 517 | +/* PR preprocessor/103026 */ |
| 518 | +/* { dg-do compile } */ |
| 519 | + |
| 520 | +int main() { |
| 521 | + /* Say hello; newlineâ§/*/ return 0 ; |
| 522 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 523 | + __builtin_printf("Hello world.\n"); |
| 524 | + return 0; |
| 525 | +} |
| 526 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-3.c b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| 527 | new file mode 100644 |
| 528 | index 00000000000..9dc7edb6e64 |
| 529 | --- /dev/null |
| 530 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-3.c |
| 531 | @@ -0,0 +1,11 @@ |
| 532 | +/* PR preprocessor/103026 */ |
| 533 | +/* { dg-do compile } */ |
| 534 | + |
| 535 | +int main() { |
| 536 | + const char* access_level = "user"; |
| 537 | + if (__builtin_strcmp(access_level, "userâ® â¦// Check if adminâ© â¦")) { |
| 538 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 539 | + __builtin_printf("You are an admin.\n"); |
| 540 | + } |
| 541 | + return 0; |
| 542 | +} |
| 543 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-4.c b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| 544 | new file mode 100644 |
| 545 | index 00000000000..639e5c62e88 |
| 546 | --- /dev/null |
| 547 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-4.c |
| 548 | @@ -0,0 +1,188 @@ |
| 549 | +/* PR preprocessor/103026 */ |
| 550 | +/* { dg-do compile } */ |
| 551 | +/* { dg-options "-Wbidi-chars=any -Wno-multichar -Wno-overflow" } */ |
| 552 | +/* Test all bidi chars in various contexts (identifiers, comments, |
| 553 | + string literals, character constants), both UCN and UTF-8. The bidi |
| 554 | + chars here are properly terminated, except for the character constants. */ |
| 555 | + |
| 556 | +/* a b c LRE⪠1 2 3 PDF⬠x y z */ |
| 557 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 558 | +/* a b c RLE⫠1 2 3 PDF⬠x y z */ |
| 559 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 560 | +/* a b c LROâ 1 2 3 PDF⬠x y z */ |
| 561 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 562 | +/* a b c RLO⮠1 2 3 PDF⬠x y z */ |
| 563 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 564 | +/* a b c LRI⦠1 2 3 PDI⩠x y z */ |
| 565 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 566 | +/* a b c RLI⧠1 2 3 PDI⩠x y */ |
| 567 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 568 | +/* a b c FSI⨠1 2 3 PDI⩠x y z */ |
| 569 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 570 | + |
| 571 | +/* Same but C++ comments instead. */ |
| 572 | +// a b c LRE⪠1 2 3 PDF⬠x y z |
| 573 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 574 | +// a b c RLE⫠1 2 3 PDF⬠x y z |
| 575 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 576 | +// a b c LROâ 1 2 3 PDF⬠x y z |
| 577 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 578 | +// a b c RLO⮠1 2 3 PDF⬠x y z |
| 579 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 580 | +// a b c LRI⦠1 2 3 PDI⩠x y z |
| 581 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 582 | +// a b c RLI⧠1 2 3 PDI⩠x y |
| 583 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 584 | +// a b c FSI⨠1 2 3 PDI⩠x y z |
| 585 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 586 | + |
| 587 | +/* Here we're closing an unopened context, warn when =any. */ |
| 588 | +/* a b c PDIâ© x y z */ |
| 589 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 590 | +/* a b c PDF⬠x y z */ |
| 591 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 592 | +// a b c PDIâ© x y z |
| 593 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 594 | +// a b c PDF⬠x y z |
| 595 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 596 | + |
| 597 | +/* Multiline comments. */ |
| 598 | +/* a b c PDIâ© x y z |
| 599 | + */ |
| 600 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ |
| 601 | +/* a b c PDF⬠x y z |
| 602 | + */ |
| 603 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ |
| 604 | +/* first |
| 605 | + a b c PDIâ© x y z |
| 606 | + */ |
| 607 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-2 } */ |
| 608 | +/* first |
| 609 | + a b c PDF⬠x y z |
| 610 | + */ |
| 611 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-2 } */ |
| 612 | +/* first |
| 613 | + a b c PDIâ© x y z */ |
| 614 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 615 | +/* first |
| 616 | + a b c PDF⬠x y z */ |
| 617 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 618 | + |
| 619 | +void |
| 620 | +g1 () |
| 621 | +{ |
| 622 | + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; |
| 623 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 624 | + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; |
| 625 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 626 | + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; |
| 627 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 628 | + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; |
| 629 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 630 | + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; |
| 631 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 632 | + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; |
| 633 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 634 | + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; |
| 635 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 636 | + const char *s8 = "a b c PDIâ© x y z"; |
| 637 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 638 | + const char *s9 = "a b c PDF⬠x y z"; |
| 639 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 640 | + |
| 641 | + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; |
| 642 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 643 | + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; |
| 644 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 645 | + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; |
| 646 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 647 | + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; |
| 648 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 649 | + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; |
| 650 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 651 | + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; |
| 652 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 653 | + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; |
| 654 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 655 | + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; |
| 656 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 657 | + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; |
| 658 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 659 | + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; |
| 660 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 661 | + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; |
| 662 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 663 | +} |
| 664 | + |
| 665 | +void |
| 666 | +g2 () |
| 667 | +{ |
| 668 | + const char c1 = '\u202a'; |
| 669 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 670 | + const char c2 = '\u202A'; |
| 671 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 672 | + const char c3 = '\u202b'; |
| 673 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 674 | + const char c4 = '\u202B'; |
| 675 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 676 | + const char c5 = '\u202d'; |
| 677 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 678 | + const char c6 = '\u202D'; |
| 679 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 680 | + const char c7 = '\u202e'; |
| 681 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 682 | + const char c8 = '\u202E'; |
| 683 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 684 | + const char c9 = '\u2066'; |
| 685 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 686 | + const char c10 = '\u2067'; |
| 687 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 688 | + const char c11 = '\u2068'; |
| 689 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 690 | +} |
| 691 | + |
| 692 | +int aâªbâ¬c; |
| 693 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 694 | +int aâ«bâ¬c; |
| 695 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 696 | +int aâÂbâ¬c; |
| 697 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 698 | +int aâ®bâ¬c; |
| 699 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 700 | +int aâ¦bâ©c; |
| 701 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 702 | +int aâ§bâ©c; |
| 703 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 704 | +int aâ¨bâ©c; |
| 705 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 706 | +int Aâ¬X; |
| 707 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 708 | +int A\u202cY; |
| 709 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 710 | +int A\u202CY2; |
| 711 | +/* { dg-warning "U\\+202C" "" { target *-*-* } .-1 } */ |
| 712 | + |
| 713 | +int d\u202ae\u202cf; |
| 714 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 715 | +int d\u202Ae\u202cf2; |
| 716 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 717 | +int d\u202be\u202cf; |
| 718 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 719 | +int d\u202Be\u202cf2; |
| 720 | +/* { dg-warning "U\\+202B" "" { target *-*-* } .-1 } */ |
| 721 | +int d\u202de\u202cf; |
| 722 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 723 | +int d\u202De\u202cf2; |
| 724 | +/* { dg-warning "U\\+202D" "" { target *-*-* } .-1 } */ |
| 725 | +int d\u202ee\u202cf; |
| 726 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 727 | +int d\u202Ee\u202cf2; |
| 728 | +/* { dg-warning "U\\+202E" "" { target *-*-* } .-1 } */ |
| 729 | +int d\u2066e\u2069f; |
| 730 | +/* { dg-warning "U\\+2066" "" { target *-*-* } .-1 } */ |
| 731 | +int d\u2067e\u2069f; |
| 732 | +/* { dg-warning "U\\+2067" "" { target *-*-* } .-1 } */ |
| 733 | +int d\u2068e\u2069f; |
| 734 | +/* { dg-warning "U\\+2068" "" { target *-*-* } .-1 } */ |
| 735 | +int X\u2069; |
| 736 | +/* { dg-warning "U\\+2069" "" { target *-*-* } .-1 } */ |
| 737 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-5.c b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| 738 | new file mode 100644 |
| 739 | index 00000000000..68cb053144b |
| 740 | --- /dev/null |
| 741 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-5.c |
| 742 | @@ -0,0 +1,188 @@ |
| 743 | +/* PR preprocessor/103026 */ |
| 744 | +/* { dg-do compile } */ |
| 745 | +/* { dg-options "-Wbidi-chars=unpaired -Wno-multichar -Wno-overflow" } */ |
| 746 | +/* Test all bidi chars in various contexts (identifiers, comments, |
| 747 | + string literals, character constants), both UCN and UTF-8. The bidi |
| 748 | + chars here are properly terminated, except for the character constants. */ |
| 749 | + |
| 750 | +/* a b c LRE⪠1 2 3 PDF⬠x y z */ |
| 751 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 752 | +/* a b c RLE⫠1 2 3 PDF⬠x y z */ |
| 753 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 754 | +/* a b c LROâ 1 2 3 PDF⬠x y z */ |
| 755 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 756 | +/* a b c RLO⮠1 2 3 PDF⬠x y z */ |
| 757 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 758 | +/* a b c LRI⦠1 2 3 PDI⩠x y z */ |
| 759 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 760 | +/* a b c RLI⧠1 2 3 PDI⩠x y */ |
| 761 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 762 | +/* a b c FSI⨠1 2 3 PDI⩠x y z */ |
| 763 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 764 | + |
| 765 | +/* Same but C++ comments instead. */ |
| 766 | +// a b c LRE⪠1 2 3 PDF⬠x y z |
| 767 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 768 | +// a b c RLE⫠1 2 3 PDF⬠x y z |
| 769 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 770 | +// a b c LROâ 1 2 3 PDF⬠x y z |
| 771 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 772 | +// a b c RLO⮠1 2 3 PDF⬠x y z |
| 773 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 774 | +// a b c LRI⦠1 2 3 PDI⩠x y z |
| 775 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 776 | +// a b c RLI⧠1 2 3 PDI⩠x y |
| 777 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 778 | +// a b c FSI⨠1 2 3 PDI⩠x y z |
| 779 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 780 | + |
| 781 | +/* Here we're closing an unopened context; that only warns when =any, not here. */ |
| 782 | +/* a b c PDIâ© x y z */ |
| 783 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 784 | +/* a b c PDF⬠x y z */ |
| 785 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 786 | +// a b c PDIâ© x y z |
| 787 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 788 | +// a b c PDF⬠x y z |
| 789 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 790 | + |
| 791 | +/* Multiline comments. */ |
| 792 | +/* a b c PDIâ© x y z |
| 793 | + */ |
| 794 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| 795 | +/* a b c PDF⬠x y z |
| 796 | + */ |
| 797 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| 798 | +/* first |
| 799 | + a b c PDIâ© x y z |
| 800 | + */ |
| 801 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| 802 | +/* first |
| 803 | + a b c PDF⬠x y z |
| 804 | + */ |
| 805 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-2 } */ |
| 806 | +/* first |
| 807 | + a b c PDIâ© x y z */ |
| 808 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 809 | +/* first |
| 810 | + a b c PDF⬠x y z */ |
| 811 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 812 | + |
| 813 | +void |
| 814 | +g1 () |
| 815 | +{ |
| 816 | + const char *s1 = "a b c LRE⪠1 2 3 PDF⬠x y z"; |
| 817 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 818 | + const char *s2 = "a b c RLE⫠1 2 3 PDF⬠x y z"; |
| 819 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 820 | + const char *s3 = "a b c LROâ 1 2 3 PDF⬠x y z"; |
| 821 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 822 | + const char *s4 = "a b c RLO⮠1 2 3 PDF⬠x y z"; |
| 823 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 824 | + const char *s5 = "a b c LRI⦠1 2 3 PDI⩠x y z"; |
| 825 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 826 | + const char *s6 = "a b c RLI⧠1 2 3 PDI⩠x y z"; |
| 827 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 828 | + const char *s7 = "a b c FSI⨠1 2 3 PDI⩠x y z"; |
| 829 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 830 | + const char *s8 = "a b c PDIâ© x y z"; |
| 831 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 832 | + const char *s9 = "a b c PDF⬠x y z"; |
| 833 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 834 | + |
| 835 | + const char *s10 = "a b c LRE\u202a 1 2 3 PDF\u202c x y z"; |
| 836 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 837 | + const char *s11 = "a b c LRE\u202A 1 2 3 PDF\u202c x y z"; |
| 838 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 839 | + const char *s12 = "a b c RLE\u202b 1 2 3 PDF\u202c x y z"; |
| 840 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 841 | + const char *s13 = "a b c RLE\u202B 1 2 3 PDF\u202c x y z"; |
| 842 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 843 | + const char *s14 = "a b c LRO\u202d 1 2 3 PDF\u202c x y z"; |
| 844 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 845 | + const char *s15 = "a b c LRO\u202D 1 2 3 PDF\u202c x y z"; |
| 846 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 847 | + const char *s16 = "a b c RLO\u202e 1 2 3 PDF\u202c x y z"; |
| 848 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 849 | + const char *s17 = "a b c RLO\u202E 1 2 3 PDF\u202c x y z"; |
| 850 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 851 | + const char *s18 = "a b c LRI\u2066 1 2 3 PDI\u2069 x y z"; |
| 852 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 853 | + const char *s19 = "a b c RLI\u2067 1 2 3 PDI\u2069 x y z"; |
| 854 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 855 | + const char *s20 = "a b c FSI\u2068 1 2 3 PDI\u2069 x y z"; |
| 856 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 857 | +} |
| 858 | + |
| 859 | +void |
| 860 | +g2 () |
| 861 | +{ |
| 862 | + const char c1 = '\u202a'; |
| 863 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 864 | + const char c2 = '\u202A'; |
| 865 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 866 | + const char c3 = '\u202b'; |
| 867 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 868 | + const char c4 = '\u202B'; |
| 869 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 870 | + const char c5 = '\u202d'; |
| 871 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 872 | + const char c6 = '\u202D'; |
| 873 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 874 | + const char c7 = '\u202e'; |
| 875 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 876 | + const char c8 = '\u202E'; |
| 877 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 878 | + const char c9 = '\u2066'; |
| 879 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 880 | + const char c10 = '\u2067'; |
| 881 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 882 | + const char c11 = '\u2068'; |
| 883 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 884 | +} |
| 885 | + |
| 886 | +int aâªbâ¬c; |
| 887 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 888 | +int aâ«bâ¬c; |
| 889 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 890 | +int aâÂbâ¬c; |
| 891 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 892 | +int aâ®bâ¬c; |
| 893 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 894 | +int aâ¦bâ©c; |
| 895 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 896 | +int aâ§bâ©c; |
| 897 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 898 | +int aâ¨bâ©c; |
| 899 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 900 | +int Aâ¬X; |
| 901 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 902 | +int A\u202cY; |
| 903 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 904 | +int A\u202CY2; |
| 905 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 906 | + |
| 907 | +int d\u202ae\u202cf; |
| 908 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 909 | +int d\u202Ae\u202cf2; |
| 910 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 911 | +int d\u202be\u202cf; |
| 912 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 913 | +int d\u202Be\u202cf2; |
| 914 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 915 | +int d\u202de\u202cf; |
| 916 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 917 | +int d\u202De\u202cf2; |
| 918 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 919 | +int d\u202ee\u202cf; |
| 920 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 921 | +int d\u202Ee\u202cf2; |
| 922 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 923 | +int d\u2066e\u2069f; |
| 924 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 925 | +int d\u2067e\u2069f; |
| 926 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 927 | +int d\u2068e\u2069f; |
| 928 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 929 | +int X\u2069; |
| 930 | +/* { dg-bogus "unpaired" "" { target *-*-* } .-1 } */ |
| 931 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-6.c b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| 932 | new file mode 100644 |
| 933 | index 00000000000..0ce6fff2dee |
| 934 | --- /dev/null |
| 935 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-6.c |
| 936 | @@ -0,0 +1,155 @@ |
| 937 | +/* PR preprocessor/103026 */ |
| 938 | +/* { dg-do compile } */ |
| 939 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 940 | +/* Test nesting of bidi chars in various contexts. */ |
| 941 | + |
| 942 | +/* Terminated by the wrong char: */ |
| 943 | +/* a b c LRE⪠1 2 3 PDI⩠x y z */ |
| 944 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 945 | +/* a b c RLEâ« 1 2 3 PDIâ© x y z*/ |
| 946 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 947 | +/* a b c LROâ 1 2 3 PDIâ© x y z */ |
| 948 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 949 | +/* a b c RLOâ® 1 2 3 PDIâ© x y z */ |
| 950 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 951 | +/* a b c LRI⦠1 2 3 PDF⬠x y z */ |
| 952 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 953 | +/* a b c RLI⧠1 2 3 PDF⬠x y z */ |
| 954 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 955 | +/* a b c FSI⨠1 2 3 PDF⬠x y z*/ |
| 956 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 957 | + |
| 958 | +/* LRE⪠PDF⬠*/ |
| 959 | +/* LRE⪠LRE⪠PDF⬠PDF⬠*/ |
| 960 | +/* PDF⬠LRE⪠PDF⬠*/ |
| 961 | +/* LRE⪠PDF⬠LRE⪠PDF⬠*/ |
| 962 | +/* LRE⪠LRE⪠PDF⬠*/ |
| 963 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 964 | +/* PDF⬠LRE⪠*/ |
| 965 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 966 | + |
| 967 | +// a b c LRE⪠1 2 3 PDI⩠x y z |
| 968 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 969 | +// a b c RLEâ« 1 2 3 PDIâ© x y z*/ |
| 970 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 971 | +// a b c LROâ 1 2 3 PDIâ© x y z |
| 972 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 973 | +// a b c RLOâ® 1 2 3 PDIâ© x y z |
| 974 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 975 | +// a b c LRI⦠1 2 3 PDF⬠x y z |
| 976 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 977 | +// a b c RLI⧠1 2 3 PDF⬠x y z |
| 978 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 979 | +// a b c FSI⨠1 2 3 PDF⬠x y z |
| 980 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 981 | + |
| 982 | +// LRE⪠PDF⬠|
| 983 | +// LRE⪠LRE⪠PDF⬠PDF⬠|
| 984 | +// PDF⬠LRE⪠PDF⬠|
| 985 | +// LRE⪠PDF⬠LRE⪠PDF⬠|
| 986 | +// LRE⪠LRE⪠PDF⬠|
| 987 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 988 | +// PDF⬠LRE⪠|
| 989 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 990 | + |
| 991 | +void |
| 992 | +g1 () |
| 993 | +{ |
| 994 | + const char *s1 = "a b c LRE⪠1 2 3 PDI⩠x y z"; |
| 995 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 996 | + const char *s2 = "a b c LRE\u202a 1 2 3 PDI\u2069 x y z"; |
| 997 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 998 | + const char *s3 = "a b c RLEâ« 1 2 3 PDIâ© x y "; |
| 999 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1000 | + const char *s4 = "a b c RLE\u202b 1 2 3 PDI\u2069 x y z"; |
| 1001 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1002 | + const char *s5 = "a b c LROâ 1 2 3 PDIâ© x y z"; |
| 1003 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1004 | + const char *s6 = "a b c LRO\u202d 1 2 3 PDI\u2069 x y z"; |
| 1005 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1006 | + const char *s7 = "a b c RLOâ® 1 2 3 PDIâ© x y z"; |
| 1007 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1008 | + const char *s8 = "a b c RLO\u202e 1 2 3 PDI\u2069 x y z"; |
| 1009 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1010 | + const char *s9 = "a b c LRI⦠1 2 3 PDF⬠x y z"; |
| 1011 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1012 | + const char *s10 = "a b c LRI\u2066 1 2 3 PDF\u202c x y z"; |
| 1013 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1014 | + const char *s11 = "a b c RLI⧠1 2 3 PDF⬠x y z\ |
| 1015 | + "; |
| 1016 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 1017 | + const char *s12 = "a b c RLI\u2067 1 2 3 PDF\u202c x y z"; |
| 1018 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1019 | + const char *s13 = "a b c FSI⨠1 2 3 PDF⬠x y z"; |
| 1020 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1021 | + const char *s14 = "a b c FSI\u2068 1 2 3 PDF\u202c x y z"; |
| 1022 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1023 | + const char *s15 = "PDF⬠LREâª"; |
| 1024 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1025 | + const char *s16 = "PDF\u202c LRE\u202a"; |
| 1026 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1027 | + const char *s17 = "LRE⪠PDFâ¬"; |
| 1028 | + const char *s18 = "LRE\u202a PDF\u202c"; |
| 1029 | + const char *s19 = "LRE⪠LRE⪠PDF⬠PDFâ¬"; |
| 1030 | + const char *s20 = "LRE\u202a LRE\u202a PDF\u202c PDF\u202c"; |
| 1031 | + const char *s21 = "PDF⬠LRE⪠PDFâ¬"; |
| 1032 | + const char *s22 = "PDF\u202c LRE\u202a PDF\u202c"; |
| 1033 | + const char *s23 = "LRE⪠LRE⪠PDFâ¬"; |
| 1034 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1035 | + const char *s24 = "LRE\u202a LRE\u202a PDF\u202c"; |
| 1036 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1037 | + const char *s25 = "PDF⬠LREâª"; |
| 1038 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1039 | + const char *s26 = "PDF\u202c LRE\u202a"; |
| 1040 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1041 | + const char *s27 = "PDF⬠LRE\u202a"; |
| 1042 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1043 | + const char *s28 = "PDF\u202c LREâª"; |
| 1044 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1045 | +} |
| 1046 | + |
| 1047 | +int aLREâªbPDIâ©; |
| 1048 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1049 | +int A\u202aB\u2069C; |
| 1050 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1051 | +int aRLEâ«bPDIâ©; |
| 1052 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1053 | +int a\u202bB\u2069c; |
| 1054 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1055 | +int aLROâÂbPDIâ©; |
| 1056 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1057 | +int a\u202db\u2069c2; |
| 1058 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1059 | +int aRLOâ®bPDIâ©; |
| 1060 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1061 | +int a\u202eb\u2069; |
| 1062 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1063 | +int aLRIâ¦bPDFâ¬; |
| 1064 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1065 | +int a\u2066b\u202c; |
| 1066 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1067 | +int aRLIâ§bPDFâ¬c |
| 1068 | +; |
| 1069 | +/* { dg-warning "unpaired" "" { target *-*-* } .-2 } */ |
| 1070 | +int a\u2067b\u202c; |
| 1071 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1072 | +int aFSIâ¨bPDFâ¬; |
| 1073 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1074 | +int a\u2068b\u202c; |
| 1075 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1076 | +int aFSIâ¨bPD\u202C; |
| 1077 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1078 | +int aFSI\u2068bPDFâ¬_; |
| 1079 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1080 | +int aLREâªbPDFâ¬b; |
| 1081 | +int A\u202aB\u202c; |
| 1082 | +int a_LREâª_LREâª_b_PDFâ¬_PDFâ¬; |
| 1083 | +int A\u202aA\u202aB\u202cB\u202c; |
| 1084 | +int aPDFâ¬bLREadPDFâ¬; |
| 1085 | +int a_\u202C_\u202a_\u202c; |
| 1086 | +int a_LREâª_b_PDFâ¬_c_LREâª_PDFâ¬; |
| 1087 | +int a_\u202a_\u202c_\u202a_\u202c_; |
| 1088 | +int a_LREâª_b_PDFâ¬_c_LREâª; |
| 1089 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1090 | +int a_\u202a_\u202c_\u202a_; |
| 1091 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1092 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-7.c b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| 1093 | new file mode 100644 |
| 1094 | index 00000000000..d012d420ec0 |
| 1095 | --- /dev/null |
| 1096 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-7.c |
| 1097 | @@ -0,0 +1,9 @@ |
| 1098 | +/* PR preprocessor/103026 */ |
| 1099 | +/* { dg-do compile } */ |
| 1100 | +/* { dg-options "-Wbidi-chars=any" } */ |
| 1101 | +/* Test we ignore UCNs in comments. */ |
| 1102 | + |
| 1103 | +// a b c \u202a 1 2 3 |
| 1104 | +// a b c \u202A 1 2 3 |
| 1105 | +/* a b c \u202a 1 2 3 */ |
| 1106 | +/* a b c \u202A 1 2 3 */ |
| 1107 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-8.c b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| 1108 | new file mode 100644 |
| 1109 | index 00000000000..4f54c5092ec |
| 1110 | --- /dev/null |
| 1111 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-8.c |
| 1112 | @@ -0,0 +1,13 @@ |
| 1113 | +/* PR preprocessor/103026 */ |
| 1114 | +/* { dg-do compile } */ |
| 1115 | +/* { dg-options "-Wbidi-chars=any" } */ |
| 1116 | +/* Test \u vs \U. */ |
| 1117 | + |
| 1118 | +int a_\u202A; |
| 1119 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 1120 | +int a_\u202a_2; |
| 1121 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 1122 | +int a_\U0000202A_3; |
| 1123 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 1124 | +int a_\U0000202a_4; |
| 1125 | +/* { dg-warning "U\\+202A" "" { target *-*-* } .-1 } */ |
| 1126 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-9.c b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| 1127 | new file mode 100644 |
| 1128 | index 00000000000..e2af1b1ca97 |
| 1129 | --- /dev/null |
| 1130 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-9.c |
| 1131 | @@ -0,0 +1,29 @@ |
| 1132 | +/* PR preprocessor/103026 */ |
| 1133 | +/* { dg-do compile } */ |
| 1134 | +/* { dg-options "-Wbidi-chars=unpaired" } */ |
| 1135 | +/* Test that we properly separate bidi contexts (comment/identifier/character |
| 1136 | + constant/string literal). */ |
| 1137 | + |
| 1138 | +/* LRE ->âª<- */ int pdf_\u202c_1; |
| 1139 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1140 | +/* RLE ->â«<- */ int pdf_\u202c_2; |
| 1141 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1142 | +/* LRO ->âÂ<- */ int pdf_\u202c_3; |
| 1143 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1144 | +/* RLO ->â®<- */ int pdf_\u202c_4; |
| 1145 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1146 | +/* LRI ->â¦<-*/ int pdi_\u2069_1; |
| 1147 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1148 | +/* RLI ->â§<- */ int pdi_\u2069_12; |
| 1149 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1150 | +/* FSI ->â¨<- */ int pdi_\u2069_3; |
| 1151 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1152 | + |
| 1153 | +const char *s1 = "LRE\u202a"; /* PDF ->â¬<- */ |
| 1154 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1155 | +/* LRE ->âª<- */ const char *s2 = "PDF\u202c"; |
| 1156 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1157 | +const char *s3 = "LRE\u202a"; int pdf_\u202c_5; |
| 1158 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1159 | +int lre_\u202a; const char *s4 = "PDF\u202c"; |
| 1160 | +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ |
| 1161 | diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h |
| 1162 | index 176f8c5bbce..112b9c24751 100644 |
| 1163 | --- a/libcpp/include/cpplib.h |
| 1164 | +++ b/libcpp/include/cpplib.h |
| 1165 | @@ -318,6 +318,17 @@ enum cpp_main_search |
| 1166 | CMS_system, /* Search the system INCLUDE path. */ |
| 1167 | }; |
| 1168 | |
| 1169 | +/* The possible bidirectional control character checking levels, from least |
| 1170 | + restrictive to most. Stored in cpp_options::cpp_warn_bidirectional. */ |
| 1171 | +enum cpp_bidirectional_level { |
| 1172 | + /* No checking (-Wbidi-chars=none). */ |
| 1173 | + bidirectional_none, |
| 1174 | + /* Only detect unpaired uses of bidirectional control characters (-Wbidi-chars=unpaired). */ |
| 1175 | + bidirectional_unpaired, |
| 1176 | + /* Detect any use of bidirectional control characters (-Wbidi-chars=any). */ |
| 1177 | + bidirectional_any |
| 1178 | +}; |
| 1179 | + |
| 1180 | /* This structure is nested inside struct cpp_reader, and |
| 1181 | carries all the options visible to the command line. */ |
| 1182 | struct cpp_options |
| 1183 | @@ -531,6 +542,10 @@ struct cpp_options |
| 1184 | /* True if warn about differences between C++98 and C++11. */ |
| 1185 | bool cpp_warn_cxx11_compat; |
| 1186 | |
| 1187 | + /* Nonzero if bidirectional control characters checking is on. See enum |
| 1188 | + cpp_bidirectional_level. */ |
| 1189 | + unsigned char cpp_warn_bidirectional; |
| 1190 | + |
| 1191 | /* Dependency generation. */ |
| 1192 | struct |
| 1193 | { |
| 1194 | @@ -635,7 +650,8 @@ enum cpp_warning_reason { |
| 1195 | CPP_W_C90_C99_COMPAT, |
| 1196 | CPP_W_C11_C2X_COMPAT, |
| 1197 | CPP_W_CXX11_COMPAT, |
| 1198 | - CPP_W_EXPANSION_TO_DEFINED |
| 1199 | + CPP_W_EXPANSION_TO_DEFINED, |
| 1200 | + CPP_W_BIDIRECTIONAL |
| 1201 | }; |
| 1202 | |
| 1203 | /* Callback for header lookup for HEADER, which is the name of a |
| 1204 | diff --git a/libcpp/init.c b/libcpp/init.c |
| 1205 | index 5a424e23553..f9a8f5f088f 100644 |
| 1206 | --- a/libcpp/init.c |
| 1207 | +++ b/libcpp/init.c |
| 1208 | @@ -219,6 +219,7 @@ cpp_create_reader (enum c_lang lang, cpp |
| 1209 | = ENABLE_CANONICAL_SYSTEM_HEADERS; |
| 1210 | CPP_OPTION (pfile, ext_numeric_literals) = 1; |
| 1211 | CPP_OPTION (pfile, warn_date_time) = 0; |
| 1212 | + CPP_OPTION (pfile, cpp_warn_bidirectional) = bidirectional_unpaired; |
| 1213 | |
| 1214 | /* Default CPP arithmetic to something sensible for the host for the |
| 1215 | benefit of dumb users like fix-header. */ |
| 1216 | diff --git a/libcpp/internal.h b/libcpp/internal.h |
| 1217 | index 8577cab6c83..0ce0246c5a2 100644 |
| 1218 | --- a/libcpp/internal.h |
| 1219 | +++ b/libcpp/internal.h |
| 1220 | @@ -597,6 +597,13 @@ struct cpp_reader |
| 1221 | /* Location identifying the main source file -- intended to be line |
| 1222 | zero of said file. */ |
| 1223 | location_t main_loc; |
| 1224 | + |
| 1225 | + /* Returns true iff we should warn about UTF-8 bidirectional control |
| 1226 | + characters, i.e. the cpp_warn_bidirectional level is not bidirectional_none. */ |
| 1227 | + bool warn_bidi_p () const |
| 1228 | + { |
| 1229 | + return CPP_OPTION (this, cpp_warn_bidirectional) != bidirectional_none; |
| 1230 | + } |
| 1231 | }; |
| 1232 | |
| 1233 | /* Character classes. Based on the more primitive macros in safe-ctype.h. |
| 1234 | diff --git a/libcpp/lex.c b/libcpp/lex.c |
| 1235 | index fa2253d41c3..6a4fbce6030 100644 |
| 1236 | --- a/libcpp/lex.c |
| 1237 | +++ b/libcpp/lex.c |
| 1238 | @@ -1164,6 +1164,324 @@ _cpp_process_line_notes (cpp_reader *pfi |
| 1239 | } |
| 1240 | } |
| 1241 | |
| 1242 | +namespace bidi { /* Bookkeeping for the currently open bidi contexts. */ |
| 1243 | + enum class kind { |
| 1244 | + NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL |
| 1245 | + }; |
| 1246 | + |
| 1247 | + /* All the UTF-8 encodings of bidi characters start with E2. */ |
| 1248 | + constexpr uchar utf8_start = 0xe2; |
| 1249 | + |
| 1250 | + /* A vector holding currently open bidi contexts. We use a char for |
| 1251 | + each context, its LSB is 1 if it represents a PDF context, 0 if it |
| 1252 | + represents a PDI context. The next bit is 1 if this context was opened |
| 1253 | + by a bidi character written as a UCN, and 0 when it was UTF-8. */ |
| 1254 | + semi_embedded_vec <unsigned char, 16> vec; |
| 1255 | + |
| 1256 | + /* Close the whole comment/identifier/string literal/character constant |
| 1257 | + context by discarding every open bidi context. */ |
| 1258 | + void on_close () |
| 1259 | + { |
| 1260 | + vec.truncate (0); |
| 1261 | + } |
| 1262 | + |
| 1263 | + /* Pop the last element in the vector, which must not be empty. */ |
| 1264 | + void pop () |
| 1265 | + { |
| 1266 | + unsigned int len = vec.count (); |
| 1267 | + gcc_checking_assert (len > 0); |
| 1268 | + vec.truncate (len - 1); |
| 1269 | + } |
| 1270 | + |
| 1271 | + /* Return the context of the Ith element: PDF if its LSB is set, else PDI. */ |
| 1272 | + kind ctx_at (unsigned int i) |
| 1273 | + { |
| 1274 | + return (vec[i] & 1) ? kind::PDF : kind::PDI; |
| 1275 | + } |
| 1276 | + |
| 1277 | + /* Return which context is currently open, or NONE if there is none. */ |
| 1278 | + kind current_ctx () |
| 1279 | + { |
| 1280 | + unsigned int len = vec.count (); |
| 1281 | + if (len == 0) |
| 1282 | + return kind::NONE; |
| 1283 | + return ctx_at (len - 1); |
| 1284 | + } |
| 1285 | + |
| 1286 | + /* Return true if the current context comes from a UCN origin, that is, |
| 1287 | + the bidi char which started this bidi context was written as a UCN. The vector must not be empty. */ |
| 1288 | + bool current_ctx_ucn_p () |
| 1289 | + { |
| 1290 | + unsigned int len = vec.count (); |
| 1291 | + gcc_checking_assert (len > 0); |
| 1292 | + return (vec[len - 1] >> 1) & 1; |
| 1293 | + } |
| 1294 | + |
| 1295 | + /* We've read a bidi char K, update the current vector as necessary. UCN_P is true if K was written as a UCN. */ |
| 1296 | + void on_char (kind k, bool ucn_p) |
| 1297 | + { |
| 1298 | + switch (k) |
| 1299 | + { |
| 1300 | + case kind::LRE: |
| 1301 | + case kind::RLE: |
| 1302 | + case kind::LRO: |
| 1303 | + case kind::RLO: |
| 1304 | + vec.push (ucn_p ? 3u : 1u); |
| 1305 | + break; |
| 1306 | + case kind::LRI: |
| 1307 | + case kind::RLI: |
| 1308 | + case kind::FSI: |
| 1309 | + vec.push (ucn_p ? 2u : 0u); |
| 1310 | + break; |
| 1311 | + /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO |
| 1312 | + whose scope has not yet been terminated. It is ignored unless the innermost open context is a PDF context. */ |
| 1313 | + case kind::PDF: |
| 1314 | + if (current_ctx () == kind::PDF) |
| 1315 | + pop (); |
| 1316 | + break; |
| 1317 | + /* PDI terminates the scope of the last LRI, RLI, or FSI whose |
| 1318 | + scope has not yet been terminated, as well as the scopes of |
| 1319 | + any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not |
| 1320 | + yet been terminated. */ |
| 1321 | + case kind::PDI: |
| 1322 | + for (int i = vec.count () - 1; i >= 0; --i) |
| 1323 | + if (ctx_at (i) == kind::PDI) |
| 1324 | + { |
| 1325 | + vec.truncate (i); |
| 1326 | + break; |
| 1327 | + } |
| 1328 | + break; |
| 1329 | + case kind::LTR: |
| 1330 | + case kind::RTL: |
| 1331 | + /* These aren't popped by a PDF/PDI. */ |
| 1332 | + break; |
| 1333 | + [[likely]] case kind::NONE: |
| 1334 | + break; |
| 1335 | + default: |
| 1336 | + abort (); |
| 1337 | + } |
| 1338 | + } |
| 1339 | + |
| 1340 | + /* Return a descriptive string for K. Aborts if K is NONE or unrecognized. */ |
| 1341 | + const char *to_str (kind k) |
| 1342 | + { |
| 1343 | + switch (k) |
| 1344 | + { |
| 1345 | + case kind::LRE: |
| 1346 | + return "U+202A (LEFT-TO-RIGHT EMBEDDING)"; |
| 1347 | + case kind::RLE: |
| 1348 | + return "U+202B (RIGHT-TO-LEFT EMBEDDING)"; |
| 1349 | + case kind::LRO: |
| 1350 | + return "U+202D (LEFT-TO-RIGHT OVERRIDE)"; |
| 1351 | + case kind::RLO: |
| 1352 | + return "U+202E (RIGHT-TO-LEFT OVERRIDE)"; |
| 1353 | + case kind::LRI: |
| 1354 | + return "U+2066 (LEFT-TO-RIGHT ISOLATE)"; |
| 1355 | + case kind::RLI: |
| 1356 | + return "U+2067 (RIGHT-TO-LEFT ISOLATE)"; |
| 1357 | + case kind::FSI: |
| 1358 | + return "U+2068 (FIRST STRONG ISOLATE)"; |
| 1359 | + case kind::PDF: |
| 1360 | + return "U+202C (POP DIRECTIONAL FORMATTING)"; |
| 1361 | + case kind::PDI: |
| 1362 | + return "U+2069 (POP DIRECTIONAL ISOLATE)"; |
| 1363 | + case kind::LTR: |
| 1364 | + return "U+200E (LEFT-TO-RIGHT MARK)"; |
| 1365 | + case kind::RTL: |
| 1366 | + return "U+200F (RIGHT-TO-LEFT MARK)"; |
| 1367 | + default: |
| 1368 | + abort (); |
| 1369 | + } |
| 1370 | + } |
| 1371 | +} |
| 1372 | + |
| 1373 | +/* Parse a sequence of 3 bytes starting with P and return its bidi code. */ |
| 1374 | + |
| 1375 | +static bidi::kind |
| 1376 | +get_bidi_utf8 (const unsigned char *const p) |
| 1377 | +{ |
| 1378 | + gcc_checking_assert (p[0] == bidi::utf8_start); |
| 1379 | + |
| 1380 | + if (p[1] == 0x80) |
| 1381 | + switch (p[2]) |
| 1382 | + { |
| 1383 | + case 0xaa: |
| 1384 | + return bidi::kind::LRE; |
| 1385 | + case 0xab: |
| 1386 | + return bidi::kind::RLE; |
| 1387 | + case 0xac: |
| 1388 | + return bidi::kind::PDF; |
| 1389 | + case 0xad: |
| 1390 | + return bidi::kind::LRO; |
| 1391 | + case 0xae: |
| 1392 | + return bidi::kind::RLO; |
| 1393 | + case 0x8e: |
| 1394 | + return bidi::kind::LTR; |
| 1395 | + case 0x8f: |
| 1396 | + return bidi::kind::RTL; |
| 1397 | + default: |
| 1398 | + break; |
| 1399 | + } |
| 1400 | + else if (p[1] == 0x81) |
| 1401 | + switch (p[2]) |
| 1402 | + { |
| 1403 | + case 0xa6: |
| 1404 | + return bidi::kind::LRI; |
| 1405 | + case 0xa7: |
| 1406 | + return bidi::kind::RLI; |
| 1407 | + case 0xa8: |
| 1408 | + return bidi::kind::FSI; |
| 1409 | + case 0xa9: |
| 1410 | + return bidi::kind::PDI; |
| 1411 | + default: |
| 1412 | + break; |
| 1413 | + } |
| 1414 | + |
| 1415 | + return bidi::kind::NONE; |
| 1416 | +} |
| 1417 | + |
| 1418 | +/* Parse a UCN where P points just past \u or \U and return its bidi code. */ |
| 1419 | + |
| 1420 | +static bidi::kind |
| 1421 | +get_bidi_ucn (const unsigned char *p, bool is_U) |
| 1422 | +{ |
| 1423 | + /* 6.4.3 Universal Character Names |
| 1424 | + \u hex-quad |
| 1425 | + \U hex-quad hex-quad |
| 1426 | + where \unnnn means \U0000nnnn. */ |
| 1427 | + |
| 1428 | + if (is_U) |
| 1429 | + { |
| 1430 | + if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') |
| 1431 | + return bidi::kind::NONE; |
| 1432 | + /* Skip 4B so we can treat \u and \U the same below. */ |
| 1433 | + p += 4; |
| 1434 | + } |
| 1435 | + |
| 1436 | + /* All code points we are looking for start with 20xx. */ |
| 1437 | + if (p[0] != '2' || p[1] != '0') |
| 1438 | + return bidi::kind::NONE; |
| 1439 | + else if (p[2] == '2') |
| 1440 | + switch (p[3]) |
| 1441 | + { |
| 1442 | + case 'a': |
| 1443 | + case 'A': |
| 1444 | + return bidi::kind::LRE; |
| 1445 | + case 'b': |
| 1446 | + case 'B': |
| 1447 | + return bidi::kind::RLE; |
| 1448 | + case 'c': |
| 1449 | + case 'C': |
| 1450 | + return bidi::kind::PDF; |
| 1451 | + case 'd': |
| 1452 | + case 'D': |
| 1453 | + return bidi::kind::LRO; |
| 1454 | + case 'e': |
| 1455 | + case 'E': |
| 1456 | + return bidi::kind::RLO; |
| 1457 | + default: |
| 1458 | + break; |
| 1459 | + } |
| 1460 | + else if (p[2] == '6') |
| 1461 | + switch (p[3]) |
| 1462 | + { |
| 1463 | + case '6': |
| 1464 | + return bidi::kind::LRI; |
| 1465 | + case '7': |
| 1466 | + return bidi::kind::RLI; |
| 1467 | + case '8': |
| 1468 | + return bidi::kind::FSI; |
| 1469 | + case '9': |
| 1470 | + return bidi::kind::PDI; |
| 1471 | + default: |
| 1472 | + break; |
| 1473 | + } |
| 1474 | + else if (p[2] == '0') |
| 1475 | + switch (p[3]) |
| 1476 | + { |
| 1477 | + case 'e': |
| 1478 | + case 'E': |
| 1479 | + return bidi::kind::LTR; |
| 1480 | + case 'f': |
| 1481 | + case 'F': |
| 1482 | + return bidi::kind::RTL; |
| 1483 | + default: |
| 1484 | + break; |
| 1485 | + } |
| 1486 | + |
| 1487 | + return bidi::kind::NONE; |
| 1488 | +} |
| 1489 | + |
| 1490 | +/* We're closing a bidi context, that is, we've encountered a newline, |
| 1491 | + are closing a C-style comment, or are at the end of a string literal, |
| 1492 | + character constant, or identifier. Warn if this context was not |
| 1493 | + properly terminated by a PDI or PDF. P points to the last character |
| 1494 | + in this context. */ |
| 1495 | + |
| 1496 | +static void |
| 1497 | +maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p) |
| 1498 | +{ |
| 1499 | + if (CPP_OPTION (pfile, cpp_warn_bidirectional) == bidirectional_unpaired |
| 1500 | + && bidi::vec.count () > 0) |
| 1501 | + { |
| 1502 | + const location_t loc |
| 1503 | + = linemap_position_for_column (pfile->line_table, |
| 1504 | + CPP_BUF_COLUMN (pfile->buffer, p)); |
| 1505 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| 1506 | + "unpaired UTF-8 bidirectional control character " |
| 1507 | + "detected"); |
| 1508 | + } |
| 1509 | + /* We're done with this context. */ |
| 1510 | + bidi::on_close (); |
| 1511 | +} |
| 1512 | + |
| 1513 | +/* We're at the beginning or in the middle of an identifier/comment/string |
| 1514 | + literal/character constant. Warn if we've encountered a bidi character. |
| 1515 | + KIND says which bidi character it was; P points to it in the character |
| 1516 | + stream. UCN_P is true iff this bidi character was written as a UCN. */ |
| 1517 | + |
| 1518 | +static void |
| 1519 | +maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, |
| 1520 | + bool ucn_p) |
| 1521 | +{ |
| 1522 | + if (__builtin_expect (kind == bidi::kind::NONE, 1)) |
| 1523 | + return; |
| 1524 | + |
| 1525 | + const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional); |
| 1526 | + |
| 1527 | + if (warn_bidi != bidirectional_none) |
| 1528 | + { |
| 1529 | + const location_t loc |
| 1530 | + = linemap_position_for_column (pfile->line_table, |
| 1531 | + CPP_BUF_COLUMN (pfile->buffer, p)); |
| 1532 | + /* It seems excessive to warn about a PDI/PDF that is closing |
| 1533 | + an opened context because we've already warned about the |
| 1534 | + opening character. Except warn when we have a UCN x UTF-8 |
| 1535 | + mismatch. */ |
| 1536 | + if (kind == bidi::current_ctx ()) |
| 1537 | + { |
| 1538 | + if (warn_bidi == bidirectional_unpaired |
| 1539 | + && bidi::current_ctx_ucn_p () != ucn_p) |
| 1540 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| 1541 | + "UTF-8 vs UCN mismatch when closing " |
| 1542 | + "a context by \"%s\"", bidi::to_str (kind)); |
| 1543 | + } |
| 1544 | + else if (warn_bidi == bidirectional_any) |
| 1545 | + { |
| 1546 | + if (kind == bidi::kind::PDF || kind == bidi::kind::PDI) |
| 1547 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| 1548 | + "\"%s\" is closing an unopened context", |
| 1549 | + bidi::to_str (kind)); |
| 1550 | + else |
| 1551 | + cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0, |
| 1552 | + "found problematic Unicode character \"%s\"", |
| 1553 | + bidi::to_str (kind)); |
| 1554 | + } |
| 1555 | + } |
| 1556 | + /* We're done with this context. */ |
| 1557 | + bidi::on_char (kind, ucn_p); |
| 1558 | +} |
| 1559 | + |
| 1560 | /* Skip a C-style block comment. We find the end of the comment by |
| 1561 | seeing if an asterisk is before every '/' we encounter. Returns |
| 1562 | nonzero if comment terminated by EOF, zero otherwise. |
| 1563 | @@ -1175,6 +1493,7 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| 1564 | cpp_buffer *buffer = pfile->buffer; |
| 1565 | const uchar *cur = buffer->cur; |
| 1566 | uchar c; |
| 1567 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1568 | |
| 1569 | cur++; |
| 1570 | if (*cur == '/') |
| 1571 | @@ -1189,7 +1508,11 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| 1572 | if (c == '/') |
| 1573 | { |
| 1574 | if (cur[-2] == '*') |
| 1575 | - break; |
| 1576 | + { |
| 1577 | + if (warn_bidi_p) |
| 1578 | + maybe_warn_bidi_on_close (pfile, cur); |
| 1579 | + break; |
| 1580 | + } |
| 1581 | |
| 1582 | /* Warn about potential nested comments, but not if the '/' |
| 1583 | comes immediately before the true comment delimiter. |
| 1584 | @@ -1208,6 +1531,8 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| 1585 | { |
| 1586 | unsigned int cols; |
| 1587 | buffer->cur = cur - 1; |
| 1588 | + if (warn_bidi_p) |
| 1589 | + maybe_warn_bidi_on_close (pfile, cur); |
| 1590 | _cpp_process_line_notes (pfile, true); |
| 1591 | if (buffer->next_line >= buffer->rlimit) |
| 1592 | return true; |
| 1593 | @@ -1218,6 +1543,13 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| 1594 | |
| 1595 | cur = buffer->cur; |
| 1596 | } |
| 1597 | + /* If this is a beginning of a UTF-8 encoding, it might be |
| 1598 | + a bidirectional control character. */ |
| 1599 | + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| 1600 | + { |
| 1601 | + bidi::kind kind = get_bidi_utf8 (cur - 1); |
| 1602 | + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); |
| 1603 | + } |
| 1604 | } |
| 1605 | |
| 1606 | buffer->cur = cur; |
| 1607 | @@ -1233,9 +1565,31 @@ skip_line_comment (cpp_reader *pfile) |
| 1608 | { |
| 1609 | cpp_buffer *buffer = pfile->buffer; |
| 1610 | location_t orig_line = pfile->line_table->highest_line; |
| 1611 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1612 | |
| 1613 | - while (*buffer->cur != '\n') |
| 1614 | - buffer->cur++; |
| 1615 | + if (!warn_bidi_p) |
| 1616 | + while (*buffer->cur != '\n') |
| 1617 | + buffer->cur++; |
| 1618 | + else |
| 1619 | + { |
| 1620 | + while (*buffer->cur != '\n' |
| 1621 | + && *buffer->cur != bidi::utf8_start) |
| 1622 | + buffer->cur++; |
| 1623 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) |
| 1624 | + { |
| 1625 | + while (*buffer->cur != '\n') |
| 1626 | + { |
| 1627 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) |
| 1628 | + { |
| 1629 | + bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| 1630 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 1631 | + /*ucn_p=*/false); |
| 1632 | + } |
| 1633 | + buffer->cur++; |
| 1634 | + } |
| 1635 | + maybe_warn_bidi_on_close (pfile, buffer->cur); |
| 1636 | + } |
| 1637 | + } |
| 1638 | |
| 1639 | _cpp_process_line_notes (pfile, true); |
| 1640 | return orig_line != pfile->line_table->highest_line; |
| 1641 | @@ -1317,11 +1671,13 @@ static const cppchar_t utf8_signifier = |
| 1642 | |
| 1643 | /* Returns TRUE if the sequence starting at buffer->cur is valid in |
| 1644 | an identifier. FIRST is TRUE if this starts an identifier. */ |
| 1645 | + |
| 1646 | static bool |
| 1647 | forms_identifier_p (cpp_reader *pfile, int first, |
| 1648 | struct normalize_state *state) |
| 1649 | { |
| 1650 | cpp_buffer *buffer = pfile->buffer; |
| 1651 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1652 | |
| 1653 | if (*buffer->cur == '$') |
| 1654 | { |
| 1655 | @@ -1344,6 +1700,13 @@ forms_identifier_p (cpp_reader *pfile, i |
| 1656 | cppchar_t s; |
| 1657 | if (*buffer->cur >= utf8_signifier) |
| 1658 | { |
| 1659 | + if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) |
| 1660 | + && warn_bidi_p) |
| 1661 | + { |
| 1662 | + bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| 1663 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 1664 | + /*ucn_p=*/false); |
| 1665 | + } |
| 1666 | if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| 1667 | state, &s)) |
| 1668 | return true; |
| 1669 | @@ -1352,6 +1715,13 @@ forms_identifier_p (cpp_reader *pfile, i |
| 1670 | && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) |
| 1671 | { |
| 1672 | buffer->cur += 2; |
| 1673 | + if (warn_bidi_p) |
| 1674 | + { |
| 1675 | + bidi::kind kind = get_bidi_ucn (buffer->cur, |
| 1676 | + buffer->cur[-1] == 'U'); |
| 1677 | + maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 1678 | + /*ucn_p=*/true); |
| 1679 | + } |
| 1680 | if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| 1681 | state, &s, NULL, NULL)) |
| 1682 | return true; |
| 1683 | @@ -1460,6 +1830,7 @@ lex_identifier (cpp_reader *pfile, const |
| 1684 | const uchar *cur; |
| 1685 | unsigned int len; |
| 1686 | unsigned int hash = HT_HASHSTEP (0, *base); |
| 1687 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1688 | |
| 1689 | cur = pfile->buffer->cur; |
| 1690 | if (! starts_ucn) |
| 1691 | @@ -1483,6 +1854,8 @@ lex_identifier (cpp_reader *pfile, const |
| 1692 | pfile->buffer->cur++; |
| 1693 | } |
| 1694 | } while (forms_identifier_p (pfile, false, nst)); |
| 1695 | + if (warn_bidi_p) |
| 1696 | + maybe_warn_bidi_on_close (pfile, pfile->buffer->cur); |
| 1697 | result = _cpp_interpret_identifier (pfile, base, |
| 1698 | pfile->buffer->cur - base); |
| 1699 | *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base); |
| 1700 | @@ -1719,6 +2092,7 @@ static void |
| 1701 | lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base) |
| 1702 | { |
| 1703 | const uchar *pos = base; |
| 1704 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1705 | |
| 1706 | /* 'tis a pity this information isn't passed down from the lexer's |
| 1707 | initial categorization of the token. */ |
| 1708 | @@ -1955,8 +2329,15 @@ lex_raw_string (cpp_reader *pfile, cpp_t |
| 1709 | pos = base = pfile->buffer->cur; |
| 1710 | note = &pfile->buffer->notes[pfile->buffer->cur_note]; |
| 1711 | } |
| 1712 | + else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) |
| 1713 | + && warn_bidi_p) |
| 1714 | + maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), |
| 1715 | + /*ucn_p=*/false); |
| 1716 | } |
| 1717 | |
| 1718 | + if (warn_bidi_p) |
| 1719 | + maybe_warn_bidi_on_close (pfile, pos); |
| 1720 | + |
| 1721 | if (CPP_OPTION (pfile, user_literals)) |
| 1722 | { |
| 1723 | /* If a string format macro, say from inttypes.h, is placed touching |
| 1724 | @@ -2051,15 +2432,27 @@ lex_string (cpp_reader *pfile, cpp_token |
| 1725 | else |
| 1726 | terminator = '>', type = CPP_HEADER_NAME; |
| 1727 | |
| 1728 | + const bool warn_bidi_p = pfile->warn_bidi_p (); |
| 1729 | for (;;) |
| 1730 | { |
| 1731 | cppchar_t c = *cur++; |
| 1732 | |
| 1733 | /* In #include-style directives, terminators are not escapable. */ |
| 1734 | if (c == '\\' && !pfile->state.angled_headers && *cur != '\n') |
| 1735 | - cur++; |
| 1736 | + { |
| 1737 | + if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) |
| 1738 | + { |
| 1739 | + bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); |
| 1740 | + maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); |
| 1741 | + } |
| 1742 | + cur++; |
| 1743 | + } |
| 1744 | else if (c == terminator) |
| 1745 | - break; |
| 1746 | + { |
| 1747 | + if (warn_bidi_p) |
| 1748 | + maybe_warn_bidi_on_close (pfile, cur - 1); |
| 1749 | + break; |
| 1750 | + } |
| 1751 | else if (c == '\n') |
| 1752 | { |
| 1753 | cur--; |
| 1754 | @@ -2076,6 +2469,11 @@ lex_string (cpp_reader *pfile, cpp_token |
| 1755 | } |
| 1756 | else if (c == '\0') |
| 1757 | saw_NUL = true; |
| 1758 | + else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| 1759 | + { |
| 1760 | + bidi::kind kind = get_bidi_utf8 (cur - 1); |
| 1761 | + maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); |
| 1762 | + } |
| 1763 | } |
| 1764 | |
| 1765 | if (saw_NUL && !pfile->state.skipping) |