Brad Bishop | bec4ebc | 2022-08-03 09:55:16 -0400 | [diff] [blame] | 1 | From bef32d4a28595e933f24fef378cf052a30b674a7 Mon Sep 17 00:00:00 2001 |
| 2 | From: David Malcolm <dmalcolm@redhat.com> |
| 3 | Date: Tue, 2 Nov 2021 15:45:22 -0400 |
| 4 | Subject: [PATCH] libcpp: capture and underline ranges in -Wbidi-chars= |
| 5 | [PR103026] |
| 6 | MIME-Version: 1.0 |
| 7 | Content-Type: text/plain; charset=utf8 |
| 8 | Content-Transfer-Encoding: 8bit |
| 9 | |
| 10 | This patch converts the bidi::vec to use a struct so that we can |
| 11 | capture location_t values for the bidirectional control characters. |
| 12 | |
| 13 | Before: |
| 14 | |
| 15 | Wbidi-chars-1.c: In function ‘main’: |
| 16 | Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] |
| 17 | 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ |
| 18 | | ^ |
| 19 | Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=] |
| 20 | 9 | /* end admins only <U+202E> { <U+2066>*/ |
| 21 | | ^ |
| 22 | |
| 23 | After: |
| 24 | |
| 25 | Wbidi-chars-1.c: In function ‘main’: |
| 26 | Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=] |
| 27 | 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ |
| 28 | | ~~~~~~~~ ~~~~~~~~ ^ |
| 29 | | | | | |
| 30 | | | | end of bidirectional context |
| 31 | | U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE) |
| 32 | Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=] |
| 33 | 9 | /* end admins only <U+202E> { <U+2066>*/ |
| 34 | | ~~~~~~~~ ~~~~~~~~ ^ |
| 35 | | | | | |
| 36 | | | | end of bidirectional context |
| 37 | | | U+2066 (LEFT-TO-RIGHT ISOLATE) |
| 38 | | U+202E (RIGHT-TO-LEFT OVERRIDE) |
| 39 | |
| 40 | Signed-off-by: David Malcolm <dmalcolm@redhat.com> |
| 41 | |
| 42 | gcc/testsuite/ChangeLog: |
| 43 | PR preprocessor/103026 |
| 44 | * c-c++-common/Wbidi-chars-ranges.c: New test. |
| 45 | |
| 46 | libcpp/ChangeLog: |
| 47 | PR preprocessor/103026 |
| 48 | * lex.c (struct bidi::context): New. |
| 49 | (bidi::vec): Convert to a vec of context rather than unsigned |
| 50 | char. |
| 51 | (bidi::ctx_at): Rename to... |
| 52 | (bidi::pop_kind_at): ...this and reimplement for above change. |
| 53 | (bidi::current_ctx): Update for change to vec. |
| 54 | (bidi::current_ctx_ucn_p): Likewise. |
| 55 | (bidi::current_ctx_loc): New. |
| 56 | (bidi::on_char): Update for usage of context struct. Add "loc" |
| 57 | param and pass it when pushing contexts. |
| 58 | (get_location_for_byte_range_in_cur_line): New. |
| 59 | (get_bidi_utf8): Rename to... |
| 60 | (get_bidi_utf8_1): ...this, reintroducing... |
| 61 | (get_bidi_utf8): ...as a wrapper, setting *OUT when the result is |
| 62 | not NONE. |
| 63 | (get_bidi_ucn): Rename to... |
| 64 | (get_bidi_ucn_1): ...this, reintroducing... |
| 65 | (get_bidi_ucn): ...as a wrapper, setting *OUT when the result is |
| 66 | not NONE. |
| 67 | (class unpaired_bidi_rich_location): New. |
| 68 | (maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when |
| 69 | reporting on unpaired bidi chars. Split into singular vs plural |
| 70 | spellings. |
| 71 | (maybe_warn_bidi_on_char): Pass in a location_t rather than a |
| 72 | const uchar * and use it when emitting warnings, and when calling |
| 73 | bidi::on_char. |
| 74 | (_cpp_skip_block_comment): Capture location when kind is not NONE |
| 75 | and pass it to maybe_warn_bidi_on_char. |
| 76 | (skip_line_comment): Likewise. |
| 77 | (forms_identifier_p): Likewise. |
| 78 | (lex_raw_string): Likewise. |
| 79 | (lex_string): Likewise. |
| 80 | |
| 81 | Signed-off-by: David Malcolm <dmalcolm@redhat.com> |
| 82 | |
| 83 | CVE: CVE-2021-42574 |
| 84 | Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=bef32d4a28595e933f24fef378cf052a30b674a7] |
| 85 | Signed-off-by: Pgowda <pgowda.cve@gmail.com> |
| 86 | |
| 87 | --- |
| 88 | .../c-c++-common/Wbidi-chars-ranges.c | 54 ++++ |
| 89 | libcpp/lex.c | 251 ++++++++++++++---- |
| 90 | 2 files changed, 257 insertions(+), 48 deletions(-) |
| 91 | create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c |
| 92 | |
| 93 | diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c |
| 94 | new file mode 100644 |
| 95 | index 00000000000..298750a2a64 |
| 96 | --- /dev/null |
| 97 | +++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c |
| 98 | @@ -0,0 +1,54 @@ |
| 99 | +/* PR preprocessor/103026 */ |
| 100 | +/* { dg-do compile } */ |
| 101 | +/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */ |
| 102 | +/* Verify that we escape and underline pertinent bidirectional |
| 103 | + control characters when quoting the source. */ |
| 104 | + |
| 105 | +int test_unpaired_bidi () { |
| 106 | + int isAdmin = 0; |
| 107 | + /*â® } â¦if (isAdmin)⩠⦠begin admins only */ |
| 108 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 109 | +#if 0 |
| 110 | + { dg-begin-multiline-output "" } |
| 111 | + /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */ |
| 112 | + ~~~~~~~~ ~~~~~~~~ ^ |
| 113 | + | | | |
| 114 | + | | end of bidirectional context |
| 115 | + U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE) |
| 116 | + { dg-end-multiline-output "" } |
| 117 | +#endif |
| 118 | + |
| 119 | + __builtin_printf("You are an admin.\n"); |
| 120 | + /* end admins only â® { â¦*/ |
| 121 | +/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */ |
| 122 | +#if 0 |
| 123 | + { dg-begin-multiline-output "" } |
| 124 | + /* end admins only <U+202E> { <U+2066>*/ |
| 125 | + ~~~~~~~~ ~~~~~~~~ ^ |
| 126 | + | | | |
| 127 | + | | end of bidirectional context |
| 128 | + | U+2066 (LEFT-TO-RIGHT ISOLATE) |
| 129 | + U+202E (RIGHT-TO-LEFT OVERRIDE) |
| 130 | + { dg-end-multiline-output "" } |
| 131 | +#endif |
| 132 | + |
| 133 | + return 0; |
| 134 | +} |
| 135 | + |
| 136 | +int LRE_âª_PDF_\u202c; |
| 137 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 138 | +#if 0 |
| 139 | + { dg-begin-multiline-output "" } |
| 140 | + int LRE_<U+202A>_PDF_\u202c; |
| 141 | + ~~~~~~~~ ^~~~~~ |
| 142 | + { dg-end-multiline-output "" } |
| 143 | +#endif |
| 144 | + |
| 145 | +const char *s1 = "LRE_âª_PDF_\u202c"; |
| 146 | +/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */ |
| 147 | +#if 0 |
| 148 | + { dg-begin-multiline-output "" } |
| 149 | + const char *s1 = "LRE_<U+202A>_PDF_\u202c"; |
| 150 | + ~~~~~~~~ ^~~~~~ |
| 151 | + { dg-end-multiline-output "" } |
| 152 | +#endif |
| 153 | diff --git a/libcpp/lex.c b/libcpp/lex.c |
| 154 | index 2421d6c0f40..94c36f0d014 100644 |
| 155 | --- a/libcpp/lex.c |
| 156 | +++ b/libcpp/lex.c |
| 157 | @@ -1172,11 +1172,34 @@ namespace bidi { |
| 158 | /* All the UTF-8 encodings of bidi characters start with E2. */ |
| 159 | constexpr uchar utf8_start = 0xe2; |
| 160 | |
| 161 | + struct context |
| 162 | + { |
| 163 | + context () {} |
| 164 | + context (location_t loc, kind k, bool pdf, bool ucn) |
| 165 | + : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn) |
| 166 | + { |
| 167 | + } |
| 168 | + |
| 169 | + kind get_pop_kind () const |
| 170 | + { |
| 171 | + return m_pdf ? kind::PDF : kind::PDI; |
| 172 | + } |
| 173 | + bool ucn_p () const |
| 174 | + { |
| 175 | + return m_ucn; |
| 176 | + } |
| 177 | + |
| 178 | + location_t m_loc; |
| 179 | + kind m_kind; |
| 180 | + unsigned m_pdf : 1; |
| 181 | + unsigned m_ucn : 1; |
| 182 | + }; |
| 183 | + |
| 184 | /* A vector holding currently open bidi contexts. We use a char for |
| 185 | each context, its LSB is 1 if it represents a PDF context, 0 if it |
| 186 | represents a PDI context. The next bit is 1 if this context was open |
| 187 | by a bidi character written as a UCN, and 0 when it was UTF-8. */ |
| 188 | - semi_embedded_vec <unsigned char, 16> vec; |
| 189 | + semi_embedded_vec <context, 16> vec; |
| 190 | |
| 191 | /* Close the whole comment/identifier/string literal/character constant |
| 192 | context. */ |
| 193 | @@ -1193,19 +1216,19 @@ namespace bidi { |
| 194 | vec.truncate (len - 1); |
| 195 | } |
| 196 | |
| 197 | - /* Return the context of the Ith element. */ |
| 198 | - kind ctx_at (unsigned int i) |
| 199 | + /* Return the pop kind of the context of the Ith element. */ |
| 200 | + kind pop_kind_at (unsigned int i) |
| 201 | { |
| 202 | - return (vec[i] & 1) ? kind::PDF : kind::PDI; |
| 203 | + return vec[i].get_pop_kind (); |
| 204 | } |
| 205 | |
| 206 | - /* Return which context is currently opened. */ |
| 207 | + /* Return the pop kind of the context that is currently opened. */ |
| 208 | kind current_ctx () |
| 209 | { |
| 210 | unsigned int len = vec.count (); |
| 211 | if (len == 0) |
| 212 | return kind::NONE; |
| 213 | - return ctx_at (len - 1); |
| 214 | + return vec[len - 1].get_pop_kind (); |
| 215 | } |
| 216 | |
| 217 | /* Return true if the current context comes from a UCN origin, that is, |
| 218 | @@ -1214,11 +1237,19 @@ namespace bidi { |
| 219 | { |
| 220 | unsigned int len = vec.count (); |
| 221 | gcc_checking_assert (len > 0); |
| 222 | - return (vec[len - 1] >> 1) & 1; |
| 223 | + return vec[len - 1].m_ucn; |
| 224 | } |
| 225 | |
| 226 | - /* We've read a bidi char, update the current vector as necessary. */ |
| 227 | - void on_char (kind k, bool ucn_p) |
| 228 | + location_t current_ctx_loc () |
| 229 | + { |
| 230 | + unsigned int len = vec.count (); |
| 231 | + gcc_checking_assert (len > 0); |
| 232 | + return vec[len - 1].m_loc; |
| 233 | + } |
| 234 | + |
| 235 | + /* We've read a bidi char, update the current vector as necessary. |
| 236 | + LOC is only valid when K is not kind::NONE. */ |
| 237 | + void on_char (kind k, bool ucn_p, location_t loc) |
| 238 | { |
| 239 | switch (k) |
| 240 | { |
| 241 | @@ -1226,12 +1257,12 @@ namespace bidi { |
| 242 | case kind::RLE: |
| 243 | case kind::LRO: |
| 244 | case kind::RLO: |
| 245 | - vec.push (ucn_p ? 3u : 1u); |
| 246 | + vec.push (context (loc, k, true, ucn_p)); |
| 247 | break; |
| 248 | case kind::LRI: |
| 249 | case kind::RLI: |
| 250 | case kind::FSI: |
| 251 | - vec.push (ucn_p ? 2u : 0u); |
| 252 | + vec.push (context (loc, k, false, ucn_p)); |
| 253 | break; |
| 254 | /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO |
| 255 | whose scope has not yet been terminated. */ |
| 256 | @@ -1245,7 +1276,7 @@ namespace bidi { |
| 257 | yet been terminated. */ |
| 258 | case kind::PDI: |
| 259 | for (int i = vec.count () - 1; i >= 0; --i) |
| 260 | - if (ctx_at (i) == kind::PDI) |
| 261 | + if (pop_kind_at (i) == kind::PDI) |
| 262 | { |
| 263 | vec.truncate (i); |
| 264 | break; |
| 265 | @@ -1295,10 +1326,47 @@ namespace bidi { |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | +/* Get location_t for the range of bytes [START, START + NUM_BYTES) |
| 270 | + within the current line in PFILE, with the caret at START. */ |
| 271 | + |
| 272 | +static location_t |
| 273 | +get_location_for_byte_range_in_cur_line (cpp_reader *pfile, |
| 274 | + const unsigned char *const start, |
| 275 | + size_t num_bytes) |
| 276 | +{ |
| 277 | + gcc_checking_assert (num_bytes > 0); |
| 278 | + |
| 279 | + /* CPP_BUF_COLUMN and linemap_position_for_column both refer |
| 280 | + to offsets in bytes, but CPP_BUF_COLUMN is 0-based, |
| 281 | + whereas linemap_position_for_column is 1-based. */ |
| 282 | + |
| 283 | + /* Get 0-based offsets within the line. */ |
| 284 | + size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start); |
| 285 | + size_t end_offset = start_offset + num_bytes - 1; |
| 286 | + |
| 287 | + /* Now convert to location_t, where "columns" are 1-based byte offsets. */ |
| 288 | + location_t start_loc = linemap_position_for_column (pfile->line_table, |
| 289 | + start_offset + 1); |
| 290 | + location_t end_loc = linemap_position_for_column (pfile->line_table, |
| 291 | + end_offset + 1); |
| 292 | + |
| 293 | + if (start_loc == end_loc) |
| 294 | + return start_loc; |
| 295 | + |
| 296 | + source_range src_range; |
| 297 | + src_range.m_start = start_loc; |
| 298 | + src_range.m_finish = end_loc; |
| 299 | + location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table, |
| 300 | + start_loc, |
| 301 | + src_range, |
| 302 | + NULL); |
| 303 | + return combined_loc; |
| 304 | +} |
| 305 | + |
| 306 | /* Parse a sequence of 3 bytes starting with P and return its bidi code. */ |
| 307 | |
| 308 | static bidi::kind |
| 309 | -get_bidi_utf8 (const unsigned char *const p) |
| 310 | +get_bidi_utf8_1 (const unsigned char *const p) |
| 311 | { |
| 312 | gcc_checking_assert (p[0] == bidi::utf8_start); |
| 313 | |
| 314 | @@ -1340,10 +1408,25 @@ get_bidi_utf8 (const unsigned char *cons |
| 315 | return bidi::kind::NONE; |
| 316 | } |
| 317 | |
| 318 | +/* Parse a sequence of 3 bytes starting with P and return its bidi code. |
| 319 | + If the kind is not NONE, write the location to *OUT. */ |
| 320 | + |
| 321 | +static bidi::kind |
| 322 | +get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out) |
| 323 | +{ |
| 324 | + bidi::kind result = get_bidi_utf8_1 (p); |
| 325 | + if (result != bidi::kind::NONE) |
| 326 | + { |
| 327 | + /* We have a sequence of 3 bytes starting at P. */ |
| 328 | + *out = get_location_for_byte_range_in_cur_line (pfile, p, 3); |
| 329 | + } |
| 330 | + return result; |
| 331 | +} |
| 332 | + |
| 333 | /* Parse a UCN where P points just past \u or \U and return its bidi code. */ |
| 334 | |
| 335 | static bidi::kind |
| 336 | -get_bidi_ucn (const unsigned char *p, bool is_U) |
| 337 | +get_bidi_ucn_1 (const unsigned char *p, bool is_U) |
| 338 | { |
| 339 | /* 6.4.3 Universal Character Names |
| 340 | \u hex-quad |
| 341 | @@ -1412,6 +1495,62 @@ get_bidi_ucn (const unsigned char *p, bo |
| 342 | return bidi::kind::NONE; |
| 343 | } |
| 344 | |
| 345 | +/* Parse a UCN where P points just past \u or \U and return its bidi code. |
| 346 | + If the kind is not NONE, write the location to *OUT. */ |
| 347 | + |
| 348 | +static bidi::kind |
| 349 | +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U, |
| 350 | + location_t *out) |
| 351 | +{ |
| 352 | + bidi::kind result = get_bidi_ucn_1 (p, is_U); |
| 353 | + if (result != bidi::kind::NONE) |
| 354 | + { |
| 355 | + const unsigned char *start = p - 2; |
| 356 | + size_t num_bytes = 2 + (is_U ? 8 : 4); |
| 357 | + *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes); |
| 358 | + } |
| 359 | + return result; |
| 360 | +} |
| 361 | + |
| 362 | +/* Subclass of rich_location for reporting on unpaired UTF-8 |
| 363 | + bidirectional control character(s). |
| 364 | + Escape the source lines on output, and show all unclosed |
| 365 | + bidi contexts, labelling everything. */ |
| 366 | + |
| 367 | +class unpaired_bidi_rich_location : public rich_location |
| 368 | +{ |
| 369 | + public: |
| 370 | + class custom_range_label : public range_label |
| 371 | + { |
| 372 | + public: |
| 373 | + label_text get_text (unsigned range_idx) const FINAL OVERRIDE |
| 374 | + { |
| 375 | + /* range 0 is the primary location; each subsequent range i + 1 |
| 376 | + is for bidi::vec[i]. */ |
| 377 | + if (range_idx > 0) |
| 378 | + { |
| 379 | + const bidi::context &ctxt (bidi::vec[range_idx - 1]); |
| 380 | + return label_text::borrow (bidi::to_str (ctxt.m_kind)); |
| 381 | + } |
| 382 | + else |
| 383 | + return label_text::borrow (_("end of bidirectional context")); |
| 384 | + } |
| 385 | + }; |
| 386 | + |
| 387 | + unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc) |
| 388 | + : rich_location (pfile->line_table, loc, &m_custom_label) |
| 389 | + { |
| 390 | + set_escape_on_output (true); |
| 391 | + for (unsigned i = 0; i < bidi::vec.count (); i++) |
| 392 | + add_range (bidi::vec[i].m_loc, |
| 393 | + SHOW_RANGE_WITHOUT_CARET, |
| 394 | + &m_custom_label); |
| 395 | + } |
| 396 | + |
| 397 | + private: |
| 398 | + custom_range_label m_custom_label; |
| 399 | +}; |
| 400 | + |
| 401 | /* We're closing a bidi context, that is, we've encountered a newline, |
| 402 | are closing a C-style comment, or are at the end of a string literal, |
| 403 | character constant, or identifier. Warn if this context was not |
| 404 | @@ -1427,11 +1566,17 @@ maybe_warn_bidi_on_close (cpp_reader *pf |
| 405 | const location_t loc |
| 406 | = linemap_position_for_column (pfile->line_table, |
| 407 | CPP_BUF_COLUMN (pfile->buffer, p)); |
| 408 | - rich_location rich_loc (pfile->line_table, loc); |
| 409 | - rich_loc.set_escape_on_output (true); |
| 410 | - cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, |
| 411 | - "unpaired UTF-8 bidirectional control character " |
| 412 | - "detected"); |
| 413 | + unpaired_bidi_rich_location rich_loc (pfile, loc); |
| 414 | + /* cpp_callbacks doesn't yet have a way to handle singular vs plural |
| 415 | + forms of a diagnostic, so fake it for now. */ |
| 416 | + if (bidi::vec.count () > 1) |
| 417 | + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, |
| 418 | + "unpaired UTF-8 bidirectional control characters " |
| 419 | + "detected"); |
| 420 | + else |
| 421 | + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, |
| 422 | + "unpaired UTF-8 bidirectional control character " |
| 423 | + "detected"); |
| 424 | } |
| 425 | /* We're done with this context. */ |
| 426 | bidi::on_close (); |
| 427 | @@ -1439,12 +1584,13 @@ maybe_warn_bidi_on_close (cpp_reader *pf |
| 428 | |
| 429 | /* We're at the beginning or in the middle of an identifier/comment/string |
| 430 | literal/character constant. Warn if we've encountered a bidi character. |
| 431 | - KIND says which bidi character it was; P points to it in the character |
| 432 | - stream. UCN_P is true iff this bidi character was written as a UCN. */ |
| 433 | + KIND says which bidi control character it was; UCN_P is true iff this bidi |
| 434 | + control character was written as a UCN. LOC is the location of the |
| 435 | + character, but is only valid if KIND != bidi::kind::NONE. */ |
| 436 | |
| 437 | static void |
| 438 | -maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind, |
| 439 | - bool ucn_p) |
| 440 | +maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind, |
| 441 | + bool ucn_p, location_t loc) |
| 442 | { |
| 443 | if (__builtin_expect (kind == bidi::kind::NONE, 1)) |
| 444 | return; |
| 445 | @@ -1453,9 +1599,6 @@ maybe_warn_bidi_on_char (cpp_reader *pfi |
| 446 | |
| 447 | if (warn_bidi != bidirectional_none) |
| 448 | { |
| 449 | - const location_t loc |
| 450 | - = linemap_position_for_column (pfile->line_table, |
| 451 | - CPP_BUF_COLUMN (pfile->buffer, p)); |
| 452 | rich_location rich_loc (pfile->line_table, loc); |
| 453 | rich_loc.set_escape_on_output (true); |
| 454 | |
| 455 | @@ -1467,9 +1610,12 @@ maybe_warn_bidi_on_char (cpp_reader *pfi |
| 456 | { |
| 457 | if (warn_bidi == bidirectional_unpaired |
| 458 | && bidi::current_ctx_ucn_p () != ucn_p) |
| 459 | - cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, |
| 460 | - "UTF-8 vs UCN mismatch when closing " |
| 461 | - "a context by \"%s\"", bidi::to_str (kind)); |
| 462 | + { |
| 463 | + rich_loc.add_range (bidi::current_ctx_loc ()); |
| 464 | + cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc, |
| 465 | + "UTF-8 vs UCN mismatch when closing " |
| 466 | + "a context by \"%s\"", bidi::to_str (kind)); |
| 467 | + } |
| 468 | } |
| 469 | else if (warn_bidi == bidirectional_any) |
| 470 | { |
| 471 | @@ -1484,7 +1630,7 @@ maybe_warn_bidi_on_char (cpp_reader *pfi |
| 472 | } |
| 473 | } |
| 474 | /* We're done with this context. */ |
| 475 | - bidi::on_char (kind, ucn_p); |
| 476 | + bidi::on_char (kind, ucn_p, loc); |
| 477 | } |
| 478 | |
| 479 | /* Skip a C-style block comment. We find the end of the comment by |
| 480 | @@ -1552,8 +1698,9 @@ _cpp_skip_block_comment (cpp_reader *pfi |
| 481 | a bidirectional control character. */ |
| 482 | else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| 483 | { |
| 484 | - bidi::kind kind = get_bidi_utf8 (cur - 1); |
| 485 | - maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false); |
| 486 | + location_t loc; |
| 487 | + bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); |
| 488 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); |
| 489 | } |
| 490 | } |
| 491 | |
| 492 | @@ -1586,9 +1733,9 @@ skip_line_comment (cpp_reader *pfile) |
| 493 | { |
| 494 | if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)) |
| 495 | { |
| 496 | - bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| 497 | - maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 498 | - /*ucn_p=*/false); |
| 499 | + location_t loc; |
| 500 | + bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); |
| 501 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); |
| 502 | } |
| 503 | buffer->cur++; |
| 504 | } |
| 505 | @@ -1708,9 +1855,9 @@ forms_identifier_p (cpp_reader *pfile, i |
| 506 | if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0) |
| 507 | && warn_bidi_p) |
| 508 | { |
| 509 | - bidi::kind kind = get_bidi_utf8 (buffer->cur); |
| 510 | - maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 511 | - /*ucn_p=*/false); |
| 512 | + location_t loc; |
| 513 | + bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc); |
| 514 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); |
| 515 | } |
| 516 | if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| 517 | state, &s)) |
| 518 | @@ -1722,10 +1869,12 @@ forms_identifier_p (cpp_reader *pfile, i |
| 519 | buffer->cur += 2; |
| 520 | if (warn_bidi_p) |
| 521 | { |
| 522 | - bidi::kind kind = get_bidi_ucn (buffer->cur, |
| 523 | - buffer->cur[-1] == 'U'); |
| 524 | - maybe_warn_bidi_on_char (pfile, buffer->cur, kind, |
| 525 | - /*ucn_p=*/true); |
| 526 | + location_t loc; |
| 527 | + bidi::kind kind = get_bidi_ucn (pfile, |
| 528 | + buffer->cur, |
| 529 | + buffer->cur[-1] == 'U', |
| 530 | + &loc); |
| 531 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); |
| 532 | } |
| 533 | if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first, |
| 534 | state, &s, NULL, NULL)) |
| 535 | @@ -2336,8 +2485,11 @@ lex_raw_string (cpp_reader *pfile, cpp_t |
| 536 | } |
| 537 | else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0) |
| 538 | && warn_bidi_p) |
| 539 | - maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1), |
| 540 | - /*ucn_p=*/false); |
| 541 | + { |
| 542 | + location_t loc; |
| 543 | + bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc); |
| 544 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); |
| 545 | + } |
| 546 | } |
| 547 | |
| 548 | if (warn_bidi_p) |
| 549 | @@ -2447,8 +2599,10 @@ lex_string (cpp_reader *pfile, cpp_token |
| 550 | { |
| 551 | if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p) |
| 552 | { |
| 553 | - bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U'); |
| 554 | - maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true); |
| 555 | + location_t loc; |
| 556 | + bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U', |
| 557 | + &loc); |
| 558 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc); |
| 559 | } |
| 560 | cur++; |
| 561 | } |
| 562 | @@ -2476,8 +2630,9 @@ lex_string (cpp_reader *pfile, cpp_token |
| 563 | saw_NUL = true; |
| 564 | else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p) |
| 565 | { |
| 566 | - bidi::kind kind = get_bidi_utf8 (cur - 1); |
| 567 | - maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false); |
| 568 | + location_t loc; |
| 569 | + bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc); |
| 570 | + maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc); |
| 571 | } |
| 572 | } |
| 573 | |