blob: 4999c71b641a69c7fd404e0a3e30704d153389c9 [file] [log] [blame]
Brad Bishopbec4ebc2022-08-03 09:55:16 -04001From bef32d4a28595e933f24fef378cf052a30b674a7 Mon Sep 17 00:00:00 2001
2From: David Malcolm <dmalcolm@redhat.com>
3Date: Tue, 2 Nov 2021 15:45:22 -0400
4Subject: [PATCH] libcpp: capture and underline ranges in -Wbidi-chars=
5 [PR103026]
6MIME-Version: 1.0
7Content-Type: text/plain; charset=utf8
8Content-Transfer-Encoding: 8bit
9
10This patch converts the bidi::vec to use a struct so that we can
11capture location_t values for the bidirectional control characters.
12
13Before:
14
15 Wbidi-chars-1.c: In function ‘main’:
16 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
17 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
18 | ^
19 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
20 9 | /* end admins only <U+202E> { <U+2066>*/
21 | ^
22
23After:
24
25 Wbidi-chars-1.c: In function ‘main’:
26 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
27 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
28 | ~~~~~~~~ ~~~~~~~~ ^
29 | | | |
30 | | | end of bidirectional context
31 | U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE)
32 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control characters detected [-Wbidi-chars=]
33 9 | /* end admins only <U+202E> { <U+2066>*/
34 | ~~~~~~~~ ~~~~~~~~ ^
35 | | | |
36 | | | end of bidirectional context
37 | | U+2066 (LEFT-TO-RIGHT ISOLATE)
38 | U+202E (RIGHT-TO-LEFT OVERRIDE)
39
40Signed-off-by: David Malcolm <dmalcolm@redhat.com>
41
42gcc/testsuite/ChangeLog:
43 PR preprocessor/103026
44 * c-c++-common/Wbidi-chars-ranges.c: New test.
45
46libcpp/ChangeLog:
47 PR preprocessor/103026
48 * lex.c (struct bidi::context): New.
49 (bidi::vec): Convert to a vec of context rather than unsigned
50 char.
51 (bidi::ctx_at): Rename to...
52 (bidi::pop_kind_at): ...this and reimplement for above change.
53 (bidi::current_ctx): Update for change to vec.
54 (bidi::current_ctx_ucn_p): Likewise.
55 (bidi::current_ctx_loc): New.
56 (bidi::on_char): Update for usage of context struct. Add "loc"
57 param and pass it when pushing contexts.
58 (get_location_for_byte_range_in_cur_line): New.
59 (get_bidi_utf8): Rename to...
60 (get_bidi_utf8_1): ...this, reintroducing...
61 (get_bidi_utf8): ...as a wrapper, setting *OUT when the result is
62 not NONE.
63 (get_bidi_ucn): Rename to...
64 (get_bidi_ucn_1): ...this, reintroducing...
65 (get_bidi_ucn): ...as a wrapper, setting *OUT when the result is
66 not NONE.
67 (class unpaired_bidi_rich_location): New.
68 (maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when
69 reporting on unpaired bidi chars. Split into singular vs plural
70 spellings.
71 (maybe_warn_bidi_on_char): Pass in a location_t rather than a
72 const uchar * and use it when emitting warnings, and when calling
73 bidi::on_char.
74 (_cpp_skip_block_comment): Capture location when kind is not NONE
75 and pass it to maybe_warn_bidi_on_char.
76 (skip_line_comment): Likewise.
77 (forms_identifier_p): Likewise.
78 (lex_raw_string): Likewise.
79 (lex_string): Likewise.
80
81Signed-off-by: David Malcolm <dmalcolm@redhat.com>
82
83CVE: CVE-2021-42574
84Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=bef32d4a28595e933f24fef378cf052a30b674a7]
85Signed-off-by: Pgowda <pgowda.cve@gmail.com>
86
87---
88 .../c-c++-common/Wbidi-chars-ranges.c | 54 ++++
89 libcpp/lex.c | 251 ++++++++++++++----
90 2 files changed, 257 insertions(+), 48 deletions(-)
91 create mode 100644 gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
92
93diff --git a/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
94new file mode 100644
95index 00000000000..298750a2a64
96--- /dev/null
97+++ b/gcc/testsuite/c-c++-common/Wbidi-chars-ranges.c
98@@ -0,0 +1,54 @@
99+/* PR preprocessor/103026 */
100+/* { dg-do compile } */
101+/* { dg-options "-Wbidi-chars=unpaired -fdiagnostics-show-caret" } */
102+/* Verify that we escape and underline pertinent bidirectional
103+ control characters when quoting the source. */
104+
105+int test_unpaired_bidi () {
106+ int isAdmin = 0;
107+ /*â® } â¦if (isAdmin)⩠⦠begin admins only */
108+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
109+#if 0
110+ { dg-begin-multiline-output "" }
111+ /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
112+ ~~~~~~~~ ~~~~~~~~ ^
113+ | | |
114+ | | end of bidirectional context
115+ U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE)
116+ { dg-end-multiline-output "" }
117+#endif
118+
119+ __builtin_printf("You are an admin.\n");
120+ /* end admins only â® { â¦*/
121+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
122+#if 0
123+ { dg-begin-multiline-output "" }
124+ /* end admins only <U+202E> { <U+2066>*/
125+ ~~~~~~~~ ~~~~~~~~ ^
126+ | | |
127+ | | end of bidirectional context
128+ | U+2066 (LEFT-TO-RIGHT ISOLATE)
129+ U+202E (RIGHT-TO-LEFT OVERRIDE)
130+ { dg-end-multiline-output "" }
131+#endif
132+
133+ return 0;
134+}
135+
136+int LRE_âª_PDF_\u202c;
137+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
138+#if 0
139+ { dg-begin-multiline-output "" }
140+ int LRE_<U+202A>_PDF_\u202c;
141+ ~~~~~~~~ ^~~~~~
142+ { dg-end-multiline-output "" }
143+#endif
144+
145+const char *s1 = "LRE_âª_PDF_\u202c";
146+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
147+#if 0
148+ { dg-begin-multiline-output "" }
149+ const char *s1 = "LRE_<U+202A>_PDF_\u202c";
150+ ~~~~~~~~ ^~~~~~
151+ { dg-end-multiline-output "" }
152+#endif
153diff --git a/libcpp/lex.c b/libcpp/lex.c
154index 2421d6c0f40..94c36f0d014 100644
155--- a/libcpp/lex.c
156+++ b/libcpp/lex.c
157@@ -1172,11 +1172,34 @@ namespace bidi {
158 /* All the UTF-8 encodings of bidi characters start with E2. */
159 constexpr uchar utf8_start = 0xe2;
160
161+ struct context
162+ {
163+ context () {}
164+ context (location_t loc, kind k, bool pdf, bool ucn)
165+ : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
166+ {
167+ }
168+
169+ kind get_pop_kind () const
170+ {
171+ return m_pdf ? kind::PDF : kind::PDI;
172+ }
173+ bool ucn_p () const
174+ {
175+ return m_ucn;
176+ }
177+
178+ location_t m_loc;
179+ kind m_kind;
180+ unsigned m_pdf : 1;
181+ unsigned m_ucn : 1;
182+ };
183+
184 /* A vector holding currently open bidi contexts. We use a char for
185 each context, its LSB is 1 if it represents a PDF context, 0 if it
186 represents a PDI context. The next bit is 1 if this context was open
187 by a bidi character written as a UCN, and 0 when it was UTF-8. */
188- semi_embedded_vec <unsigned char, 16> vec;
189+ semi_embedded_vec <context, 16> vec;
190
191 /* Close the whole comment/identifier/string literal/character constant
192 context. */
193@@ -1193,19 +1216,19 @@ namespace bidi {
194 vec.truncate (len - 1);
195 }
196
197- /* Return the context of the Ith element. */
198- kind ctx_at (unsigned int i)
199+ /* Return the pop kind of the context of the Ith element. */
200+ kind pop_kind_at (unsigned int i)
201 {
202- return (vec[i] & 1) ? kind::PDF : kind::PDI;
203+ return vec[i].get_pop_kind ();
204 }
205
206- /* Return which context is currently opened. */
207+ /* Return the pop kind of the context that is currently opened. */
208 kind current_ctx ()
209 {
210 unsigned int len = vec.count ();
211 if (len == 0)
212 return kind::NONE;
213- return ctx_at (len - 1);
214+ return vec[len - 1].get_pop_kind ();
215 }
216
217 /* Return true if the current context comes from a UCN origin, that is,
218@@ -1214,11 +1237,19 @@ namespace bidi {
219 {
220 unsigned int len = vec.count ();
221 gcc_checking_assert (len > 0);
222- return (vec[len - 1] >> 1) & 1;
223+ return vec[len - 1].m_ucn;
224 }
225
226- /* We've read a bidi char, update the current vector as necessary. */
227- void on_char (kind k, bool ucn_p)
228+ location_t current_ctx_loc ()
229+ {
230+ unsigned int len = vec.count ();
231+ gcc_checking_assert (len > 0);
232+ return vec[len - 1].m_loc;
233+ }
234+
235+ /* We've read a bidi char, update the current vector as necessary.
236+ LOC is only valid when K is not kind::NONE. */
237+ void on_char (kind k, bool ucn_p, location_t loc)
238 {
239 switch (k)
240 {
241@@ -1226,12 +1257,12 @@ namespace bidi {
242 case kind::RLE:
243 case kind::LRO:
244 case kind::RLO:
245- vec.push (ucn_p ? 3u : 1u);
246+ vec.push (context (loc, k, true, ucn_p));
247 break;
248 case kind::LRI:
249 case kind::RLI:
250 case kind::FSI:
251- vec.push (ucn_p ? 2u : 0u);
252+ vec.push (context (loc, k, false, ucn_p));
253 break;
254 /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
255 whose scope has not yet been terminated. */
256@@ -1245,7 +1276,7 @@ namespace bidi {
257 yet been terminated. */
258 case kind::PDI:
259 for (int i = vec.count () - 1; i >= 0; --i)
260- if (ctx_at (i) == kind::PDI)
261+ if (pop_kind_at (i) == kind::PDI)
262 {
263 vec.truncate (i);
264 break;
265@@ -1295,10 +1326,47 @@ namespace bidi {
266 }
267 }
268
269+/* Get location_t for the range of bytes [START, START + NUM_BYTES)
270+ within the current line in FILE, with the caret at START. */
271+
272+static location_t
273+get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
274+ const unsigned char *const start,
275+ size_t num_bytes)
276+{
277+ gcc_checking_assert (num_bytes > 0);
278+
279+ /* CPP_BUF_COLUMN and linemap_position_for_column both refer
280+ to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
281+ whereas linemap_position_for_column is 1-based. */
282+
283+ /* Get 0-based offsets within the line. */
284+ size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
285+ size_t end_offset = start_offset + num_bytes - 1;
286+
287+ /* Now convert to location_t, where "columns" are 1-based byte offsets. */
288+ location_t start_loc = linemap_position_for_column (pfile->line_table,
289+ start_offset + 1);
290+ location_t end_loc = linemap_position_for_column (pfile->line_table,
291+ end_offset + 1);
292+
293+ if (start_loc == end_loc)
294+ return start_loc;
295+
296+ source_range src_range;
297+ src_range.m_start = start_loc;
298+ src_range.m_finish = end_loc;
299+ location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
300+ start_loc,
301+ src_range,
302+ NULL);
303+ return combined_loc;
304+}
305+
306 /* Parse a sequence of 3 bytes starting with P and return its bidi code. */
307
308 static bidi::kind
309-get_bidi_utf8 (const unsigned char *const p)
310+get_bidi_utf8_1 (const unsigned char *const p)
311 {
312 gcc_checking_assert (p[0] == bidi::utf8_start);
313
314@@ -1340,10 +1408,25 @@ get_bidi_utf8 (const unsigned char *cons
315 return bidi::kind::NONE;
316 }
317
318+/* Parse a sequence of 3 bytes starting with P and return its bidi code.
319+ If the kind is not NONE, write the location to *OUT.*/
320+
321+static bidi::kind
322+get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
323+{
324+ bidi::kind result = get_bidi_utf8_1 (p);
325+ if (result != bidi::kind::NONE)
326+ {
327+ /* We have a sequence of 3 bytes starting at P. */
328+ *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
329+ }
330+ return result;
331+}
332+
333 /* Parse a UCN where P points just past \u or \U and return its bidi code. */
334
335 static bidi::kind
336-get_bidi_ucn (const unsigned char *p, bool is_U)
337+get_bidi_ucn_1 (const unsigned char *p, bool is_U)
338 {
339 /* 6.4.3 Universal Character Names
340 \u hex-quad
341@@ -1412,6 +1495,62 @@ get_bidi_ucn (const unsigned char *p, bo
342 return bidi::kind::NONE;
343 }
344
345+/* Parse a UCN where P points just past \u or \U and return its bidi code.
346+ If the kind is not NONE, write the location to *OUT.*/
347+
348+static bidi::kind
349+get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
350+ location_t *out)
351+{
352+ bidi::kind result = get_bidi_ucn_1 (p, is_U);
353+ if (result != bidi::kind::NONE)
354+ {
355+ const unsigned char *start = p - 2;
356+ size_t num_bytes = 2 + (is_U ? 8 : 4);
357+ *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
358+ }
359+ return result;
360+}
361+
362+/* Subclass of rich_location for reporting on unpaired UTF-8
363+ bidirectional control character(s).
364+ Escape the source lines on output, and show all unclosed
365+ bidi context, labelling everything. */
366+
367+class unpaired_bidi_rich_location : public rich_location
368+{
369+ public:
370+ class custom_range_label : public range_label
371+ {
372+ public:
373+ label_text get_text (unsigned range_idx) const FINAL OVERRIDE
374+ {
375+ /* range 0 is the primary location; each subsequent range i + 1
376+ is for bidi::vec[i]. */
377+ if (range_idx > 0)
378+ {
379+ const bidi::context &ctxt (bidi::vec[range_idx - 1]);
380+ return label_text::borrow (bidi::to_str (ctxt.m_kind));
381+ }
382+ else
383+ return label_text::borrow (_("end of bidirectional context"));
384+ }
385+ };
386+
387+ unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
388+ : rich_location (pfile->line_table, loc, &m_custom_label)
389+ {
390+ set_escape_on_output (true);
391+ for (unsigned i = 0; i < bidi::vec.count (); i++)
392+ add_range (bidi::vec[i].m_loc,
393+ SHOW_RANGE_WITHOUT_CARET,
394+ &m_custom_label);
395+ }
396+
397+ private:
398+ custom_range_label m_custom_label;
399+};
400+
401 /* We're closing a bidi context, that is, we've encountered a newline,
402 are closing a C-style comment, or are at the end of a string literal,
403 character constant, or identifier. Warn if this context was not
404@@ -1427,11 +1566,17 @@ maybe_warn_bidi_on_close (cpp_reader *pf
405 const location_t loc
406 = linemap_position_for_column (pfile->line_table,
407 CPP_BUF_COLUMN (pfile->buffer, p));
408- rich_location rich_loc (pfile->line_table, loc);
409- rich_loc.set_escape_on_output (true);
410- cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
411- "unpaired UTF-8 bidirectional control character "
412- "detected");
413+ unpaired_bidi_rich_location rich_loc (pfile, loc);
414+ /* cpp_callbacks doesn't yet have a way to handle singular vs plural
415+ forms of a diagnostic, so fake it for now. */
416+ if (bidi::vec.count () > 1)
417+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
418+ "unpaired UTF-8 bidirectional control characters "
419+ "detected");
420+ else
421+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
422+ "unpaired UTF-8 bidirectional control character "
423+ "detected");
424 }
425 /* We're done with this context. */
426 bidi::on_close ();
427@@ -1439,12 +1584,13 @@ maybe_warn_bidi_on_close (cpp_reader *pf
428
429 /* We're at the beginning or in the middle of an identifier/comment/string
430 literal/character constant. Warn if we've encountered a bidi character.
431- KIND says which bidi character it was; P points to it in the character
432- stream. UCN_P is true iff this bidi character was written as a UCN. */
433+ KIND says which bidi control character it was; UCN_P is true iff this bidi
434+ control character was written as a UCN. LOC is the location of the
435+ character, but is only valid if KIND != bidi::kind::NONE. */
436
437 static void
438-maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
439- bool ucn_p)
440+maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
441+ bool ucn_p, location_t loc)
442 {
443 if (__builtin_expect (kind == bidi::kind::NONE, 1))
444 return;
445@@ -1453,9 +1599,6 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
446
447 if (warn_bidi != bidirectional_none)
448 {
449- const location_t loc
450- = linemap_position_for_column (pfile->line_table,
451- CPP_BUF_COLUMN (pfile->buffer, p));
452 rich_location rich_loc (pfile->line_table, loc);
453 rich_loc.set_escape_on_output (true);
454
455@@ -1467,9 +1610,12 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
456 {
457 if (warn_bidi == bidirectional_unpaired
458 && bidi::current_ctx_ucn_p () != ucn_p)
459- cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
460- "UTF-8 vs UCN mismatch when closing "
461- "a context by \"%s\"", bidi::to_str (kind));
462+ {
463+ rich_loc.add_range (bidi::current_ctx_loc ());
464+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
465+ "UTF-8 vs UCN mismatch when closing "
466+ "a context by \"%s\"", bidi::to_str (kind));
467+ }
468 }
469 else if (warn_bidi == bidirectional_any)
470 {
471@@ -1484,7 +1630,7 @@ maybe_warn_bidi_on_char (cpp_reader *pfi
472 }
473 }
474 /* We're done with this context. */
475- bidi::on_char (kind, ucn_p);
476+ bidi::on_char (kind, ucn_p, loc);
477 }
478
479 /* Skip a C-style block comment. We find the end of the comment by
480@@ -1552,8 +1698,9 @@ _cpp_skip_block_comment (cpp_reader *pfi
481 a bidirectional control character. */
482 else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
483 {
484- bidi::kind kind = get_bidi_utf8 (cur - 1);
485- maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/false);
486+ location_t loc;
487+ bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
488+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
489 }
490 }
491
492@@ -1586,9 +1733,9 @@ skip_line_comment (cpp_reader *pfile)
493 {
494 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
495 {
496- bidi::kind kind = get_bidi_utf8 (buffer->cur);
497- maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
498- /*ucn_p=*/false);
499+ location_t loc;
500+ bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
501+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
502 }
503 buffer->cur++;
504 }
505@@ -1708,9 +1855,9 @@ forms_identifier_p (cpp_reader *pfile, i
506 if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
507 && warn_bidi_p)
508 {
509- bidi::kind kind = get_bidi_utf8 (buffer->cur);
510- maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
511- /*ucn_p=*/false);
512+ location_t loc;
513+ bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
514+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
515 }
516 if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
517 state, &s))
518@@ -1722,10 +1869,12 @@ forms_identifier_p (cpp_reader *pfile, i
519 buffer->cur += 2;
520 if (warn_bidi_p)
521 {
522- bidi::kind kind = get_bidi_ucn (buffer->cur,
523- buffer->cur[-1] == 'U');
524- maybe_warn_bidi_on_char (pfile, buffer->cur, kind,
525- /*ucn_p=*/true);
526+ location_t loc;
527+ bidi::kind kind = get_bidi_ucn (pfile,
528+ buffer->cur,
529+ buffer->cur[-1] == 'U',
530+ &loc);
531+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
532 }
533 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
534 state, &s, NULL, NULL))
535@@ -2336,8 +2485,11 @@ lex_raw_string (cpp_reader *pfile, cpp_t
536 }
537 else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
538 && warn_bidi_p)
539- maybe_warn_bidi_on_char (pfile, pos - 1, get_bidi_utf8 (pos - 1),
540- /*ucn_p=*/false);
541+ {
542+ location_t loc;
543+ bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
544+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
545+ }
546 }
547
548 if (warn_bidi_p)
549@@ -2447,8 +2599,10 @@ lex_string (cpp_reader *pfile, cpp_token
550 {
551 if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
552 {
553- bidi::kind kind = get_bidi_ucn (cur + 1, cur[0] == 'U');
554- maybe_warn_bidi_on_char (pfile, cur, kind, /*ucn_p=*/true);
555+ location_t loc;
556+ bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
557+ &loc);
558+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
559 }
560 cur++;
561 }
562@@ -2476,8 +2630,9 @@ lex_string (cpp_reader *pfile, cpp_token
563 saw_NUL = true;
564 else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
565 {
566- bidi::kind kind = get_bidi_utf8 (cur - 1);
567- maybe_warn_bidi_on_char (pfile, cur - 1, kind, /*ucn_p=*/false);
568+ location_t loc;
569+ bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
570+ maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
571 }
572 }
573