blob: 2995a6fc61338004830e569113b27b8a7a0331e8 [file] [log] [blame]
From 1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Tue, 2 Nov 2021 09:54:32 -0400
Subject: [PATCH] libcpp: escape non-ASCII source bytes in -Wbidi-chars=
[PR103026]
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit
This flags rich_locations associated with -Wbidi-chars= so that
non-ASCII bytes will be escaped when printing the source lines
(using the diagnostics support I added in
r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).
In particular, this ensures that the printed source lines will
be pure ASCII, and thus the visual ordering of the characters
will be the same as the logical ordering.
Before:
Wbidi-chars-1.c: In function ‘main’:
Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
6 | /*â® } â¦if (isAdmin)⩠⦠begin admins only */
| ^
Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
9 | /* end admins only â® { â¦*/
| ^
Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
6 | int LRE_âª_PDF_\u202c;
| ^
Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
8 | int LRE_\u202a_PDF_â¬_;
| ^
Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
10 | const char *s1 = "LRE_âª_PDF_\u202c";
| ^
Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
12 | const char *s2 = "LRE_\u202a_PDF_â¬";
| ^
After:
Wbidi-chars-1.c: In function ‘main’:
Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
| ^
Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
9 | /* end admins only <U+202E> { <U+2066>*/
| ^
Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
6 | int LRE_<U+202A>_PDF_\u202c;
| ^
Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
8 | int LRE_\u202a_PDF_<U+202C>_;
| ^
Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c";
| ^
Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>";
| ^
libcpp/ChangeLog:
PR preprocessor/103026
* lex.c (maybe_warn_bidi_on_close): Use a rich_location
and call set_escape_on_output (true) on it.
(maybe_warn_bidi_on_char): Likewise.
Signed-off-by: David Malcolm <dmalcolm@redhat.com>
CVE: CVE-2021-42574
Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3]
Signed-off-by: Pgowda <pgowda.cve@gmail.com>
---
libcpp/lex.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8188e33b07d..2421d6c0f40 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "unpaired UTF-8 bidirectional control character "
- "detected");
+ rich_location rich_loc (pfile->line_table, loc);
+ rich_loc.set_escape_on_output (true);
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "unpaired UTF-8 bidirectional control character "
+ "detected");
}
/* We're done with this context. */
bidi::on_close ();
@@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
const location_t loc
= linemap_position_for_column (pfile->line_table,
CPP_BUF_COLUMN (pfile->buffer, p));
+ rich_location rich_loc (pfile->line_table, loc);
+ rich_loc.set_escape_on_output (true);
+
/* It seems excessive to warn about a PDI/PDF that is closing
an opened context because we've already warned about the
opening character. Except warn when we have a UCN x UTF-8
@@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
{
if (warn_bidi == bidirectional_unpaired
&& bidi::current_ctx_ucn_p () != ucn_p)
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "UTF-8 vs UCN mismatch when closing "
- "a context by \"%s\"", bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "UTF-8 vs UCN mismatch when closing "
+ "a context by \"%s\"", bidi::to_str (kind));
}
else if (warn_bidi == bidirectional_any)
{
if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "\"%s\" is closing an unopened context",
- bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "\"%s\" is closing an unopened context",
+ bidi::to_str (kind));
else
- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
- "found problematic Unicode character \"%s\"",
- bidi::to_str (kind));
+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "found problematic Unicode character \"%s\"",
+ bidi::to_str (kind));
}
}
/* We're done with this context. */
--
2.27.0