blob: 2995a6fc61338004830e569113b27b8a7a0331e8 [file] [log] [blame]
Brad Bishopbec4ebc2022-08-03 09:55:16 -04001From 1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3 Mon Sep 17 00:00:00 2001
2From: David Malcolm <dmalcolm@redhat.com>
3Date: Tue, 2 Nov 2021 09:54:32 -0400
4Subject: [PATCH] libcpp: escape non-ASCII source bytes in -Wbidi-chars=
5 [PR103026]
6MIME-Version: 1.0
7Content-Type: text/plain; charset=utf8
8Content-Transfer-Encoding: 8bit
9
10This flags rich_locations associated with -Wbidi-chars= so that
11non-ASCII bytes will be escaped when printing the source lines
12(using the diagnostics support I added in
13r12-4825-gbd5e882cf6e0def3dd1bc106075d59a303fe0d1e).
14
15In particular, this ensures that the printed source lines will
16be pure ASCII, and thus the visual ordering of the characters
17will be the same as the logical ordering.
18
19Before:
20
21 Wbidi-chars-1.c: In function ‘main’:
22 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
23 6 | /*â® } â¦if (isAdmin)⩠⦠begin admins only */
24 | ^
25 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
26 9 | /* end admins only â® { â¦*/
27 | ^
28
29 Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
30 6 | int LRE_âª_PDF_\u202c;
31 | ^
32 Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
33 8 | int LRE_\u202a_PDF_â¬_;
34 | ^
35 Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
36 10 | const char *s1 = "LRE_âª_PDF_\u202c";
37 | ^
38 Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
39 12 | const char *s2 = "LRE_\u202a_PDF_â¬";
40 | ^
41
42After:
43
44 Wbidi-chars-1.c: In function ‘main’:
45 Wbidi-chars-1.c:6:43: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
46 6 | /*<U+202E> } <U+2066>if (isAdmin)<U+2069> <U+2066> begin admins only */
47 | ^
48 Wbidi-chars-1.c:9:28: warning: unpaired UTF-8 bidirectional control character detected [-Wbidi-chars=]
49 9 | /* end admins only <U+202E> { <U+2066>*/
50 | ^
51
52 Wbidi-chars-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
53 6 | int LRE_<U+202A>_PDF_\u202c;
54 | ^
55 Wbidi-chars-11.c:8:19: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
56 8 | int LRE_\u202a_PDF_<U+202C>_;
57 | ^
58 Wbidi-chars-11.c:10:28: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
59 10 | const char *s1 = "LRE_<U+202A>_PDF_\u202c";
60 | ^
61 Wbidi-chars-11.c:12:33: warning: UTF-8 vs UCN mismatch when closing a context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidi-chars=]
62 12 | const char *s2 = "LRE_\u202a_PDF_<U+202C>";
63 | ^
64
65libcpp/ChangeLog:
66 PR preprocessor/103026
67 * lex.c (maybe_warn_bidi_on_close): Use a rich_location
68 and call set_escape_on_output (true) on it.
69 (maybe_warn_bidi_on_char): Likewise.
70
71Signed-off-by: David Malcolm <dmalcolm@redhat.com>
72
73CVE: CVE-2021-42574
74Upstream-Status: Backport [https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1a7f2c0774129750fdf73e9f1b78f0ce983c9ab3]
75Signed-off-by: Pgowda <pgowda.cve@gmail.com>
76
77---
78 libcpp/lex.c | 29 +++++++++++++++++------------
79 1 file changed, 17 insertions(+), 12 deletions(-)
80
81diff --git a/libcpp/lex.c b/libcpp/lex.c
82index 8188e33b07d..2421d6c0f40 100644
83--- a/libcpp/lex.c
84+++ b/libcpp/lex.c
85@@ -1427,9 +1427,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
86 const location_t loc
87 = linemap_position_for_column (pfile->line_table,
88 CPP_BUF_COLUMN (pfile->buffer, p));
89- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
90- "unpaired UTF-8 bidirectional control character "
91- "detected");
92+ rich_location rich_loc (pfile->line_table, loc);
93+ rich_loc.set_escape_on_output (true);
94+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
95+ "unpaired UTF-8 bidirectional control character "
96+ "detected");
97 }
98 /* We're done with this context. */
99 bidi::on_close ();
100@@ -1454,6 +1456,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
101 const location_t loc
102 = linemap_position_for_column (pfile->line_table,
103 CPP_BUF_COLUMN (pfile->buffer, p));
104+ rich_location rich_loc (pfile->line_table, loc);
105+ rich_loc.set_escape_on_output (true);
106+
107 /* It seems excessive to warn about a PDI/PDF that is closing
108 an opened context because we've already warned about the
109 opening character. Except warn when we have a UCN x UTF-8
110@@ -1462,20 +1467,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar *p, bidi::kind kind,
111 {
112 if (warn_bidi == bidirectional_unpaired
113 && bidi::current_ctx_ucn_p () != ucn_p)
114- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
115- "UTF-8 vs UCN mismatch when closing "
116- "a context by \"%s\"", bidi::to_str (kind));
117+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
118+ "UTF-8 vs UCN mismatch when closing "
119+ "a context by \"%s\"", bidi::to_str (kind));
120 }
121 else if (warn_bidi == bidirectional_any)
122 {
123 if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
124- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
125- "\"%s\" is closing an unopened context",
126- bidi::to_str (kind));
127+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
128+ "\"%s\" is closing an unopened context",
129+ bidi::to_str (kind));
130 else
131- cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
132- "found problematic Unicode character \"%s\"",
133- bidi::to_str (kind));
134+ cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
135+ "found problematic Unicode character \"%s\"",
136+ bidi::to_str (kind));
137 }
138 }
139 /* We're done with this context. */
140--
1412.27.0
142