Patrick Williams | 841583d | 2023-05-03 21:37:45 -0500 | [diff] [blame^] | 1 | From 064ec20bf7a181ba5fa961aaa12973812aa6ca5d Mon Sep 17 00:00:00 2001 |
| 2 | From: "Miss Islington (bot)" |
| 3 | <31488909+miss-islington@users.noreply.github.com> |
| 4 | Date: Mon, 7 Nov 2022 18:57:10 -0800 |
| 5 | Subject: [PATCH] [3.11] gh-98433: Fix quadratic time idna decoding. (GH-99092) |
| 6 | (GH-99222) |
| 7 | |
| 8 | There was an unnecessary quadratic loop in idna decoding. This restores |
| 9 | the behavior to linear. |
| 10 | |
| 11 | (cherry picked from commit d315722564927c7202dd6e111dc79eaf14240b0d) |
| 12 | |
| 13 | (cherry picked from commit a6f6c3a3d6f2b580f2d87885c9b8a9350ad7bf15) |
| 14 | |
| 15 | Co-authored-by: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> |
| 16 | Co-authored-by: Gregory P. Smith <greg@krypto.org> |
| 17 | |
| 18 | CVE: CVE-2022-45061 |
| 19 | Upstream-Status: Backport [https://github.com/python/cpython/pull/99231/commits/064ec20bf7a181ba5fa961aaa12973812aa6ca5d] |
| 20 | Signed-off-by: Omkar Patil <Omkar.Patil@kpit.com> |
| 21 | |
| 22 | --- |
| 23 | Lib/encodings/idna.py | 32 +++++++++---------- |
| 24 | Lib/test/test_codecs.py | 6 ++++ |
| 25 | ...2-11-04-09-29-36.gh-issue-98433.l76c5G.rst | 6 ++++ |
| 26 | 3 files changed, 27 insertions(+), 17 deletions(-) |
| 27 | create mode 100644 Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst |
| 28 | |
| 29 | diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py |
| 30 | index ea4058512fe3..bf98f513366b 100644 |
| 31 | --- a/Lib/encodings/idna.py |
| 32 | +++ b/Lib/encodings/idna.py |
| 33 | @@ -39,23 +39,21 @@ def nameprep(label): |
| 34 | |
| 35 | # Check bidi |
| 36 | RandAL = [stringprep.in_table_d1(x) for x in label] |
| 37 | - for c in RandAL: |
| 38 | - if c: |
| 39 | - # There is a RandAL char in the string. Must perform further |
| 40 | - # tests: |
| 41 | - # 1) The characters in section 5.8 MUST be prohibited. |
| 42 | - # This is table C.8, which was already checked |
| 43 | - # 2) If a string contains any RandALCat character, the string |
| 44 | - # MUST NOT contain any LCat character. |
| 45 | - if any(stringprep.in_table_d2(x) for x in label): |
| 46 | - raise UnicodeError("Violation of BIDI requirement 2") |
| 47 | - |
| 48 | - # 3) If a string contains any RandALCat character, a |
| 49 | - # RandALCat character MUST be the first character of the |
| 50 | - # string, and a RandALCat character MUST be the last |
| 51 | - # character of the string. |
| 52 | - if not RandAL[0] or not RandAL[-1]: |
| 53 | - raise UnicodeError("Violation of BIDI requirement 3") |
| 54 | + if any(RandAL): |
| 55 | + # There is a RandAL char in the string. Must perform further |
| 56 | + # tests: |
| 57 | + # 1) The characters in section 5.8 MUST be prohibited. |
| 58 | + # This is table C.8, which was already checked |
| 59 | + # 2) If a string contains any RandALCat character, the string |
| 60 | + # MUST NOT contain any LCat character. |
| 61 | + if any(stringprep.in_table_d2(x) for x in label): |
| 62 | + raise UnicodeError("Violation of BIDI requirement 2") |
| 63 | + # 3) If a string contains any RandALCat character, a |
| 64 | + # RandALCat character MUST be the first character of the |
| 65 | + # string, and a RandALCat character MUST be the last |
| 66 | + # character of the string. |
| 67 | + if not RandAL[0] or not RandAL[-1]: |
| 68 | + raise UnicodeError("Violation of BIDI requirement 3") |
| 69 | |
| 70 | return label |
| 71 | |
| 72 | diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py |
| 73 | index d1faf0126c1e..37ade7d80d02 100644 |
| 74 | --- a/Lib/test/test_codecs.py |
| 75 | +++ b/Lib/test/test_codecs.py |
| 76 | @@ -1532,6 +1532,12 @@ def test_builtin_encode(self): |
| 77 | self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") |
| 78 | self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") |
| 79 | |
| 80 | + def test_builtin_decode_length_limit(self): |
| 81 | + with self.assertRaisesRegex(UnicodeError, "too long"): |
| 82 | + (b"xn--016c"+b"a"*1100).decode("idna") |
| 83 | + with self.assertRaisesRegex(UnicodeError, "too long"): |
| 84 | + (b"xn--016c"+b"a"*70).decode("idna") |
| 85 | + |
| 86 | def test_stream(self): |
| 87 | r = codecs.getreader("idna")(io.BytesIO(b"abc")) |
| 88 | r.read(3) |
| 89 | diff --git a/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst |
| 90 | new file mode 100644 |
| 91 | index 000000000000..5185fac2e29d |
| 92 | --- /dev/null |
| 93 | +++ b/Misc/NEWS.d/next/Security/2022-11-04-09-29-36.gh-issue-98433.l76c5G.rst |
| 94 | @@ -0,0 +1,6 @@ |
| 95 | +The IDNA codec decoder used on DNS hostnames by :mod:`socket` or :mod:`asyncio` |
| 96 | +related name resolution functions no longer involves a quadratic algorithm. |
| 97 | +This prevents a potential CPU denial of service if an out-of-spec excessive |
| 98 | +length hostname involving bidirectional characters were decoded. Some protocols |
| 99 | +such as :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker |
| 100 | +to supply such a name. |