blob: 1d34db7359ea474a5aa5f97c51c5e6a932e9ba98 [file] [log] [blame]
Patrick Williamsf1e5d692016-03-30 15:21:19 -05001Upstream-Status: Backport
2 https://git.lysator.liu.se/nettle/nettle/commit/fa269b6ad06dd13c901dbd84a12e52b918a09cd7
3
4CVE: CVE-2015-8804
5Signed-off-by: Armin Kuster <akuster@mvista.com>
6
7Index: nettle-3.1.1/ChangeLog
8===================================================================
9--- nettle-3.1.1.orig/ChangeLog
10+++ nettle-3.1.1/ChangeLog
11@@ -1,3 +1,11 @@
12+2015-12-15 Niels Möller <nisse@lysator.liu.se>
13+
14+ * x86_64/ecc-384-modp.asm: Fixed carry propagation bug. Problem
15+ reported by Hanno Böck. Simplified the folding to always use
16+ non-negative carry, the old code attempted to add in a carry which
17+ could be either positive or negative, but didn't get that case
18+ right.
19+
20 2015-12-10 Niels Möller <nisse@lysator.liu.se>
21
22 * ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
23Index: nettle-3.1.1/x86_64/ecc-384-modp.asm
24===================================================================
25--- nettle-3.1.1.orig/x86_64/ecc-384-modp.asm
26+++ nettle-3.1.1/x86_64/ecc-384-modp.asm
27@@ -1,7 +1,7 @@
28 C x86_64/ecc-384-modp.asm
29
30 ifelse(<
31- Copyright (C) 2013 Niels Möller
32+ Copyright (C) 2013, 2015 Niels Möller
33
34 This file is part of GNU Nettle.
35
36@@ -33,7 +33,7 @@ ifelse(<
37 .file "ecc-384-modp.asm"
38
39 define(<RP>, <%rsi>)
40-define(<D4>, <%rax>)
41+define(<D5>, <%rax>)
42 define(<T0>, <%rbx>)
43 define(<T1>, <%rcx>)
44 define(<T2>, <%rdx>)
45@@ -48,8 +48,8 @@ define(<H4>, <%r13>)
46 define(<H5>, <%r14>)
47 define(<C2>, <%r15>)
48 define(<C0>, H5) C Overlap
49-define(<D0>, RP) C Overlap
50-define(<TMP>, H4) C Overlap
51+define(<TMP>, RP) C Overlap
52+
53
54 PROLOGUE(nettle_ecc_384_modp)
55 W64_ENTRY(2, 0)
56@@ -61,34 +61,38 @@ PROLOGUE(nettle_ecc_384_modp)
57 push %r14
58 push %r15
59
60- C First get top 2 limbs, which need folding twice
61+ C First get top 2 limbs, which need folding twice.
62+ C B^10 = B^6 + B^4 + 2^32 (B-1)B^4.
63+ C We handle the terms as follows:
64 C
65- C H5 H4
66- C -H5
67- C ------
68- C H0 D4
69+ C B^6: Folded immediately.
70 C
71- C Then shift right, (H1,H0,D4) <-- (H0,D4) << 32
72- C and add
73+ C B^4: Delayed, added in during the next folding.
74 C
75- C H5 H4
76- C H1 H0
77- C ----------
78- C C2 H1 H0
79-
80- mov 80(RP), D4
81- mov 88(RP), H0
82- mov D4, H4
83- mov H0, H5
84- sub H0, D4
85- sbb $0, H0
86-
87- mov D4, T2
88- mov H0, H1
89- shl $32, H0
90- shr $32, T2
91+ C 2^32(B-1) B^4: Low half limb delayed until the next
92+ C folding. Top 1.5 limbs subtracted and shifted now, resulting
93+ C in 2.5 limbs. The low limb saved in D5, high 1.5 limbs added
94+ C in.
95+
96+ mov 80(RP), H4
97+ mov 88(RP), H5
98+ C Shift right 32 bits, into H1, H0
99+ mov H4, H0
100+ mov H5, H1
101+ mov H5, D5
102 shr $32, H1
103- or T2, H0
104+ shl $32, D5
105+ shr $32, H0
106+ or D5, H0
107+
108+ C H1 H0
109+ C - H1 H0
110+ C --------
111+ C H1 H0 D5
112+ mov H0, D5
113+ neg D5
114+ sbb H1, H0
115+ sbb $0, H1
116
117 xor C2, C2
118 add H4, H0
119@@ -127,118 +131,95 @@ PROLOGUE(nettle_ecc_384_modp)
120 adc H3, T5
121 adc $0, C0
122
123- C H3 H2 H1 H0 0
124- C - H4 H3 H2 H1 H0
125- C ---------------
126- C H3 H2 H1 H0 D0
127-
128- mov XREG(D4), XREG(D4)
129- mov H0, D0
130- neg D0
131- sbb H1, H0
132- sbb H2, H1
133- sbb H3, H2
134- sbb H4, H3
135- sbb $0, D4
136-
137- C Shift right. High bits are sign, to be added to C0.
138- mov D4, TMP
139- sar $32, TMP
140- shl $32, D4
141- add TMP, C0
142-
143+ C Shift left, including low half of H4
144 mov H3, TMP
145+ shl $32, H4
146 shr $32, TMP
147- shl $32, H3
148- or TMP, D4
149+ or TMP, H4
150
151 mov H2, TMP
152+ shl $32, H3
153 shr $32, TMP
154- shl $32, H2
155 or TMP, H3
156
157 mov H1, TMP
158+ shl $32, H2
159 shr $32, TMP
160- shl $32, H1
161 or TMP, H2
162
163 mov H0, TMP
164+ shl $32, H1
165 shr $32, TMP
166- shl $32, H0
167 or TMP, H1
168
169- mov D0, TMP
170- shr $32, TMP
171- shl $32, D0
172- or TMP, H0
173+ shl $32, H0
174+
175+ C H4 H3 H2 H1 H0 0
176+ C - H4 H3 H2 H1 H0
177+ C ---------------
178+ C H4 H3 H2 H1 H0 TMP
179
180- add D0, T0
181+ mov H0, TMP
182+ neg TMP
183+ sbb H1, H0
184+ sbb H2, H1
185+ sbb H3, H2
186+ sbb H4, H3
187+ sbb $0, H4
188+
189+ add TMP, T0
190 adc H0, T1
191 adc H1, T2
192 adc H2, T3
193 adc H3, T4
194- adc D4, T5
195+ adc H4, T5
196 adc $0, C0
197
198 C Remains to add in C2 and C0
199- C C0 C0<<32 (-2^32+1)C0
200- C C2 C2<<32 (-2^32+1)C2
201- C where C2 is always positive, while C0 may be -1.
202+ C Set H1, H0 = (2^96 - 2^32 + 1) C0
203 mov C0, H0
204 mov C0, H1
205- mov C0, H2
206- sar $63, C0 C Get sign
207 shl $32, H1
208- sub H1, H0 C Gives borrow iff C0 > 0
209+ sub H1, H0
210 sbb $0, H1
211- add C0, H2
212
213+ C Set H3, H2 = (2^96 - 2^32 + 1) C2
214+ mov C2, H2
215+ mov C2, H3
216+ shl $32, H3
217+ sub H3, H2
218+ sbb $0, H3
219+ add C0, H2 C No carry. Could use lea trick
220+
221+ xor C0, C0
222 add H0, T0
223 adc H1, T1
224- adc $0, H2
225- adc $0, C0
226-
227- C Set (H1 H0) <-- C2 << 96 - C2 << 32 + 1
228- mov C2, H0
229- mov C2, H1
230- shl $32, H1
231- sub H1, H0
232- sbb $0, H1
233-
234- add H2, H0
235- adc C0, H1
236- adc C2, C0
237- mov C0, H2
238- sar $63, C0
239- add H0, T2
240- adc H1, T3
241- adc H2, T4
242- adc C0, T5
243- sbb C0, C0
244+ adc H2, T2
245+ adc H3, T3
246+ adc C2, T4
247+ adc D5, T5 C Value delayed from initial folding
248+ adc $0, C0 C Use sbb and switch sign?
249
250 C Final unlikely carry
251 mov C0, H0
252 mov C0, H1
253- mov C0, H2
254- sar $63, C0
255 shl $32, H1
256 sub H1, H0
257 sbb $0, H1
258- add C0, H2
259
260 pop RP
261
262- sub H0, T0
263+ add H0, T0
264 mov T0, (RP)
265- sbb H1, T1
266+ adc H1, T1
267 mov T1, 8(RP)
268- sbb H2, T2
269+ adc C0, T2
270 mov T2, 16(RP)
271- sbb C0, T3
272+ adc $0, T3
273 mov T3, 24(RP)
274- sbb C0, T4
275+ adc $0, T4
276 mov T4, 32(RP)
277- sbb C0, T5
278+ adc $0, T5
279 mov T5, 40(RP)
280
281 pop %r15