blob: 73723a998d5427f28522db5b2b7ed5fea4f9a43d [file] [log] [blame]
Patrick Williams d8c66bc 2016-06-20 12:57:21 -0500 1Upstream-Status: Backport
2 https://git.lysator.liu.se/nettle/nettle/commit/fa269b6ad06dd13c901dbd84a12e52b918a09cd7
3
4CVE: CVE-2015-8804
5Signed-off-by: Armin Kuster <akuster@mvista.com>
6
7Index: nettle-2.7.1/ChangeLog
8===================================================================
9--- nettle-2.7.1.orig/ChangeLog
10+++ nettle-2.7.1/ChangeLog
11@@ -1,3 +1,11 @@
12+2015-12-15 Niels Möller <nisse@lysator.liu.se>
13+
14+ * x86_64/ecc-384-modp.asm: Fixed carry propagation bug. Problem
15+ reported by Hanno Böck. Simplified the folding to always use a
16+ non-negative carry; the old code attempted to add in a carry which
17+ could be either positive or negative, but didn't get that case
18+ right.
19+
20 2015-12-10 Niels Möller <nisse@lysator.liu.se>
21
22 * ecc-256.c (ecc_256_modp): Fixed carry propagation bug. Problem
23Index: nettle-2.7.1/x86_64/ecc-384-modp.asm
24===================================================================
25--- nettle-2.7.1.orig/x86_64/ecc-384-modp.asm
26+++ nettle-2.7.1/x86_64/ecc-384-modp.asm
27@@ -20,7 +20,7 @@ C MA 02111-1301, USA.
28 .file "ecc-384-modp.asm"
29
30 define(<RP>, <%rsi>)
31-define(<D4>, <%rax>)
32+define(<D5>, <%rax>)
33 define(<T0>, <%rbx>)
34 define(<T1>, <%rcx>)
35 define(<T2>, <%rdx>)
36@@ -35,8 +35,8 @@ define(<H4>, <%r13>)
37 define(<H5>, <%r14>)
38 define(<C2>, <%r15>)
39 define(<C0>, H5) C Overlap
40-define(<D0>, RP) C Overlap
41-define(<TMP>, H4) C Overlap
42+define(<TMP>, RP) C Overlap
43+
44
45 PROLOGUE(nettle_ecc_384_modp)
46 W64_ENTRY(2, 0)
47@@ -48,34 +48,38 @@ PROLOGUE(nettle_ecc_384_modp)
48 push %r14
49 push %r15
50
51- C First get top 2 limbs, which need folding twice
52+ C First get top 2 limbs, which need folding twice.
53+ C B^10 = B^6 + B^4 + 2^32 (B-1)B^4.
54+ C We handle the terms as follows:
55 C
56- C H5 H4
57- C -H5
58- C ------
59- C H0 D4
60+ C B^6: Folded immediately.
61 C
62- C Then shift right, (H1,H0,D4) <-- (H0,D4) << 32
63- C and add
64+ C B^4: Delayed, added in during the next folding.
65 C
66- C H5 H4
67- C H1 H0
68- C ----------
69- C C2 H1 H0
70-
71- mov 80(RP), D4
72- mov 88(RP), H0
73- mov D4, H4
74- mov H0, H5
75- sub H0, D4
76- sbb $0, H0
77-
78- mov D4, T2
79- mov H0, H1
80- shl $32, H0
81- shr $32, T2
82+ C 2^32(B-1) B^4: Low half limb delayed until the next
83+ C folding. Top 1.5 limbs subtracted and shifted now, resulting
84+ C in 2.5 limbs. The low limb is saved in D5; the high 1.5 limbs
85+ C are added in.
86+
87+ mov 80(RP), H4
88+ mov 88(RP), H5
89+ C Shift right 32 bits, into H1, H0
90+ mov H4, H0
91+ mov H5, H1
92+ mov H5, D5
93 shr $32, H1
94- or T2, H0
95+ shl $32, D5
96+ shr $32, H0
97+ or D5, H0
98+
99+ C H1 H0
100+ C - H1 H0
101+ C --------
102+ C H1 H0 D5
103+ mov H0, D5
104+ neg D5
105+ sbb H1, H0
106+ sbb $0, H1
107
108 xor C2, C2
109 add H4, H0
110@@ -114,118 +118,95 @@ PROLOGUE(nettle_ecc_384_modp)
111 adc H3, T5
112 adc $0, C0
113
114- C H3 H2 H1 H0 0
115- C - H4 H3 H2 H1 H0
116- C ---------------
117- C H3 H2 H1 H0 D0
118-
119- mov XREG(D4), XREG(D4)
120- mov H0, D0
121- neg D0
122- sbb H1, H0
123- sbb H2, H1
124- sbb H3, H2
125- sbb H4, H3
126- sbb $0, D4
127-
128- C Shift right. High bits are sign, to be added to C0.
129- mov D4, TMP
130- sar $32, TMP
131- shl $32, D4
132- add TMP, C0
133-
134+ C Shift left, including low half of H4
135 mov H3, TMP
136+ shl $32, H4
137 shr $32, TMP
138- shl $32, H3
139- or TMP, D4
140+ or TMP, H4
141
142 mov H2, TMP
143+ shl $32, H3
144 shr $32, TMP
145- shl $32, H2
146 or TMP, H3
147
148 mov H1, TMP
149+ shl $32, H2
150 shr $32, TMP
151- shl $32, H1
152 or TMP, H2
153
154 mov H0, TMP
155+ shl $32, H1
156 shr $32, TMP
157- shl $32, H0
158 or TMP, H1
159
160- mov D0, TMP
161- shr $32, TMP
162- shl $32, D0
163- or TMP, H0
164+ shl $32, H0
165+
166+ C H4 H3 H2 H1 H0 0
167+ C - H4 H3 H2 H1 H0
168+ C ---------------
169+ C H4 H3 H2 H1 H0 TMP
170
171- add D0, T0
172+ mov H0, TMP
173+ neg TMP
174+ sbb H1, H0
175+ sbb H2, H1
176+ sbb H3, H2
177+ sbb H4, H3
178+ sbb $0, H4
179+
180+ add TMP, T0
181 adc H0, T1
182 adc H1, T2
183 adc H2, T3
184 adc H3, T4
185- adc D4, T5
186+ adc H4, T5
187 adc $0, C0
188
189 C Remains to add in C2 and C0
190- C C0 C0<<32 (-2^32+1)C0
191- C C2 C2<<32 (-2^32+1)C2
192- C where C2 is always positive, while C0 may be -1.
193+ C Set H1, H0 = (2^96 - 2^32 + 1) C0
194 mov C0, H0
195 mov C0, H1
196- mov C0, H2
197- sar $63, C0 C Get sign
198 shl $32, H1
199- sub H1, H0 C Gives borrow iff C0 > 0
200+ sub H1, H0
201 sbb $0, H1
202- add C0, H2
203
204+ C Set H3, H2 = (2^96 - 2^32 + 1) C2
205+ mov C2, H2
206+ mov C2, H3
207+ shl $32, H3
208+ sub H3, H2
209+ sbb $0, H3
210+ add C0, H2 C No carry. Could use lea trick
211+
212+ xor C0, C0
213 add H0, T0
214 adc H1, T1
215- adc $0, H2
216- adc $0, C0
217-
218- C Set (H1 H0) <-- C2 << 96 - C2 << 32 + 1
219- mov C2, H0
220- mov C2, H1
221- shl $32, H1
222- sub H1, H0
223- sbb $0, H1
224-
225- add H2, H0
226- adc C0, H1
227- adc C2, C0
228- mov C0, H2
229- sar $63, C0
230- add H0, T2
231- adc H1, T3
232- adc H2, T4
233- adc C0, T5
234- sbb C0, C0
235+ adc H2, T2
236+ adc H3, T3
237+ adc C2, T4
238+ adc D5, T5 C Value delayed from initial folding
239+ adc $0, C0 C Use sbb and switch sign?
240
241 C Final unlikely carry
242 mov C0, H0
243 mov C0, H1
244- mov C0, H2
245- sar $63, C0
246 shl $32, H1
247 sub H1, H0
248 sbb $0, H1
249- add C0, H2
250
251 pop RP
252
253- sub H0, T0
254+ add H0, T0
255 mov T0, (RP)
256- sbb H1, T1
257+ adc H1, T1
258 mov T1, 8(RP)
259- sbb H2, T2
260+ adc C0, T2
261 mov T2, 16(RP)
262- sbb C0, T3
263+ adc $0, T3
264 mov T3, 24(RP)
265- sbb C0, T4
266+ adc $0, T4
267 mov T4, 32(RP)
268- sbb C0, T5
269+ adc $0, T5
270 mov T5, 40(RP)
271
272 pop %r15