Commit | Line | Data |
---|---|---|
7efe4076 JG |
1 | /* |
2 | * Serpent Cipher 8-way parallel algorithm (x86_64/AVX) | |
3 | * | |
4 | * Copyright (C) 2012 Johannes Goetzfried | |
5 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | |
6 | * | |
a05248ed | 7 | * Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> |
7efe4076 JG |
8 | * |
9 | * This program is free software; you can redistribute it and/or modify | |
10 | * it under the terms of the GNU General Public License as published by | |
11 | * the Free Software Foundation; either version 2 of the License, or | |
12 | * (at your option) any later version. | |
13 | * | |
14 | * This program is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | * GNU General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU General Public License | |
20 | * along with this program; if not, write to the Free Software | |
21 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
22 | * USA | |
23 | * | |
24 | */ | |
25 | ||
2dcfd44d | 26 | #include <linux/linkage.h> |
8691ccd7 | 27 | #include <asm/frame.h> |
facd416f JK |
28 | #include "glue_helper-asm-avx.S" |
29 | ||
7efe4076 | 30 | .file "serpent-avx-x86_64-asm_64.S" |
facd416f JK |
31 | |
32 | .data | |
33 | .align 16 | |
34 | ||
/* Sixteen byte indices 15..0. Passed by name to load_ctr_8way() in serpent_ctr_8way_avx.
 * NOTE(review): presumably a byte-reversal shuffle mask; the consumer is defined outside this file - confirm. */
35 | .Lbswap128_mask: | |
36 | .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 | |
a05248ed JK |
/* Sixteen-byte constant passed by name to load_xts_8way() in both XTS entry points.
 * NOTE(review): 0x87 / 1 look like the GF(2^128) tweak-doubling constants; the consumer is defined outside this file - confirm. */
37 | .Lxts_gf128mul_and_shl1_mask: |
38 | .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 | |
facd416f | 39 | |
7efe4076 JG |
40 | .text |
41 | ||
/* Register holding the ctx argument; get_key() uses it as the base address for round-key loads. */
42 | #define CTX %rdi | |
43 | ||
44 | /********************************************************************** | |
45 | 8-way AVX serpent | |
46 | **********************************************************************/ | |
/* Register group 1: four state registers plus RE1, which the S-box, LK2 and KL2 macros receive as their spare x4. */
47 | #define RA1 %xmm0 | |
48 | #define RB1 %xmm1 | |
49 | #define RC1 %xmm2 | |
50 | #define RD1 %xmm3 | |
51 | #define RE1 %xmm4 | |
52 | ||
/* Scratch register written and read inside the S-box macros. */
53 | #define tp %xmm5 | |
54 | ||
/* Register group 2: same layout as group 1; the round macros paste the suffixes 1 and 2 onto RA..RE. */
55 | #define RA2 %xmm6 | |
56 | #define RB2 %xmm7 | |
57 | #define RC2 %xmm8 | |
58 | #define RD2 %xmm9 | |
59 | #define RE2 %xmm10 | |
60 | ||
/* Set to all ones by vpcmpeqd at the top of both blk8 routines, so xor with RNOT is a bitwise NOT. */
61 | #define RNOT %xmm11 | |
62 | ||
/* Targets of get_key(); also handed to the transposes and to load_ctr_8way/load_xts_8way as temporaries. */
63 | #define RK0 %xmm12 | |
64 | #define RK1 %xmm13 | |
65 | #define RK2 %xmm14 | |
66 | #define RK3 %xmm15 | |
67 | ||
68 | ||
/* S-box 0, written as two halves that S()/SP() paste as SBOX ## _1 then SBOX ## _2 for one register group.
 * Only vpor/vpand/vpxor are used; xor with RNOT acts as bitwise NOT. tp is scratch inside S0_1 only,
 * and the incoming value of x4 is never read (it is written before its first use). */
69 | #define S0_1(x0, x1, x2, x3, x4) \ | |
70 | vpor x0, x3, tp; \ | |
71 | vpxor x3, x0, x0; \ | |
72 | vpxor x2, x3, x4; \ | |
73 | vpxor RNOT, x4, x4; \ | |
74 | vpxor x1, tp, x3; \ | |
75 | vpand x0, x1, x1; \ | |
76 | vpxor x4, x1, x1; \ | |
77 | vpxor x0, x2, x2; | |
78 | #define S0_2(x0, x1, x2, x3, x4) \ | |
79 | vpxor x3, x0, x0; \ | |
80 | vpor x0, x4, x4; \ | |
81 | vpxor x2, x0, x0; \ | |
82 | vpand x1, x2, x2; \ | |
83 | vpxor x2, x3, x3; \ | |
84 | vpxor RNOT, x1, x1; \ | |
85 | vpxor x4, x2, x2; \ | |
86 | vpxor x2, x1, x1; | |
87 | ||
/* S-box 1 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside S1_1 only;
 * x4 is first written in S1_1 and its incoming value is never read. */
88 | #define S1_1(x0, x1, x2, x3, x4) \ | |
89 | vpxor x0, x1, tp; \ | |
90 | vpxor x3, x0, x0; \ | |
91 | vpxor RNOT, x3, x3; \ | |
92 | vpand tp, x1, x4; \ | |
93 | vpor tp, x0, x0; \ | |
94 | vpxor x2, x3, x3; \ | |
95 | vpxor x3, x0, x0; \ | |
96 | vpxor x3, tp, x1; | |
97 | #define S1_2(x0, x1, x2, x3, x4) \ | |
98 | vpxor x4, x3, x3; \ | |
99 | vpor x4, x1, x1; \ | |
100 | vpxor x2, x4, x4; \ | |
101 | vpand x0, x2, x2; \ | |
102 | vpxor x1, x2, x2; \ | |
103 | vpor x0, x1, x1; \ | |
104 | vpxor RNOT, x0, x0; \ | |
105 | vpxor x2, x0, x0; \ | |
106 | vpxor x1, x4, x4; | |
107 | ||
/* S-box 2 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. S2_2 reads the value S2_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is untouched by S2_1 and only written (never read) by S2_2. */
108 | #define S2_1(x0, x1, x2, x3, x4) \ | |
109 | vpxor RNOT, x3, x3; \ | |
110 | vpxor x0, x1, x1; \ | |
111 | vpand x2, x0, tp; \ | |
112 | vpxor x3, tp, tp; \ | |
113 | vpor x0, x3, x3; \ | |
114 | vpxor x1, x2, x2; \ | |
115 | vpxor x1, x3, x3; \ | |
116 | vpand tp, x1, x1; | |
117 | #define S2_2(x0, x1, x2, x3, x4) \ | |
118 | vpxor x2, tp, tp; \ | |
119 | vpand x3, x2, x2; \ | |
120 | vpor x1, x3, x3; \ | |
121 | vpxor RNOT, tp, tp; \ | |
122 | vpxor tp, x3, x3; \ | |
123 | vpxor tp, x0, x4; \ | |
124 | vpxor x2, tp, x0; \ | |
125 | vpor x2, x1, x1; | |
126 | ||
/* S-box 3 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Uses only vpor/vpand/vpxor and does not reference RNOT. tp is scratch inside S3_1 only;
 * x4 is first written in S3_1 and its incoming value is never read. */
127 | #define S3_1(x0, x1, x2, x3, x4) \ | |
128 | vpxor x3, x1, tp; \ | |
129 | vpor x0, x3, x3; \ | |
130 | vpand x0, x1, x4; \ | |
131 | vpxor x2, x0, x0; \ | |
132 | vpxor tp, x2, x2; \ | |
133 | vpand x3, tp, x1; \ | |
134 | vpxor x3, x2, x2; \ | |
135 | vpor x4, x0, x0; \ | |
136 | vpxor x3, x4, x4; | |
137 | #define S3_2(x0, x1, x2, x3, x4) \ | |
138 | vpxor x0, x1, x1; \ | |
139 | vpand x3, x0, x0; \ | |
140 | vpand x4, x3, x3; \ | |
141 | vpxor x2, x3, x3; \ | |
142 | vpor x1, x4, x4; \ | |
143 | vpand x1, x2, x2; \ | |
144 | vpxor x3, x4, x4; \ | |
145 | vpxor x3, x0, x0; \ | |
146 | vpxor x2, x3, x3; | |
147 | ||
/* S-box 4 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. S4_2 reads the value S4_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is first written in S4_1; its incoming value is never read. */
148 | #define S4_1(x0, x1, x2, x3, x4) \ | |
149 | vpand x0, x3, tp; \ | |
150 | vpxor x3, x0, x0; \ | |
151 | vpxor x2, tp, tp; \ | |
152 | vpor x3, x2, x2; \ | |
153 | vpxor x1, x0, x0; \ | |
154 | vpxor tp, x3, x4; \ | |
155 | vpor x0, x2, x2; \ | |
156 | vpxor x1, x2, x2; | |
157 | #define S4_2(x0, x1, x2, x3, x4) \ | |
158 | vpand x0, x1, x1; \ | |
159 | vpxor x4, x1, x1; \ | |
160 | vpand x2, x4, x4; \ | |
161 | vpxor tp, x2, x2; \ | |
162 | vpxor x0, x4, x4; \ | |
163 | vpor x1, tp, x3; \ | |
164 | vpxor RNOT, x1, x1; \ | |
165 | vpxor x0, x3, x3; | |
166 | ||
/* S-box 5 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside S5_1 only;
 * x4 is first written in S5_1 and its incoming value is never read. */
167 | #define S5_1(x0, x1, x2, x3, x4) \ | |
168 | vpor x0, x1, tp; \ | |
169 | vpxor tp, x2, x2; \ | |
170 | vpxor RNOT, x3, x3; \ | |
171 | vpxor x0, x1, x4; \ | |
172 | vpxor x2, x0, x0; \ | |
173 | vpand x4, tp, x1; \ | |
174 | vpor x3, x4, x4; \ | |
175 | vpxor x0, x4, x4; | |
176 | #define S5_2(x0, x1, x2, x3, x4) \ | |
177 | vpand x3, x0, x0; \ | |
178 | vpxor x3, x1, x1; \ | |
179 | vpxor x2, x3, x3; \ | |
180 | vpxor x1, x0, x0; \ | |
181 | vpand x4, x2, x2; \ | |
182 | vpxor x2, x1, x1; \ | |
183 | vpand x0, x2, x2; \ | |
184 | vpxor x2, x3, x3; | |
185 | ||
/* S-box 6 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside S6_1 only;
 * x4 is first written in S6_1 and its incoming value is never read. */
186 | #define S6_1(x0, x1, x2, x3, x4) \ | |
187 | vpxor x0, x3, x3; \ | |
188 | vpxor x2, x1, tp; \ | |
189 | vpxor x0, x2, x2; \ | |
190 | vpand x3, x0, x0; \ | |
191 | vpor x3, tp, tp; \ | |
192 | vpxor RNOT, x1, x4; \ | |
193 | vpxor tp, x0, x0; \ | |
194 | vpxor x2, tp, x1; | |
195 | #define S6_2(x0, x1, x2, x3, x4) \ | |
196 | vpxor x4, x3, x3; \ | |
197 | vpxor x0, x4, x4; \ | |
198 | vpand x0, x2, x2; \ | |
199 | vpxor x1, x4, x4; \ | |
200 | vpxor x3, x2, x2; \ | |
201 | vpand x1, x3, x3; \ | |
202 | vpxor x0, x3, x3; \ | |
203 | vpxor x2, x1, x1; | |
204 | ||
/* S-box 7 in two halves (pasted by S()/SP() as SBOX ## _1 then SBOX ## _2).
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside S7_1 only;
 * x4 is first written in S7_1 and its incoming value is never read. */
205 | #define S7_1(x0, x1, x2, x3, x4) \ | |
206 | vpxor RNOT, x1, tp; \ | |
207 | vpxor RNOT, x0, x0; \ | |
208 | vpand x2, tp, x1; \ | |
209 | vpxor x3, x1, x1; \ | |
210 | vpor tp, x3, x3; \ | |
211 | vpxor x2, tp, x4; \ | |
212 | vpxor x3, x2, x2; \ | |
213 | vpxor x0, x3, x3; \ | |
214 | vpor x1, x0, x0; | |
215 | #define S7_2(x0, x1, x2, x3, x4) \ | |
216 | vpand x0, x2, x2; \ | |
217 | vpxor x4, x0, x0; \ | |
218 | vpxor x3, x4, x4; \ | |
219 | vpand x0, x3, x3; \ | |
220 | vpxor x1, x4, x4; \ | |
221 | vpxor x4, x2, x2; \ | |
222 | vpxor x1, x3, x3; \ | |
223 | vpor x0, x4, x4; \ | |
224 | vpxor x1, x4, x4; | |
225 | ||
/* SI0 as used by the decrypt routine (presumably the inverse of S0), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside SI0_1 only;
 * x4 is first written in SI0_1 and its incoming value is never read. */
226 | #define SI0_1(x0, x1, x2, x3, x4) \ | |
227 | vpxor x0, x1, x1; \ | |
228 | vpor x1, x3, tp; \ | |
229 | vpxor x1, x3, x4; \ | |
230 | vpxor RNOT, x0, x0; \ | |
231 | vpxor tp, x2, x2; \ | |
232 | vpxor x0, tp, x3; \ | |
233 | vpand x1, x0, x0; \ | |
234 | vpxor x2, x0, x0; | |
235 | #define SI0_2(x0, x1, x2, x3, x4) \ | |
236 | vpand x3, x2, x2; \ | |
237 | vpxor x4, x3, x3; \ | |
238 | vpxor x3, x2, x2; \ | |
239 | vpxor x3, x1, x1; \ | |
240 | vpand x0, x3, x3; \ | |
241 | vpxor x0, x1, x1; \ | |
242 | vpxor x2, x0, x0; \ | |
243 | vpxor x3, x4, x4; | |
244 | ||
/* SI1 as used by the decrypt routine (presumably the inverse of S1), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. SI1_2 reads the value SI1_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is first written in SI1_1; its incoming value is never read. */
245 | #define SI1_1(x0, x1, x2, x3, x4) \ | |
246 | vpxor x3, x1, x1; \ | |
247 | vpxor x2, x0, tp; \ | |
248 | vpxor RNOT, x2, x2; \ | |
249 | vpor x1, x0, x4; \ | |
250 | vpxor x3, x4, x4; \ | |
251 | vpand x1, x3, x3; \ | |
252 | vpxor x2, x1, x1; \ | |
253 | vpand x4, x2, x2; | |
254 | #define SI1_2(x0, x1, x2, x3, x4) \ | |
255 | vpxor x1, x4, x4; \ | |
256 | vpor x3, x1, x1; \ | |
257 | vpxor tp, x3, x3; \ | |
258 | vpxor tp, x2, x2; \ | |
259 | vpor x4, tp, x0; \ | |
260 | vpxor x4, x2, x2; \ | |
261 | vpxor x0, x1, x1; \ | |
262 | vpxor x1, x4, x4; | |
263 | ||
/* SI2 as used by the decrypt routine (presumably the inverse of S2), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside SI2_1 only;
 * x4 is first written in SI2_1 and its incoming value is never read. */
264 | #define SI2_1(x0, x1, x2, x3, x4) \ | |
265 | vpxor x1, x2, x2; \ | |
266 | vpxor RNOT, x3, tp; \ | |
267 | vpor x2, tp, tp; \ | |
268 | vpxor x3, x2, x2; \ | |
269 | vpxor x0, x3, x4; \ | |
270 | vpxor x1, tp, x3; \ | |
271 | vpor x2, x1, x1; \ | |
272 | vpxor x0, x2, x2; | |
273 | #define SI2_2(x0, x1, x2, x3, x4) \ | |
274 | vpxor x4, x1, x1; \ | |
275 | vpor x3, x4, x4; \ | |
276 | vpxor x3, x2, x2; \ | |
277 | vpxor x2, x4, x4; \ | |
278 | vpand x1, x2, x2; \ | |
279 | vpxor x3, x2, x2; \ | |
280 | vpxor x4, x3, x3; \ | |
281 | vpxor x0, x4, x4; | |
282 | ||
/* SI3 as used by the decrypt routine (presumably the inverse of S3), in two halves pasted by S()/SP().
 * Uses only vpor/vpand/vpxor and does not reference RNOT. tp is scratch inside SI3_1 only;
 * x4 is first written in SI3_1 and its incoming value is never read. */
283 | #define SI3_1(x0, x1, x2, x3, x4) \ | |
284 | vpxor x1, x2, x2; \ | |
285 | vpand x2, x1, tp; \ | |
286 | vpxor x0, tp, tp; \ | |
287 | vpor x1, x0, x0; \ | |
288 | vpxor x3, x1, x4; \ | |
289 | vpxor x3, x0, x0; \ | |
290 | vpor tp, x3, x3; \ | |
291 | vpxor x2, tp, x1; | |
292 | #define SI3_2(x0, x1, x2, x3, x4) \ | |
293 | vpxor x3, x1, x1; \ | |
294 | vpxor x2, x0, x0; \ | |
295 | vpxor x3, x2, x2; \ | |
296 | vpand x1, x3, x3; \ | |
297 | vpxor x0, x1, x1; \ | |
298 | vpand x2, x0, x0; \ | |
299 | vpxor x3, x4, x4; \ | |
300 | vpxor x0, x3, x3; \ | |
301 | vpxor x1, x0, x0; | |
302 | ||
/* SI4 as used by the decrypt routine (presumably the inverse of S4), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. tp is scratch inside SI4_1 only;
 * x4 is first written in SI4_1 and its incoming value is never read. */
303 | #define SI4_1(x0, x1, x2, x3, x4) \ | |
304 | vpxor x3, x2, x2; \ | |
305 | vpand x1, x0, tp; \ | |
306 | vpxor x2, tp, tp; \ | |
307 | vpor x3, x2, x2; \ | |
308 | vpxor RNOT, x0, x4; \ | |
309 | vpxor tp, x1, x1; \ | |
310 | vpxor x2, tp, x0; \ | |
311 | vpand x4, x2, x2; | |
312 | #define SI4_2(x0, x1, x2, x3, x4) \ | |
313 | vpxor x0, x2, x2; \ | |
314 | vpor x4, x0, x0; \ | |
315 | vpxor x3, x0, x0; \ | |
316 | vpand x2, x3, x3; \ | |
317 | vpxor x3, x4, x4; \ | |
318 | vpxor x1, x3, x3; \ | |
319 | vpand x0, x1, x1; \ | |
320 | vpxor x1, x4, x4; \ | |
321 | vpxor x3, x0, x0; | |
322 | ||
/* SI5 as used by the decrypt routine (presumably the inverse of S5), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. SI5_2 reads the value SI5_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is untouched by SI5_1 and is written by the first
 * instruction of SI5_2, so its incoming value is never read. */
323 | #define SI5_1(x0, x1, x2, x3, x4) \ | |
324 | vpor x2, x1, tp; \ | |
325 | vpxor x1, x2, x2; \ | |
326 | vpxor x3, tp, tp; \ | |
327 | vpand x1, x3, x3; \ | |
328 | vpxor x3, x2, x2; \ | |
329 | vpor x0, x3, x3; \ | |
330 | vpxor RNOT, x0, x0; \ | |
331 | vpxor x2, x3, x3; \ | |
332 | vpor x0, x2, x2; | |
333 | #define SI5_2(x0, x1, x2, x3, x4) \ | |
334 | vpxor tp, x1, x4; \ | |
335 | vpxor x4, x2, x2; \ | |
336 | vpand x0, x4, x4; \ | |
337 | vpxor tp, x0, x0; \ | |
338 | vpxor x3, tp, x1; \ | |
339 | vpand x2, x0, x0; \ | |
340 | vpxor x3, x2, x2; \ | |
341 | vpxor x2, x0, x0; \ | |
342 | vpxor x4, x2, x2; \ | |
343 | vpxor x3, x4, x4; | |
344 | ||
/* SI6 as used by the decrypt routine (presumably the inverse of S6), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. SI6_2 reads the value SI6_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is untouched by SI6_1 and written before any read in SI6_2. */
345 | #define SI6_1(x0, x1, x2, x3, x4) \ | |
346 | vpxor x2, x0, x0; \ | |
347 | vpand x3, x0, tp; \ | |
348 | vpxor x3, x2, x2; \ | |
349 | vpxor x2, tp, tp; \ | |
350 | vpxor x1, x3, x3; \ | |
351 | vpor x0, x2, x2; \ | |
352 | vpxor x3, x2, x2; \ | |
353 | vpand tp, x3, x3; | |
354 | #define SI6_2(x0, x1, x2, x3, x4) \ | |
355 | vpxor RNOT, tp, tp; \ | |
356 | vpxor x1, x3, x3; \ | |
357 | vpand x2, x1, x1; \ | |
358 | vpxor tp, x0, x4; \ | |
359 | vpxor x4, x3, x3; \ | |
360 | vpxor x2, x4, x4; \ | |
361 | vpxor x1, tp, x0; \ | |
362 | vpxor x0, x2, x2; | |
363 | ||
/* SI7 as used by the decrypt routine (presumably the inverse of S7), in two halves pasted by S()/SP().
 * Boolean ops only; xor with RNOT acts as NOT. SI7_2 reads the value SI7_1 left in tp, so nothing may
 * overwrite tp between the two halves. x4 is first written in SI7_1; its incoming value is never read. */
364 | #define SI7_1(x0, x1, x2, x3, x4) \ | |
365 | vpand x0, x3, tp; \ | |
366 | vpxor x2, x0, x0; \ | |
367 | vpor x3, x2, x2; \ | |
368 | vpxor x1, x3, x4; \ | |
369 | vpxor RNOT, x0, x0; \ | |
370 | vpor tp, x1, x1; \ | |
371 | vpxor x0, x4, x4; \ | |
372 | vpand x2, x0, x0; \ | |
373 | vpxor x1, x0, x0; | |
374 | #define SI7_2(x0, x1, x2, x3, x4) \ | |
375 | vpand x2, x1, x1; \ | |
376 | vpxor x2, tp, x3; \ | |
377 | vpxor x3, x4, x4; \ | |
378 | vpand x3, x2, x2; \ | |
379 | vpor x0, x3, x3; \ | |
380 | vpxor x4, x1, x1; \ | |
381 | vpxor x4, x3, x3; \ | |
382 | vpand x0, x4, x4; \ | |
383 | vpxor x2, x4, x4; | |
384 | ||
/* Broadcast the 32-bit word at byte offset (4*i + j)*4 from CTX into every lane of t,
 * i.e. word j of the i-th group of four consecutive 32-bit words starting at CTX. */
385 | #define get_key(i, j, t) \ | |
386 | vbroadcastss (4*(i)+(j))*4(CTX), t; | |
387 | ||
/* Key addition for both register groups: load words 0..3 of round key i into RK0..RK3, then xor
 * RKn into xn ## 1 and xn ## 2. x0..x4 are register-name stems (e.g. RA); x4 is accepted but unused. */
388 | #define K2(x0, x1, x2, x3, x4, i) \ | |
389 | get_key(i, 0, RK0); \ | |
390 | get_key(i, 1, RK1); \ | |
391 | get_key(i, 2, RK2); \ | |
392 | get_key(i, 3, RK3); \ | |
393 | vpxor RK0, x0 ## 1, x0 ## 1; \ | |
394 | vpxor RK1, x1 ## 1, x1 ## 1; \ | |
395 | vpxor RK2, x2 ## 1, x2 ## 1; \ | |
396 | vpxor RK3, x3 ## 1, x3 ## 1; \ | |
397 | vpxor RK0, x0 ## 2, x0 ## 2; \ | |
398 | vpxor RK1, x1 ## 2, x1 ## 2; \ | |
399 | vpxor RK2, x2 ## 2, x2 ## 2; \ | |
400 | vpxor RK3, x3 ## 2, x3 ## 2; | |
401 | ||
/* Encrypt-side mixing step followed by key addition, applied to both register groups.
 * Each 32-bit lane rotate is built from vpslld + vpsrld + vpor with x4 as the temporary.
 * Per group, in data-dependency order (rol = rotate left, << = plain shift):
 *   x0 = rol(x0, 13); x1 ^= x0; x2 = rol(x2, 3); x1 ^= x2;
 *   x1 = rol(x1, 1);  x3 ^= x2 ^ (x0 << 3);
 *   x3 = rol(x3, 7);  x0 ^= x1 ^ x3; x2 ^= x3 ^ (x1 << 7);
 *   x1 ^= RK1; x3 ^= RK3; x0 = rol(x0, 5); x2 = rol(x2, 22); x0 ^= RK0; x2 ^= RK2;
 * The four get_key(i, ...) loads of round key i are interleaved between the arithmetic of the
 * two groups; all four are issued before the first RKn is consumed. x4's incoming value is never read. */
402 | #define LK2(x0, x1, x2, x3, x4, i) \ | |
403 | vpslld $13, x0 ## 1, x4 ## 1; \ | |
404 | vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \ | |
405 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | |
406 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | |
407 | vpslld $3, x2 ## 1, x4 ## 1; \ | |
408 | vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \ | |
409 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
410 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | |
411 | vpslld $13, x0 ## 2, x4 ## 2; \ | |
412 | vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \ | |
413 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | |
414 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | |
415 | vpslld $3, x2 ## 2, x4 ## 2; \ | |
416 | vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \ | |
417 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | |
418 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | |
419 | vpslld $1, x1 ## 1, x4 ## 1; \ | |
420 | vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \ | |
421 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | |
422 | vpslld $3, x0 ## 1, x4 ## 1; \ | |
423 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | |
424 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | |
425 | get_key(i, 1, RK1); \ | |
426 | vpslld $1, x1 ## 2, x4 ## 2; \ | |
427 | vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \ | |
428 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | |
429 | vpslld $3, x0 ## 2, x4 ## 2; \ | |
430 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | |
431 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | |
432 | get_key(i, 3, RK3); \ | |
433 | vpslld $7, x3 ## 1, x4 ## 1; \ | |
434 | vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \ | |
435 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | |
436 | vpslld $7, x1 ## 1, x4 ## 1; \ | |
437 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | |
438 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | |
439 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | |
440 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
441 | get_key(i, 0, RK0); \ | |
442 | vpslld $7, x3 ## 2, x4 ## 2; \ | |
443 | vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \ | |
444 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | |
445 | vpslld $7, x1 ## 2, x4 ## 2; \ | |
446 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | |
447 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | |
448 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | |
449 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | |
450 | get_key(i, 2, RK2); \ | |
451 | vpxor RK1, x1 ## 1, x1 ## 1; \ | |
452 | vpxor RK3, x3 ## 1, x3 ## 1; \ | |
453 | vpslld $5, x0 ## 1, x4 ## 1; \ | |
454 | vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \ | |
455 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | |
456 | vpslld $22, x2 ## 1, x4 ## 1; \ | |
457 | vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \ | |
458 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
459 | vpxor RK0, x0 ## 1, x0 ## 1; \ | |
460 | vpxor RK2, x2 ## 1, x2 ## 1; \ | |
461 | vpxor RK1, x1 ## 2, x1 ## 2; \ | |
462 | vpxor RK3, x3 ## 2, x3 ## 2; \ | |
463 | vpslld $5, x0 ## 2, x4 ## 2; \ | |
464 | vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \ | |
465 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | |
466 | vpslld $22, x2 ## 2, x4 ## 2; \ | |
467 | vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \ | |
468 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | |
469 | vpxor RK0, x0 ## 2, x0 ## 2; \ | |
470 | vpxor RK2, x2 ## 2, x2 ## 2; | |
471 | ||
/* Decrypt-side counterpart of LK2: key addition first, then the mixing steps undone in reverse,
 * applied to both register groups. Rotates right are built from vpsrld + vpslld + vpor via x4.
 * Per group, in data-dependency order (ror = rotate right, << = plain shift):
 *   x0 ^= RK0; x2 ^= RK2; x0 = ror(x0, 5); x3 ^= RK3; x1 ^= RK1; x2 = ror(x2, 22);
 *   x2 ^= x3 ^ (x1 << 7); x0 ^= x3 ^ x1; x1 = ror(x1, 1);
 *   x3 = ror(x3, 7); x1 ^= x0; x3 ^= (x0 << 3);
 *   x0 = ror(x0, 13); x1 ^= x2; x3 ^= x2; x2 = ror(x2, 3);
 * The parameter i is not referenced: no get_key() is issued here, so RK0-RK3 must already hold the
 * wanted round key. In __serpent_dec_blk8_avx the preceding SP(..., i) loads them. */
472 | #define KL2(x0, x1, x2, x3, x4, i) \ | |
473 | vpxor RK0, x0 ## 1, x0 ## 1; \ | |
474 | vpxor RK2, x2 ## 1, x2 ## 1; \ | |
475 | vpsrld $5, x0 ## 1, x4 ## 1; \ | |
476 | vpslld $(32 - 5), x0 ## 1, x0 ## 1; \ | |
477 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | |
478 | vpxor RK3, x3 ## 1, x3 ## 1; \ | |
479 | vpxor RK1, x1 ## 1, x1 ## 1; \ | |
480 | vpsrld $22, x2 ## 1, x4 ## 1; \ | |
481 | vpslld $(32 - 22), x2 ## 1, x2 ## 1; \ | |
482 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
483 | vpxor x3 ## 1, x2 ## 1, x2 ## 1; \ | |
484 | vpxor RK0, x0 ## 2, x0 ## 2; \ | |
485 | vpxor RK2, x2 ## 2, x2 ## 2; \ | |
486 | vpsrld $5, x0 ## 2, x4 ## 2; \ | |
487 | vpslld $(32 - 5), x0 ## 2, x0 ## 2; \ | |
488 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | |
489 | vpxor RK3, x3 ## 2, x3 ## 2; \ | |
490 | vpxor RK1, x1 ## 2, x1 ## 2; \ | |
491 | vpsrld $22, x2 ## 2, x4 ## 2; \ | |
492 | vpslld $(32 - 22), x2 ## 2, x2 ## 2; \ | |
493 | vpor x4 ## 2, x2 ## 2, x2 ## 2; \ | |
494 | vpxor x3 ## 2, x2 ## 2, x2 ## 2; \ | |
495 | vpxor x3 ## 1, x0 ## 1, x0 ## 1; \ | |
496 | vpslld $7, x1 ## 1, x4 ## 1; \ | |
497 | vpxor x1 ## 1, x0 ## 1, x0 ## 1; \ | |
498 | vpxor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
499 | vpsrld $1, x1 ## 1, x4 ## 1; \ | |
500 | vpslld $(32 - 1), x1 ## 1, x1 ## 1; \ | |
501 | vpor x4 ## 1, x1 ## 1, x1 ## 1; \ | |
502 | vpxor x3 ## 2, x0 ## 2, x0 ## 2; \ | |
503 | vpslld $7, x1 ## 2, x4 ## 2; \ | |
504 | vpxor x1 ## 2, x0 ## 2, x0 ## 2; \ | |
505 | vpxor x4 ## 2, x2 ## 2, x2 ## 2; \ | |
506 | vpsrld $1, x1 ## 2, x4 ## 2; \ | |
507 | vpslld $(32 - 1), x1 ## 2, x1 ## 2; \ | |
508 | vpor x4 ## 2, x1 ## 2, x1 ## 2; \ | |
509 | vpsrld $7, x3 ## 1, x4 ## 1; \ | |
510 | vpslld $(32 - 7), x3 ## 1, x3 ## 1; \ | |
511 | vpor x4 ## 1, x3 ## 1, x3 ## 1; \ | |
512 | vpxor x0 ## 1, x1 ## 1, x1 ## 1; \ | |
513 | vpslld $3, x0 ## 1, x4 ## 1; \ | |
514 | vpxor x4 ## 1, x3 ## 1, x3 ## 1; \ | |
515 | vpsrld $7, x3 ## 2, x4 ## 2; \ | |
516 | vpslld $(32 - 7), x3 ## 2, x3 ## 2; \ | |
517 | vpor x4 ## 2, x3 ## 2, x3 ## 2; \ | |
518 | vpxor x0 ## 2, x1 ## 2, x1 ## 2; \ | |
519 | vpslld $3, x0 ## 2, x4 ## 2; \ | |
520 | vpxor x4 ## 2, x3 ## 2, x3 ## 2; \ | |
521 | vpsrld $13, x0 ## 1, x4 ## 1; \ | |
522 | vpslld $(32 - 13), x0 ## 1, x0 ## 1; \ | |
523 | vpor x4 ## 1, x0 ## 1, x0 ## 1; \ | |
524 | vpxor x2 ## 1, x1 ## 1, x1 ## 1; \ | |
525 | vpxor x2 ## 1, x3 ## 1, x3 ## 1; \ | |
526 | vpsrld $3, x2 ## 1, x4 ## 1; \ | |
527 | vpslld $(32 - 3), x2 ## 1, x2 ## 1; \ | |
528 | vpor x4 ## 1, x2 ## 1, x2 ## 1; \ | |
529 | vpsrld $13, x0 ## 2, x4 ## 2; \ | |
530 | vpslld $(32 - 13), x0 ## 2, x0 ## 2; \ | |
531 | vpor x4 ## 2, x0 ## 2, x0 ## 2; \ | |
532 | vpxor x2 ## 2, x1 ## 2, x1 ## 2; \ | |
533 | vpxor x2 ## 2, x3 ## 2, x3 ## 2; \ | |
534 | vpsrld $3, x2 ## 2, x4 ## 2; \ | |
535 | vpslld $(32 - 3), x2 ## 2, x2 ## 2; \ | |
536 | vpor x4 ## 2, x2 ## 2, x2 ## 2; | |
537 | ||
/* Apply one S-box to both register groups: SBOX ## _1 then SBOX ## _2 on the suffix-1 registers,
 * then the same pair on the suffix-2 registers. Each pair runs back to back, so tp carried from
 * the _1 half to the _2 half is not disturbed. */
538 | #define S(SBOX, x0, x1, x2, x3, x4) \ | |
539 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | |
540 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | |
541 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | |
542 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); | |
543 | ||
/* Same S-box sequence as S(), with the four get_key(i, ...) loads slotted between the halves.
 * get_key only writes RK0-RK3, which the S-box macros never touch, so the result equals S() and
 * RK0-RK3 hold round key i afterwards (consumed by the KL2 that follows in the decrypt routine).
 * The last line ends in a backslash, so the macro definition continues onto the following empty line. */
544 | #define SP(SBOX, x0, x1, x2, x3, x4, i) \ | |
545 | get_key(i, 0, RK0); \ | |
546 | SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | |
547 | get_key(i, 2, RK2); \ | |
548 | SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \ | |
549 | get_key(i, 3, RK3); \ | |
550 | SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | |
551 | get_key(i, 1, RK1); \ | |
552 | SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \ | |
553 | ||
/* In-place transpose of the 4x4 matrix of 32-bit words held in x0..x3 (one row per register):
 * dword unpacks interleave row pairs, then qword unpacks assemble the columns.
 * t0, t1 and t2 are clobbered; x3 doubles as the fourth temporary before receiving its result. */
554 | #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \ | |
555 | vpunpckldq x1, x0, t0; \ | |
556 | vpunpckhdq x1, x0, t2; \ | |
557 | vpunpckldq x3, x2, t1; \ | |
558 | vpunpckhdq x3, x2, x3; \ | |
559 | \ | |
560 | vpunpcklqdq t1, t0, x0; \ | |
561 | vpunpckhqdq t1, t0, x1; \ | |
562 | vpunpcklqdq x3, t2, x2; \ | |
563 | vpunpckhqdq x3, t2, x3; | |
564 | ||
/* Input-side alias of transpose_4x4, used at the top of both blk8 routines.
 * NOTE(review): presumably each register holds one 16-byte block on entry and word k of four blocks afterwards - confirm against load_8way. */
facd416f | 565 | #define read_blocks(x0, x1, x2, x3, t0, t1, t2) \ |
7efe4076 JG |
566 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) |
567 | ||
facd416f JK |
/* Output-side alias of transpose_4x4, used at the end of both blk8 routines to undo read_blocks' transpose. */
568 | #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ |
569 | transpose_4x4(x0, x1, x2, x3, t0, t1, t2) | |
7efe4076 JG |
570 | |
/* Internal routine reached via call from the ECB-encrypt, CTR and XTS-encrypt entry points.
 * Works entirely in registers: key 0 is added, then S0..S7 cycle four times with LK2 after every
 * S-box except the last, which is followed by K2 with key 32. Besides the documented inputs and
 * outputs it overwrites RE1, RE2, tp, RNOT and RK0-RK3. */
571 | .align 8 | |
facd416f | 572 | __serpent_enc_blk8_avx: |
7efe4076 JG |
573 | /* input: |
574 | * %rdi: ctx, CTX | |
facd416f JK |
575 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks |
576 | * output: | |
577 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks | |
7efe4076 JG |
578 | */ |
579 | ||
/* RNOT := all ones (each lane compared with itself). */
580 | vpcmpeqd RNOT, RNOT, RNOT; | |
581 | ||
/* Transpose each register group; RK0-RK2 serve only as temporaries here. */
facd416f JK |
582 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); |
583 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | |
7efe4076 JG |
584 | |
/* The register stems are permuted from line to line instead of moving data; the argument order
 * given to each LK2 is the one reused by the next S-box line. */
585 | K2(RA, RB, RC, RD, RE, 0); | |
586 | S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1); | |
587 | S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2); | |
588 | S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3); | |
589 | S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4); | |
590 | S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5); | |
591 | S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6); | |
592 | S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7); | |
593 | S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8); | |
594 | S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9); | |
595 | S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10); | |
596 | S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11); | |
597 | S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12); | |
598 | S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13); | |
599 | S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14); | |
600 | S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15); | |
601 | S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16); | |
602 | S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17); | |
603 | S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18); | |
604 | S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19); | |
605 | S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20); | |
606 | S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21); | |
607 | S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22); | |
608 | S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23); | |
609 | S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24); | |
610 | S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25); | |
611 | S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26); | |
612 | S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27); | |
613 | S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28); | |
614 | S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29); | |
615 | S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30); | |
616 | S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31); | |
617 | S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32); | |
618 | ||
/* Transpose back; the result is in the same registers the input arrived in. */
facd416f JK |
619 | write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); |
620 | write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | |
7efe4076 JG |
621 | |
622 | ret; | |
2dcfd44d | 623 | ENDPROC(__serpent_enc_blk8_avx) |
7efe4076 JG |
624 | |
/* Internal routine reached via call from the ECB-decrypt, CBC-decrypt and XTS-decrypt entry points.
 * Walks the keys downwards: key 32 is added, then SI7..SI0 cycle four times with KL2 after every
 * S-box except the last, which is followed by K2 with key 0. The result is left in the RC/RD/RB/RE
 * registers, not in the input registers, so callers must store from those. Besides the documented
 * outputs it overwrites RA1, RA2, tp, RNOT and RK0-RK3. */
625 | .align 8 | |
facd416f | 626 | __serpent_dec_blk8_avx: |
7efe4076 JG |
627 | /* input: |
628 | * %rdi: ctx, CTX | |
facd416f JK |
629 | * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks |
630 | * output: | |
631 | * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks | |
7efe4076 JG |
632 | */ |
633 | ||
/* RNOT := all ones (each lane compared with itself). */
634 | vpcmpeqd RNOT, RNOT, RNOT; | |
635 | ||
/* Transpose each register group; RK0-RK2 serve only as temporaries here. */
facd416f JK |
636 | read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2); |
637 | read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); | |
7efe4076 JG |
638 | |
/* SP(..., i) loads round key i into RK0-RK3 while running the S-box; the KL2 on the same line
 * consumes those registers (KL2 issues no key loads itself). The last line uses plain S because
 * the closing K2 loads key 0 on its own. */
639 | K2(RA, RB, RC, RD, RE, 32); | |
640 | SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31); | |
641 | SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30); | |
642 | SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29); | |
643 | SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28); | |
644 | SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27); | |
645 | SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26); | |
646 | SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25); | |
647 | SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24); | |
648 | SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23); | |
649 | SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22); | |
650 | SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21); | |
651 | SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20); | |
652 | SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19); | |
653 | SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18); | |
654 | SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17); | |
655 | SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16); | |
656 | SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15); | |
657 | SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14); | |
658 | SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13); | |
659 | SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12); | |
660 | SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11); | |
661 | SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10); | |
662 | SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9); | |
663 | SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8); | |
664 | SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7); | |
665 | SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6); | |
666 | SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5); | |
667 | SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4); | |
668 | SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3); | |
669 | SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2); | |
670 | SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1); | |
671 | S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0); | |
672 | ||
/* Transpose back, operating on the registers that hold the result after the final permutation. */
facd416f JK |
673 | write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2); |
674 | write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); | |
675 | ||
676 | ret; | |
2dcfd44d | 677 | ENDPROC(__serpent_dec_blk8_avx) |
facd416f | 678 | |
/* ECB encryption entry point: load_8way from src into the eight block registers, call
 * __serpent_enc_blk8_avx, store_8way the same registers to dst. load_8way/store_8way are not
 * defined in this file (presumably they come from the included glue_helper-asm-avx.S). */
2dcfd44d | 679 | ENTRY(serpent_ecb_enc_8way_avx) |
facd416f JK |
680 | /* input: |
681 | * %rdi: ctx, CTX | |
682 | * %rsi: dst | |
683 | * %rdx: src | |
684 | */ | |
8691ccd7 | 685 | FRAME_BEGIN |
facd416f JK |
686 | |
687 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
688 | ||
689 | call __serpent_enc_blk8_avx; | |
690 | ||
691 | store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
692 | ||
8691ccd7 | 693 | FRAME_END |
facd416f | 694 | ret; |
2dcfd44d | 695 | ENDPROC(serpent_ecb_enc_8way_avx) |
facd416f | 696 | |
/* ECB decryption entry point: load_8way from src, call __serpent_dec_blk8_avx, then store_8way to
 * dst from RC/RD/RB/RE, the registers in which the decrypt routine leaves its output.
 * load_8way/store_8way are not defined in this file (presumably from glue_helper-asm-avx.S). */
2dcfd44d | 697 | ENTRY(serpent_ecb_dec_8way_avx) |
facd416f JK |
698 | /* input: |
699 | * %rdi: ctx, CTX | |
700 | * %rsi: dst | |
701 | * %rdx: src | |
702 | */ | |
8691ccd7 | 703 | FRAME_BEGIN |
facd416f JK |
704 | |
705 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
706 | ||
707 | call __serpent_dec_blk8_avx; | |
708 | ||
709 | store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | |
710 | ||
8691ccd7 | 711 | FRAME_END |
facd416f | 712 | ret; |
2dcfd44d | 713 | ENDPROC(serpent_ecb_dec_8way_avx) |
facd416f | 714 | |
/* CBC decryption entry point: load_8way from src, call __serpent_dec_blk8_avx, then store_cbc_8way
 * with both src and dst and the decrypt routine's output registers (RC/RD/RB/RE).
 * NOTE(review): store_cbc_8way is defined outside this file; presumably it xors in the preceding
 * ciphertext blocks read from src before writing dst - confirm in glue_helper-asm-avx.S. */
2dcfd44d | 715 | ENTRY(serpent_cbc_dec_8way_avx) |
facd416f JK |
716 | /* input: |
717 | * %rdi: ctx, CTX | |
718 | * %rsi: dst | |
719 | * %rdx: src | |
720 | */ | |
8691ccd7 | 721 | FRAME_BEGIN |
facd416f JK |
722 | |
723 | load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
724 | ||
725 | call __serpent_dec_blk8_avx; | |
726 | ||
727 | store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | |
728 | ||
8691ccd7 | 729 | FRAME_END |
facd416f | 730 | ret; |
2dcfd44d | 731 | ENDPROC(serpent_cbc_dec_8way_avx) |
facd416f | 732 | |
/* CTR entry point: load_ctr_8way fills the eight block registers from the iv at %rcx (given
 * .Lbswap128_mask and RK0-RK2 as temporaries), the encrypt routine runs on them, and
 * store_ctr_8way receives src, dst and the encrypted registers.
 * NOTE(review): both helpers are defined outside this file; presumably the first derives eight
 * successive counter values and the second xors the result with src into dst - confirm. */
2dcfd44d | 733 | ENTRY(serpent_ctr_8way_avx) |
facd416f JK |
734 | /* input: |
735 | * %rdi: ctx, CTX | |
736 | * %rsi: dst | |
737 | * %rdx: src | |
738 | * %rcx: iv (little endian, 128bit) | |
739 | */ | |
8691ccd7 | 740 | FRAME_BEGIN |
facd416f JK |
741 | |
742 | load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2, | |
743 | RD2, RK0, RK1, RK2); | |
744 | ||
745 | call __serpent_enc_blk8_avx; | |
746 | ||
747 | store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
7efe4076 | 748 | |
8691ccd7 | 749 | FRAME_END |
7efe4076 | 750 | ret; |
2dcfd44d | 751 | ENDPROC(serpent_ctr_8way_avx) |
a05248ed JK |
752 | |
/* XTS encryption entry point: load_xts_8way (given iv, src, dst, the block registers, RK0-RK2 as
 * temporaries and .Lxts_gf128mul_and_shl1_mask), then __serpent_enc_blk8_avx, then store_xts_8way
 * to dst. Both helpers are defined outside this file; the inline comments below describe their
 * effect, with dst used as temporary storage for the per-block tweak values in between. */
753 | ENTRY(serpent_xts_enc_8way_avx) | |
754 | /* input: | |
755 | * %rdi: ctx, CTX | |
756 | * %rsi: dst | |
757 | * %rdx: src | |
758 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | |
759 | */ | |
8691ccd7 | 760 | FRAME_BEGIN |
a05248ed JK |
761 | |
762 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | |
763 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | |
764 | RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); | |
765 | ||
766 | call __serpent_enc_blk8_avx; | |
767 | ||
768 | /* dst <= regs xor IVs(in dst) */ | |
769 | store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); | |
770 | ||
8691ccd7 | 771 | FRAME_END |
a05248ed JK |
772 | ret; |
773 | ENDPROC(serpent_xts_enc_8way_avx) | |
774 | ||
/* XTS decryption entry point: same sequence as the XTS encrypt entry point, but it calls
 * __serpent_dec_blk8_avx and therefore hands store_xts_8way the RC/RD/RB/RE registers in which
 * the decrypt routine leaves its output. load_xts_8way/store_xts_8way are defined outside this file. */
775 | ENTRY(serpent_xts_dec_8way_avx) | |
776 | /* input: | |
777 | * %rdi: ctx, CTX | |
778 | * %rsi: dst | |
779 | * %rdx: src | |
780 | * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸)) | |
781 | */ | |
8691ccd7 | 782 | FRAME_BEGIN |
a05248ed JK |
783 | |
784 | /* regs <= src, dst <= IVs, regs <= regs xor IVs */ | |
785 | load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2, | |
786 | RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask); | |
787 | ||
788 | call __serpent_dec_blk8_avx; | |
789 | ||
790 | /* dst <= regs xor IVs(in dst) */ | |
791 | store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); | |
792 | ||
8691ccd7 | 793 | FRAME_END |
a05248ed JK |
794 | ret; |
795 | ENDPROC(serpent_xts_dec_8way_avx) |