Commit | Line | Data |
---|---|---|
6574e6c6 JK |
1 | /* |
2 | * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher | |
3 | * | |
4 | * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | */ | |
16 | ||
17 | #include <linux/linkage.h> | |
18 | ||
19 | .file "des3_ede-asm_64.S" | |
20 | .text | |
21 | ||
22 | #define s1 .L_s1 /* base of the eight lookup tables defined at the end of this file */ | |
23 | #define s2 ((s1) + (64*8)) /* each table is 64 entries of 8 bytes, so table n+1 starts 64*8 bytes after table n */ | |
24 | #define s3 ((s2) + (64*8)) | |
25 | #define s4 ((s3) + (64*8)) | |
26 | #define s5 ((s4) + (64*8)) | |
27 | #define s6 ((s5) + (64*8)) | |
28 | #define s7 ((s6) + (64*8)) | |
29 | #define s8 ((s7) + (64*8)) /* every offset is relative to .L_s1, so the tables must stay contiguous and in order */ | |
30 | ||
31 | /* register macros */ | |
32 | #define CTX %rdi /* first argument of both entry points: the round-key array */ | |
33 | ||
34 | #define RL0 %r8 /* RLn / RRn: the two halves of block n (n = 0..2 in the 3-way code) */ | |
35 | #define RL1 %r9 /* also borrowed as a scratch index register by round1 */ | |
36 | #define RL2 %r10 | |
37 | ||
38 | #define RL0d %r8d /* 32-bit views; the macros below reach them by pasting a d suffix onto the 64-bit name */ | |
39 | #define RL1d %r9d | |
40 | #define RL2d %r10d | |
41 | ||
42 | #define RR0 %r11 | |
43 | #define RR1 %r12 | |
44 | #define RR2 %r13 | |
45 | ||
46 | #define RR0d %r11d | |
47 | #define RR1d %r12d | |
48 | #define RR2d %r13d | |
49 | ||
50 | #define RW0 %rax /* RWn: round key xor'ed with one half of block n, then consumed byte by byte */ | |
51 | #define RW1 %rbx | |
52 | #define RW2 %rcx | |
53 | ||
54 | #define RW0d %eax | |
55 | #define RW1d %ebx | |
56 | #define RW2d %ecx | |
57 | ||
58 | #define RW0bl %al /* bits 0-7 of RWn */ | |
59 | #define RW1bl %bl | |
60 | #define RW2bl %cl | |
61 | ||
62 | #define RW0bh %ah /* bits 8-15 of RWn */ | |
63 | #define RW1bh %bh | |
64 | #define RW2bh %ch | |
65 | ||
66 | #define RT0 %r15 /* RTn: scratch registers */ | |
67 | #define RT1 %rbp /* NOTE(review): %rbp is plain scratch here, so it does not hold a frame pointer while the round macros run */ | |
68 | #define RT2 %r14 | |
69 | #define RT3 %rdx /* same register as the src argument; both entry points finish reading src before RT3 is first written */ | |
70 | ||
71 | #define RT0d %r15d | |
72 | #define RT1d %ebp | |
73 | #define RT2d %r14d | |
74 | #define RT3d %edx | |
75 | ||
76 | /*********************************************************************** | |
77 | * 1-way 3DES | |
78 | ***********************************************************************/ | |
79 | #define do_permutation(a, b, offset, mask) /* exchange the bits of b selected by mask with the bits of a selected by (mask << offset); a and b are 32-bit operands; clobbers RT0 */ \ | |
80 | movl a, RT0d; \ | |
81 | shrl $(offset), RT0d; /* line a's upper bit group up with b's */ \ | |
82 | xorl b, RT0d; \ | |
83 | andl $(mask), RT0d; /* RT0 = positions under mask where the two groups differ */ \ | |
84 | xorl RT0d, b; /* flipping the differing bits in both operands swaps the groups */ \ | |
85 | shll $(offset), RT0d; \ | |
86 | xorl RT0d, a; | |
87 | ||
88 | #define expand_to_64bits(val, mask) /* val = ((ror32(val, 4) << 32) OR val) AND mask; val is a 64-bit register macro name, mask a 64-bit register; clobbers RT0 */ \ | |
89 | movl val##d, RT0d; /* 32-bit move also zeroes the upper half of RT0 */ \ | |
90 | rorl $4, RT0d; \ | |
91 | shlq $32, RT0; /* rotated copy goes to the upper 32 bits */ \ | |
92 | orq RT0, val; /* merges into val as is, so val's upper half is expected to be zero on entry */ \ | |
93 | andq mask, val; | |
94 | ||
95 | #define compress_to_64bits(val) /* folds a 64-bit value back into 32 bits: val##d = low32(val) OR rol32(high32(val), 4); despite the name the result is 32 bits wide; clobbers RT0 */ \ | |
96 | movq val, RT0; \ | |
97 | shrq $32, RT0; /* RT0 = upper half */ \ | |
98 | roll $4, RT0d; /* opposite rotation to the rorl $4 in expand_to_64bits */ \ | |
99 | orl RT0d, val##d; /* 32-bit write, which clears the upper half of val */ | |
100 | ||
101 | #define initial_permutation(left, right) /* permutes the two 32-bit halves of one block and widens each to the 64-bit form used by round1; clobbers RT0, RT3 and RW0 */ \ | |
102 | do_permutation(left##d, right##d, 4, 0x0f0f0f0f); /* four masked bit exchanges between the halves */ \ | |
103 | do_permutation(left##d, right##d, 16, 0x0000ffff); \ | |
104 | do_permutation(right##d, left##d, 2, 0x33333333); \ | |
105 | do_permutation(right##d, left##d, 8, 0x00ff00ff); \ | |
106 | movabs $0x3f3f3f3f3f3f3f3f, RT3; /* keeps 6 bits per byte, i.e. a 0..63 table index in every byte */ \ | |
107 | movl left##d, RW0d; \ | |
108 | roll $1, right##d; \ | |
109 | xorl right##d, RW0d; \ | |
110 | andl $0xaaaaaaaa, RW0d; /* odd bit positions where left and rotated right differ */ \ | |
111 | xorl RW0d, left##d; /* exchange those bits between the halves */ \ | |
112 | xorl RW0d, right##d; \ | |
113 | roll $1, left##d; \ | |
114 | expand_to_64bits(right, RT3); \ | |
115 | expand_to_64bits(left, RT3); | |
116 | ||
117 | #define final_permutation(left, right) /* runs the steps of initial_permutation in reverse order with opposite rotations; leaves 32-bit results in left##d and right##d; clobbers RT0 and RW0 */ \ | |
118 | compress_to_64bits(right); /* back to 32 bits per half */ \ | |
119 | compress_to_64bits(left); \ | |
120 | movl right##d, RW0d; \ | |
121 | rorl $1, left##d; \ | |
122 | xorl left##d, RW0d; \ | |
123 | andl $0xaaaaaaaa, RW0d; /* odd bit positions where the halves differ */ \ | |
124 | xorl RW0d, right##d; /* exchange those bits */ \ | |
125 | xorl RW0d, left##d; \ | |
126 | rorl $1, right##d; \ | |
127 | do_permutation(right##d, left##d, 8, 0x00ff00ff); /* same four exchanges as initial_permutation, last one first */ \ | |
128 | do_permutation(right##d, left##d, 2, 0x33333333); \ | |
129 | do_permutation(left##d, right##d, 16, 0x0000ffff); \ | |
130 | do_permutation(left##d, right##d, 4, 0x0f0f0f0f); | |
131 | ||
132 | #define round1(n, from, to, load_next_key) /* one round for a single block: to ^= XOR of eight table lookups indexed by the bytes of (RW0 ^ from); expects the current round key in RW0; clobbers RW0, RT0-RT3 and RL1 */ \ | |
133 | xorq from, RW0; \ | |
134 | \ | |
135 | movzbl RW0bl, RT0d; /* byte 0 */ \ | |
136 | movzbl RW0bh, RT1d; /* byte 1 */ \ | |
137 | shrq $16, RW0; \ | |
138 | movzbl RW0bl, RT2d; /* byte 2 */ \ | |
139 | movzbl RW0bh, RT3d; /* byte 3 */ \ | |
140 | shrq $16, RW0; \ | |
141 | movq s8(, RT0, 8), RT0; /* RT0 becomes a second accumulator, folded into 'to' at the end */ \ | |
142 | xorq s6(, RT1, 8), to; \ | |
143 | movzbl RW0bl, RL1d; /* byte 4; RL1 serves as scratch, the 1-way entry point keeps block data only in RL0/RR0 */ \ | |
144 | movzbl RW0bh, RT1d; /* byte 5 */ \ | |
145 | shrl $16, RW0d; \ | |
146 | xorq s4(, RT2, 8), RT0; \ | |
147 | xorq s2(, RT3, 8), to; \ | |
148 | movzbl RW0bl, RT2d; /* byte 6 */ \ | |
149 | movzbl RW0bh, RT3d; /* byte 7 */ \ | |
150 | xorq s7(, RL1, 8), RT0; \ | |
151 | xorq s5(, RT1, 8), to; \ | |
152 | xorq s3(, RT2, 8), RT0; \ | |
153 | load_next_key(n, RW0); /* every byte has been extracted, so RW0 can receive the next key; expands to nothing when dummy2 is passed */ \ | |
154 | xorq RT0, to; \ | |
155 | xorq s1(, RT3, 8), to; \ | |
156 | ||
157 | #define load_next_key(n, RWx) /* loads the 8-byte round key with index n + 1 from the CTX array into RWx */ \ | |
158 | movq (((n) + 1) * 8)(CTX), RWx; | |
159 | ||
160 | #define dummy2(a, b) /*_*/ /* two-argument macro that expands to nothing; passed to the last round in place of load_next_key and __movq */ | |
161 | ||
162 | #define read_block(io, left, right) /* loads 8 bytes at io as two 32-bit words and reverses the byte order of each; io is a base register */ \ | |
163 | movl (io), left##d; /* first word */ \ | |
164 | movl 4(io), right##d; /* second word */ \ | |
165 | bswapl left##d; \ | |
166 | bswapl right##d; | |
167 | ||
168 | #define write_block(io, left, right) /* counterpart of read_block: reverses the byte order of both words in place, then stores left at io and right at io + 4 */ \ | |
169 | bswapl left##d; \ | |
170 | bswapl right##d; \ | |
171 | movl left##d, (io); \ | |
172 | movl right##d, 4(io); | |
173 | ||
174 | ENTRY(des3_ede_x86_64_crypt_blk) /* one 8-byte block: load, permute, 48 table-lookup rounds in three groups of 16, permute back, store */ | |
175 | /* input: | |
176 | * %rdi: round keys, CTX (48 quadwords are read, at offsets 0 .. 47*8) | |
177 | * %rsi: dst (8 bytes are written) | |
178 | * %rdx: src (8 bytes are read; the register is reused as RT3 afterwards) | |
179 | */ | |
180 | pushq %rbp; /* callee-saved registers are saved here and restored before ret */ | |
181 | pushq %rbx; | |
182 | pushq %r12; | |
183 | pushq %r13; | |
184 | pushq %r14; | |
185 | pushq %r15; | |
186 | ||
187 | read_block(%rdx, RL0, RR0); /* must precede initial_permutation, which overwrites %rdx (RT3) */ | |
188 | initial_permutation(RL0, RR0); | |
189 | ||
190 | movq (CTX), RW0; /* round key 0; each round1 then loads the key for the round after it */ | |
191 | ||
192 | round1(0, RR0, RL0, load_next_key); /* first group of 16: from/to alternate, starting RR0 -> RL0 */ | |
193 | round1(1, RL0, RR0, load_next_key); | |
194 | round1(2, RR0, RL0, load_next_key); | |
195 | round1(3, RL0, RR0, load_next_key); | |
196 | round1(4, RR0, RL0, load_next_key); | |
197 | round1(5, RL0, RR0, load_next_key); | |
198 | round1(6, RR0, RL0, load_next_key); | |
199 | round1(7, RL0, RR0, load_next_key); | |
200 | round1(8, RR0, RL0, load_next_key); | |
201 | round1(9, RL0, RR0, load_next_key); | |
202 | round1(10, RR0, RL0, load_next_key); | |
203 | round1(11, RL0, RR0, load_next_key); | |
204 | round1(12, RR0, RL0, load_next_key); | |
205 | round1(13, RL0, RR0, load_next_key); | |
206 | round1(14, RR0, RL0, load_next_key); | |
207 | round1(15, RL0, RR0, load_next_key); | |
208 | ||
209 | round1(16+0, RL0, RR0, load_next_key); /* second group repeats the direction of round 15, i.e. the halves switch roles without any data movement */ | |
210 | round1(16+1, RR0, RL0, load_next_key); | |
211 | round1(16+2, RL0, RR0, load_next_key); | |
212 | round1(16+3, RR0, RL0, load_next_key); | |
213 | round1(16+4, RL0, RR0, load_next_key); | |
214 | round1(16+5, RR0, RL0, load_next_key); | |
215 | round1(16+6, RL0, RR0, load_next_key); | |
216 | round1(16+7, RR0, RL0, load_next_key); | |
217 | round1(16+8, RL0, RR0, load_next_key); | |
218 | round1(16+9, RR0, RL0, load_next_key); | |
219 | round1(16+10, RL0, RR0, load_next_key); | |
220 | round1(16+11, RR0, RL0, load_next_key); | |
221 | round1(16+12, RL0, RR0, load_next_key); | |
222 | round1(16+13, RR0, RL0, load_next_key); | |
223 | round1(16+14, RL0, RR0, load_next_key); | |
224 | round1(16+15, RR0, RL0, load_next_key); | |
225 | ||
226 | round1(32+0, RR0, RL0, load_next_key); /* third group: roles switch again at the group boundary */ | |
227 | round1(32+1, RL0, RR0, load_next_key); | |
228 | round1(32+2, RR0, RL0, load_next_key); | |
229 | round1(32+3, RL0, RR0, load_next_key); | |
230 | round1(32+4, RR0, RL0, load_next_key); | |
231 | round1(32+5, RL0, RR0, load_next_key); | |
232 | round1(32+6, RR0, RL0, load_next_key); | |
233 | round1(32+7, RL0, RR0, load_next_key); | |
234 | round1(32+8, RR0, RL0, load_next_key); | |
235 | round1(32+9, RL0, RR0, load_next_key); | |
236 | round1(32+10, RR0, RL0, load_next_key); | |
237 | round1(32+11, RL0, RR0, load_next_key); | |
238 | round1(32+12, RR0, RL0, load_next_key); | |
239 | round1(32+13, RL0, RR0, load_next_key); | |
240 | round1(32+14, RR0, RL0, load_next_key); | |
241 | round1(32+15, RL0, RR0, dummy2); /* last round: no further key to load */ | |
242 | ||
243 | final_permutation(RR0, RL0); /* halves are passed, and then stored, in the opposite order to the load */ | |
244 | write_block(%rsi, RR0, RL0); | |
245 | ||
246 | popq %r15; | |
247 | popq %r14; | |
248 | popq %r13; | |
249 | popq %r12; | |
250 | popq %rbx; | |
251 | popq %rbp; | |
252 | ||
253 | ret; | |
254 | ENDPROC(des3_ede_x86_64_crypt_blk) | |
255 | ||
256 | /*********************************************************************** | |
257 | * 3-way 3DES | |
258 | ***********************************************************************/ | |
259 | #define expand_to_64bits(val, mask) /* repeats the 1-way section's definition token for token: val = ((ror32(val, 4) << 32) OR val) AND mask; clobbers RT0 */ \ | |
260 | movl val##d, RT0d; \ | |
261 | rorl $4, RT0d; \ | |
262 | shlq $32, RT0; \ | |
263 | orq RT0, val; \ | |
264 | andq mask, val; | |
265 | ||
266 | #define compress_to_64bits(val) /* repeats the 1-way section's definition token for token: val##d = low32(val) OR rol32(high32(val), 4); clobbers RT0 */ \ | |
267 | movq val, RT0; \ | |
268 | shrq $32, RT0; \ | |
269 | roll $4, RT0d; \ | |
270 | orl RT0d, val##d; | |
271 | ||
272 | #define initial_permutation3(left, right) /* initial_permutation applied to three blocks; left and right are register name prefixes, completed with 0, 1, 2 by token pasting; clobbers RT0, RT3 and RW0-RW2 */ \ | |
273 | do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); /* first two exchanges, block by block */ \ | |
274 | do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ | |
275 | do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ | |
276 | do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ | |
277 | do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ | |
278 | do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ | |
279 | \ | |
280 | do_permutation(right##0d, left##0d, 2, 0x33333333); /* last two exchanges, operands reversed */ \ | |
281 | do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ | |
282 | do_permutation(right##1d, left##1d, 2, 0x33333333); \ | |
283 | do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ | |
284 | do_permutation(right##2d, left##2d, 2, 0x33333333); \ | |
285 | do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ | |
286 | \ | |
287 | movabs $0x3f3f3f3f3f3f3f3f, RT3; /* 6-bit-per-byte mask shared by all six expand_to_64bits calls */ \ | |
288 | \ | |
289 | movl left##0d, RW0d; /* block 0: rotate, exchange the 0xaaaaaaaa bits, widen */ \ | |
290 | roll $1, right##0d; \ | |
291 | xorl right##0d, RW0d; \ | |
292 | andl $0xaaaaaaaa, RW0d; \ | |
293 | xorl RW0d, left##0d; \ | |
294 | xorl RW0d, right##0d; \ | |
295 | roll $1, left##0d; \ | |
296 | expand_to_64bits(right##0, RT3); \ | |
297 | expand_to_64bits(left##0, RT3); \ | |
298 | movl left##1d, RW1d; /* block 1: same steps with RW1 as the temporary */ \ | |
299 | roll $1, right##1d; \ | |
300 | xorl right##1d, RW1d; \ | |
301 | andl $0xaaaaaaaa, RW1d; \ | |
302 | xorl RW1d, left##1d; \ | |
303 | xorl RW1d, right##1d; \ | |
304 | roll $1, left##1d; \ | |
305 | expand_to_64bits(right##1, RT3); \ | |
306 | expand_to_64bits(left##1, RT3); \ | |
307 | movl left##2d, RW2d; /* block 2: same steps with RW2 as the temporary */ \ | |
308 | roll $1, right##2d; \ | |
309 | xorl right##2d, RW2d; \ | |
310 | andl $0xaaaaaaaa, RW2d; \ | |
311 | xorl RW2d, left##2d; \ | |
312 | xorl RW2d, right##2d; \ | |
313 | roll $1, left##2d; \ | |
314 | expand_to_64bits(right##2, RT3); \ | |
315 | expand_to_64bits(left##2, RT3); | |
316 | ||
317 | #define final_permutation3(left, right) /* final_permutation applied to three blocks; left and right are register name prefixes, completed with 0, 1, 2 by token pasting; clobbers RT0 and RW0-RW2 */ \ | |
318 | compress_to_64bits(right##0); /* block 0: fold to 32 bits, rotate, exchange the 0xaaaaaaaa bits */ \ | |
319 | compress_to_64bits(left##0); \ | |
320 | movl right##0d, RW0d; \ | |
321 | rorl $1, left##0d; \ | |
322 | xorl left##0d, RW0d; \ | |
323 | andl $0xaaaaaaaa, RW0d; \ | |
324 | xorl RW0d, right##0d; \ | |
325 | xorl RW0d, left##0d; \ | |
326 | rorl $1, right##0d; \ | |
327 | compress_to_64bits(right##1); /* block 1: same steps with RW1 as the temporary */ \ | |
328 | compress_to_64bits(left##1); \ | |
329 | movl right##1d, RW1d; \ | |
330 | rorl $1, left##1d; \ | |
331 | xorl left##1d, RW1d; \ | |
332 | andl $0xaaaaaaaa, RW1d; \ | |
333 | xorl RW1d, right##1d; \ | |
334 | xorl RW1d, left##1d; \ | |
335 | rorl $1, right##1d; \ | |
336 | compress_to_64bits(right##2); /* block 2: same steps with RW2 as the temporary */ \ | |
337 | compress_to_64bits(left##2); \ | |
338 | movl right##2d, RW2d; \ | |
339 | rorl $1, left##2d; \ | |
340 | xorl left##2d, RW2d; \ | |
341 | andl $0xaaaaaaaa, RW2d; \ | |
342 | xorl RW2d, right##2d; \ | |
343 | xorl RW2d, left##2d; \ | |
344 | rorl $1, right##2d; \ | |
345 | \ | |
346 | do_permutation(right##0d, left##0d, 8, 0x00ff00ff); /* the four exchanges of initial_permutation3, in reverse order */ \ | |
347 | do_permutation(right##0d, left##0d, 2, 0x33333333); \ | |
348 | do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ | |
349 | do_permutation(right##1d, left##1d, 2, 0x33333333); \ | |
350 | do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ | |
351 | do_permutation(right##2d, left##2d, 2, 0x33333333); \ | |
352 | \ | |
353 | do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ | |
354 | do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ | |
355 | do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ | |
356 | do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ | |
357 | do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ | |
358 | do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); | |
359 | ||
360 | #define round3(n, from, to, load_next_key, do_movq) /* one round for three blocks: for k = 0..2, to##k ^= XOR of eight table lookups indexed by the bytes of (RWk ^ from##k); expects the current round key in RW0, RW1 and RW2; clobbers RW0-RW2, RT1 and RT3 */ \ | |
361 | xorq from##0, RW0; /* ---- block 0 ---- */ \ | |
362 | movzbl RW0bl, RT3d; /* bytes 0 and 1 */ \ | |
363 | movzbl RW0bh, RT1d; \ | |
364 | shrq $16, RW0; \ | |
365 | xorq s8(, RT3, 8), to##0; \ | |
366 | xorq s6(, RT1, 8), to##0; \ | |
367 | movzbl RW0bl, RT3d; /* bytes 2 and 3 */ \ | |
368 | movzbl RW0bh, RT1d; \ | |
369 | shrq $16, RW0; \ | |
370 | xorq s4(, RT3, 8), to##0; \ | |
371 | xorq s2(, RT1, 8), to##0; \ | |
372 | movzbl RW0bl, RT3d; /* bytes 4 and 5 */ \ | |
373 | movzbl RW0bh, RT1d; \ | |
374 | shrl $16, RW0d; \ | |
375 | xorq s7(, RT3, 8), to##0; \ | |
376 | xorq s5(, RT1, 8), to##0; \ | |
377 | movzbl RW0bl, RT3d; /* bytes 6 and 7 */ \ | |
378 | movzbl RW0bh, RT1d; \ | |
379 | load_next_key(n, RW0); /* RW0 is fully consumed; fetch the next round key into it (nothing for dummy2) */ \ | |
380 | xorq s3(, RT3, 8), to##0; \ | |
381 | xorq s1(, RT1, 8), to##0; \ | |
382 | xorq from##1, RW1; /* ---- block 1: RW1 still holds the current round's key ---- */ \ | |
383 | movzbl RW1bl, RT3d; \ | |
384 | movzbl RW1bh, RT1d; \ | |
385 | shrq $16, RW1; \ | |
386 | xorq s8(, RT3, 8), to##1; \ | |
387 | xorq s6(, RT1, 8), to##1; \ | |
388 | movzbl RW1bl, RT3d; \ | |
389 | movzbl RW1bh, RT1d; \ | |
390 | shrq $16, RW1; \ | |
391 | xorq s4(, RT3, 8), to##1; \ | |
392 | xorq s2(, RT1, 8), to##1; \ | |
393 | movzbl RW1bl, RT3d; \ | |
394 | movzbl RW1bh, RT1d; \ | |
395 | shrl $16, RW1d; \ | |
396 | xorq s7(, RT3, 8), to##1; \ | |
397 | xorq s5(, RT1, 8), to##1; \ | |
398 | movzbl RW1bl, RT3d; \ | |
399 | movzbl RW1bh, RT1d; \ | |
400 | do_movq(RW0, RW1); /* copy the freshly loaded next key from RW0 into RW1 */ \ | |
401 | xorq s3(, RT3, 8), to##1; \ | |
402 | xorq s1(, RT1, 8), to##1; \ | |
403 | xorq from##2, RW2; /* ---- block 2: RW2 still holds the current round's key ---- */ \ | |
404 | movzbl RW2bl, RT3d; \ | |
405 | movzbl RW2bh, RT1d; \ | |
406 | shrq $16, RW2; \ | |
407 | xorq s8(, RT3, 8), to##2; \ | |
408 | xorq s6(, RT1, 8), to##2; \ | |
409 | movzbl RW2bl, RT3d; \ | |
410 | movzbl RW2bh, RT1d; \ | |
411 | shrq $16, RW2; \ | |
412 | xorq s4(, RT3, 8), to##2; \ | |
413 | xorq s2(, RT1, 8), to##2; \ | |
414 | movzbl RW2bl, RT3d; \ | |
415 | movzbl RW2bh, RT1d; \ | |
416 | shrl $16, RW2d; \ | |
417 | xorq s7(, RT3, 8), to##2; \ | |
418 | xorq s5(, RT1, 8), to##2; \ | |
419 | movzbl RW2bl, RT3d; \ | |
420 | movzbl RW2bh, RT1d; \ | |
421 | do_movq(RW0, RW2); /* copy the next key into RW2 as well; RW0 itself is left untouched for the next round */ \ | |
422 | xorq s3(, RT3, 8), to##2; \ | |
423 | xorq s1(, RT1, 8), to##2; | |
424 | ||
425 | #define __movq(src, dst) /* plain 64-bit move, wrapped in a macro so that round3 can be handed dummy2 instead in the last round */ \ | |
426 | movq src, dst; | |
427 | ||
428 | ENTRY(des3_ede_x86_64_crypt_blk_3way) /* three 8-byte blocks at once: load, permute, 48 table-lookup rounds in three groups of 16, permute back, store */ | |
429 | /* input: | |
430 | * %rdi: ctx, round keys (48 quadwords are read, at offsets 0 .. 47*8) | |
431 | * %rsi: dst (3 blocks) (24 bytes are written) | |
432 | * %rdx: src (3 blocks) (24 bytes are read; the register is reused as RT3 afterwards) | |
433 | */ | |
434 | ||
435 | pushq %rbp; /* callee-saved registers are saved here and restored before ret */ | |
436 | pushq %rbx; | |
437 | pushq %r12; | |
438 | pushq %r13; | |
439 | pushq %r14; | |
440 | pushq %r15; | |
441 | ||
442 | /* load input */ | |
443 | movl 0 * 4(%rdx), RL0d; /* block k occupies words 2k and 2k+1 of src */ | |
444 | movl 1 * 4(%rdx), RR0d; | |
445 | movl 2 * 4(%rdx), RL1d; | |
446 | movl 3 * 4(%rdx), RR1d; | |
447 | movl 4 * 4(%rdx), RL2d; | |
448 | movl 5 * 4(%rdx), RR2d; | |
449 | ||
450 | bswapl RL0d; /* reverse the byte order of every loaded word */ | |
451 | bswapl RR0d; | |
452 | bswapl RL1d; | |
453 | bswapl RR1d; | |
454 | bswapl RL2d; | |
455 | bswapl RR2d; | |
456 | ||
457 | initial_permutation3(RL, RR); /* overwrites %rdx (RT3); all loads from src are already done */ | |
458 | ||
459 | movq 0(CTX), RW0; /* round key 0, replicated for the three blocks */ | |
460 | movq RW0, RW1; | |
461 | movq RW0, RW2; | |
462 | ||
463 | round3(0, RR, RL, load_next_key, __movq); /* first group of 16: from/to alternate, starting RR -> RL */ | |
464 | round3(1, RL, RR, load_next_key, __movq); | |
465 | round3(2, RR, RL, load_next_key, __movq); | |
466 | round3(3, RL, RR, load_next_key, __movq); | |
467 | round3(4, RR, RL, load_next_key, __movq); | |
468 | round3(5, RL, RR, load_next_key, __movq); | |
469 | round3(6, RR, RL, load_next_key, __movq); | |
470 | round3(7, RL, RR, load_next_key, __movq); | |
471 | round3(8, RR, RL, load_next_key, __movq); | |
472 | round3(9, RL, RR, load_next_key, __movq); | |
473 | round3(10, RR, RL, load_next_key, __movq); | |
474 | round3(11, RL, RR, load_next_key, __movq); | |
475 | round3(12, RR, RL, load_next_key, __movq); | |
476 | round3(13, RL, RR, load_next_key, __movq); | |
477 | round3(14, RR, RL, load_next_key, __movq); | |
478 | round3(15, RL, RR, load_next_key, __movq); | |
479 | ||
480 | round3(16+0, RL, RR, load_next_key, __movq); /* second group repeats the direction of round 15, i.e. the halves switch roles without any data movement */ | |
481 | round3(16+1, RR, RL, load_next_key, __movq); | |
482 | round3(16+2, RL, RR, load_next_key, __movq); | |
483 | round3(16+3, RR, RL, load_next_key, __movq); | |
484 | round3(16+4, RL, RR, load_next_key, __movq); | |
485 | round3(16+5, RR, RL, load_next_key, __movq); | |
486 | round3(16+6, RL, RR, load_next_key, __movq); | |
487 | round3(16+7, RR, RL, load_next_key, __movq); | |
488 | round3(16+8, RL, RR, load_next_key, __movq); | |
489 | round3(16+9, RR, RL, load_next_key, __movq); | |
490 | round3(16+10, RL, RR, load_next_key, __movq); | |
491 | round3(16+11, RR, RL, load_next_key, __movq); | |
492 | round3(16+12, RL, RR, load_next_key, __movq); | |
493 | round3(16+13, RR, RL, load_next_key, __movq); | |
494 | round3(16+14, RL, RR, load_next_key, __movq); | |
495 | round3(16+15, RR, RL, load_next_key, __movq); | |
496 | ||
497 | round3(32+0, RR, RL, load_next_key, __movq); /* third group: roles switch again at the group boundary */ | |
498 | round3(32+1, RL, RR, load_next_key, __movq); | |
499 | round3(32+2, RR, RL, load_next_key, __movq); | |
500 | round3(32+3, RL, RR, load_next_key, __movq); | |
501 | round3(32+4, RR, RL, load_next_key, __movq); | |
502 | round3(32+5, RL, RR, load_next_key, __movq); | |
503 | round3(32+6, RR, RL, load_next_key, __movq); | |
504 | round3(32+7, RL, RR, load_next_key, __movq); | |
505 | round3(32+8, RR, RL, load_next_key, __movq); | |
506 | round3(32+9, RL, RR, load_next_key, __movq); | |
507 | round3(32+10, RR, RL, load_next_key, __movq); | |
508 | round3(32+11, RL, RR, load_next_key, __movq); | |
509 | round3(32+12, RR, RL, load_next_key, __movq); | |
510 | round3(32+13, RL, RR, load_next_key, __movq); | |
511 | round3(32+14, RR, RL, load_next_key, __movq); | |
512 | round3(32+15, RL, RR, dummy2, dummy2); /* last round: no further key to load or copy */ | |
513 | ||
514 | final_permutation3(RR, RL); /* halves are passed, and then stored, in the opposite order to the load */ | |
515 | ||
516 | bswapl RR0d; /* reverse the byte order of every word before storing */ | |
517 | bswapl RL0d; | |
518 | bswapl RR1d; | |
519 | bswapl RL1d; | |
520 | bswapl RR2d; | |
521 | bswapl RL2d; | |
522 | ||
523 | movl RR0d, 0 * 4(%rsi); /* block k goes to words 2k and 2k+1 of dst */ | |
524 | movl RL0d, 1 * 4(%rsi); | |
525 | movl RR1d, 2 * 4(%rsi); | |
526 | movl RL1d, 3 * 4(%rsi); | |
527 | movl RR2d, 4 * 4(%rsi); | |
528 | movl RL2d, 5 * 4(%rsi); | |
529 | ||
530 | popq %r15; | |
531 | popq %r14; | |
532 | popq %r13; | |
533 | popq %r12; | |
534 | popq %rbx; | |
535 | popq %rbp; | |
536 | ||
537 | ret; | |
538 | ENDPROC(des3_ede_x86_64_crypt_blk_3way) | |
539 | ||
540 | .section .rodata, "a", @progbits /* the lookup tables below are only ever read by the round macros, so they belong in read-only data rather than writable .data */ | |
541 | .align 16 | |
542 | .L_s1: /* table s1: 64 quadword entries, indexed as s1(, idx, 8); s2..s8 are addressed as fixed offsets from this label */ | |
543 | .quad 0x0010100001010400, 0x0000000000000000 | |
544 | .quad 0x0000100000010000, 0x0010100001010404 | |
545 | .quad 0x0010100001010004, 0x0000100000010404 | |
546 | .quad 0x0000000000000004, 0x0000100000010000 | |
547 | .quad 0x0000000000000400, 0x0010100001010400 | |
548 | .quad 0x0010100001010404, 0x0000000000000400 | |
549 | .quad 0x0010000001000404, 0x0010100001010004 | |
550 | .quad 0x0010000001000000, 0x0000000000000004 | |
551 | .quad 0x0000000000000404, 0x0010000001000400 | |
552 | .quad 0x0010000001000400, 0x0000100000010400 | |
553 | .quad 0x0000100000010400, 0x0010100001010000 | |
554 | .quad 0x0010100001010000, 0x0010000001000404 | |
555 | .quad 0x0000100000010004, 0x0010000001000004 | |
556 | .quad 0x0010000001000004, 0x0000100000010004 | |
557 | .quad 0x0000000000000000, 0x0000000000000404 | |
558 | .quad 0x0000100000010404, 0x0010000001000000 | |
559 | .quad 0x0000100000010000, 0x0010100001010404 | |
560 | .quad 0x0000000000000004, 0x0010100001010000 | |
561 | .quad 0x0010100001010400, 0x0010000001000000 | |
562 | .quad 0x0010000001000000, 0x0000000000000400 | |
563 | .quad 0x0010100001010004, 0x0000100000010000 | |
564 | .quad 0x0000100000010400, 0x0010000001000004 | |
565 | .quad 0x0000000000000400, 0x0000000000000004 | |
566 | .quad 0x0010000001000404, 0x0000100000010404 | |
567 | .quad 0x0010100001010404, 0x0000100000010004 | |
568 | .quad 0x0010100001010000, 0x0010000001000404 | |
569 | .quad 0x0010000001000004, 0x0000000000000404 | |
570 | .quad 0x0000100000010404, 0x0010100001010400 | |
571 | .quad 0x0000000000000404, 0x0010000001000400 | |
572 | .quad 0x0010000001000400, 0x0000000000000000 | |
573 | .quad 0x0000100000010004, 0x0000100000010400 | |
574 | .quad 0x0000000000000000, 0x0010100001010004 | |
575 | .L_s2: /* table s2: 64 quadword entries; the code reaches it as s1 + 1*64*8, so it must directly follow table s1 */ | |
576 | .quad 0x0801080200100020, 0x0800080000000000 | |
577 | .quad 0x0000080000000000, 0x0001080200100020 | |
578 | .quad 0x0001000000100000, 0x0000000200000020 | |
579 | .quad 0x0801000200100020, 0x0800080200000020 | |
580 | .quad 0x0800000200000020, 0x0801080200100020 | |
581 | .quad 0x0801080000100000, 0x0800000000000000 | |
582 | .quad 0x0800080000000000, 0x0001000000100000 | |
583 | .quad 0x0000000200000020, 0x0801000200100020 | |
584 | .quad 0x0001080000100000, 0x0001000200100020 | |
585 | .quad 0x0800080200000020, 0x0000000000000000 | |
586 | .quad 0x0800000000000000, 0x0000080000000000 | |
587 | .quad 0x0001080200100020, 0x0801000000100000 | |
588 | .quad 0x0001000200100020, 0x0800000200000020 | |
589 | .quad 0x0000000000000000, 0x0001080000100000 | |
590 | .quad 0x0000080200000020, 0x0801080000100000 | |
591 | .quad 0x0801000000100000, 0x0000080200000020 | |
592 | .quad 0x0000000000000000, 0x0001080200100020 | |
593 | .quad 0x0801000200100020, 0x0001000000100000 | |
594 | .quad 0x0800080200000020, 0x0801000000100000 | |
595 | .quad 0x0801080000100000, 0x0000080000000000 | |
596 | .quad 0x0801000000100000, 0x0800080000000000 | |
597 | .quad 0x0000000200000020, 0x0801080200100020 | |
598 | .quad 0x0001080200100020, 0x0000000200000020 | |
599 | .quad 0x0000080000000000, 0x0800000000000000 | |
600 | .quad 0x0000080200000020, 0x0801080000100000 | |
601 | .quad 0x0001000000100000, 0x0800000200000020 | |
602 | .quad 0x0001000200100020, 0x0800080200000020 | |
603 | .quad 0x0800000200000020, 0x0001000200100020 | |
604 | .quad 0x0001080000100000, 0x0000000000000000 | |
605 | .quad 0x0800080000000000, 0x0000080200000020 | |
606 | .quad 0x0800000000000000, 0x0801000200100020 | |
607 | .quad 0x0801080200100020, 0x0001080000100000 | |
608 | .L_s3: /* table s3: 64 quadword entries; the code reaches it as s1 + 2*64*8, so it must directly follow table s2 */ | |
609 | .quad 0x0000002000000208, 0x0000202008020200 | |
610 | .quad 0x0000000000000000, 0x0000200008020008 | |
611 | .quad 0x0000002008000200, 0x0000000000000000 | |
612 | .quad 0x0000202000020208, 0x0000002008000200 | |
613 | .quad 0x0000200000020008, 0x0000000008000008 | |
614 | .quad 0x0000000008000008, 0x0000200000020000 | |
615 | .quad 0x0000202008020208, 0x0000200000020008 | |
616 | .quad 0x0000200008020000, 0x0000002000000208 | |
617 | .quad 0x0000000008000000, 0x0000000000000008 | |
618 | .quad 0x0000202008020200, 0x0000002000000200 | |
619 | .quad 0x0000202000020200, 0x0000200008020000 | |
620 | .quad 0x0000200008020008, 0x0000202000020208 | |
621 | .quad 0x0000002008000208, 0x0000202000020200 | |
622 | .quad 0x0000200000020000, 0x0000002008000208 | |
623 | .quad 0x0000000000000008, 0x0000202008020208 | |
624 | .quad 0x0000002000000200, 0x0000000008000000 | |
625 | .quad 0x0000202008020200, 0x0000000008000000 | |
626 | .quad 0x0000200000020008, 0x0000002000000208 | |
627 | .quad 0x0000200000020000, 0x0000202008020200 | |
628 | .quad 0x0000002008000200, 0x0000000000000000 | |
629 | .quad 0x0000002000000200, 0x0000200000020008 | |
630 | .quad 0x0000202008020208, 0x0000002008000200 | |
631 | .quad 0x0000000008000008, 0x0000002000000200 | |
632 | .quad 0x0000000000000000, 0x0000200008020008 | |
633 | .quad 0x0000002008000208, 0x0000200000020000 | |
634 | .quad 0x0000000008000000, 0x0000202008020208 | |
635 | .quad 0x0000000000000008, 0x0000202000020208 | |
636 | .quad 0x0000202000020200, 0x0000000008000008 | |
637 | .quad 0x0000200008020000, 0x0000002008000208 | |
638 | .quad 0x0000002000000208, 0x0000200008020000 | |
639 | .quad 0x0000202000020208, 0x0000000000000008 | |
640 | .quad 0x0000200008020008, 0x0000202000020200 | |
641 | .L_s4: /* table s4: 64 quadword entries; the code reaches it as s1 + 3*64*8, so it must directly follow table s3 */ | |
642 | .quad 0x1008020000002001, 0x1000020800002001 | |
643 | .quad 0x1000020800002001, 0x0000000800000000 | |
644 | .quad 0x0008020800002000, 0x1008000800000001 | |
645 | .quad 0x1008000000000001, 0x1000020000002001 | |
646 | .quad 0x0000000000000000, 0x0008020000002000 | |
647 | .quad 0x0008020000002000, 0x1008020800002001 | |
648 | .quad 0x1000000800000001, 0x0000000000000000 | |
649 | .quad 0x0008000800000000, 0x1008000000000001 | |
650 | .quad 0x1000000000000001, 0x0000020000002000 | |
651 | .quad 0x0008000000000000, 0x1008020000002001 | |
652 | .quad 0x0000000800000000, 0x0008000000000000 | |
653 | .quad 0x1000020000002001, 0x0000020800002000 | |
654 | .quad 0x1008000800000001, 0x1000000000000001 | |
655 | .quad 0x0000020800002000, 0x0008000800000000 | |
656 | .quad 0x0000020000002000, 0x0008020800002000 | |
657 | .quad 0x1008020800002001, 0x1000000800000001 | |
658 | .quad 0x0008000800000000, 0x1008000000000001 | |
659 | .quad 0x0008020000002000, 0x1008020800002001 | |
660 | .quad 0x1000000800000001, 0x0000000000000000 | |
661 | .quad 0x0000000000000000, 0x0008020000002000 | |
662 | .quad 0x0000020800002000, 0x0008000800000000 | |
663 | .quad 0x1008000800000001, 0x1000000000000001 | |
664 | .quad 0x1008020000002001, 0x1000020800002001 | |
665 | .quad 0x1000020800002001, 0x0000000800000000 | |
666 | .quad 0x1008020800002001, 0x1000000800000001 | |
667 | .quad 0x1000000000000001, 0x0000020000002000 | |
668 | .quad 0x1008000000000001, 0x1000020000002001 | |
669 | .quad 0x0008020800002000, 0x1008000800000001 | |
670 | .quad 0x1000020000002001, 0x0000020800002000 | |
671 | .quad 0x0008000000000000, 0x1008020000002001 | |
672 | .quad 0x0000000800000000, 0x0008000000000000 | |
673 | .quad 0x0000020000002000, 0x0008020800002000 | |
674 | .L_s5: /* table s5: 64 quadword entries; the code reaches it as s1 + 4*64*8, so it must directly follow table s4 */ | |
675 | .quad 0x0000001000000100, 0x0020001002080100 | |
676 | .quad 0x0020000002080000, 0x0420001002000100 | |
677 | .quad 0x0000000000080000, 0x0000001000000100 | |
678 | .quad 0x0400000000000000, 0x0020000002080000 | |
679 | .quad 0x0400001000080100, 0x0000000000080000 | |
680 | .quad 0x0020001002000100, 0x0400001000080100 | |
681 | .quad 0x0420001002000100, 0x0420000002080000 | |
682 | .quad 0x0000001000080100, 0x0400000000000000 | |
683 | .quad 0x0020000002000000, 0x0400000000080000 | |
684 | .quad 0x0400000000080000, 0x0000000000000000 | |
685 | .quad 0x0400001000000100, 0x0420001002080100 | |
686 | .quad 0x0420001002080100, 0x0020001002000100 | |
687 | .quad 0x0420000002080000, 0x0400001000000100 | |
688 | .quad 0x0000000000000000, 0x0420000002000000 | |
689 | .quad 0x0020001002080100, 0x0020000002000000 | |
690 | .quad 0x0420000002000000, 0x0000001000080100 | |
691 | .quad 0x0000000000080000, 0x0420001002000100 | |
692 | .quad 0x0000001000000100, 0x0020000002000000 | |
693 | .quad 0x0400000000000000, 0x0020000002080000 | |
694 | .quad 0x0420001002000100, 0x0400001000080100 | |
695 | .quad 0x0020001002000100, 0x0400000000000000 | |
696 | .quad 0x0420000002080000, 0x0020001002080100 | |
697 | .quad 0x0400001000080100, 0x0000001000000100 | |
698 | .quad 0x0020000002000000, 0x0420000002080000 | |
699 | .quad 0x0420001002080100, 0x0000001000080100 | |
700 | .quad 0x0420000002000000, 0x0420001002080100 | |
701 | .quad 0x0020000002080000, 0x0000000000000000 | |
702 | .quad 0x0400000000080000, 0x0420000002000000 | |
703 | .quad 0x0000001000080100, 0x0020001002000100 | |
704 | .quad 0x0400001000000100, 0x0000000000080000 | |
705 | .quad 0x0000000000000000, 0x0400000000080000 | |
706 | .quad 0x0020001002080100, 0x0400001000000100 | |
707 | .L_s6: /* table s6: 64 quadword entries; the code reaches it as s1 + 5*64*8, so it must directly follow table s5 */ | |
708 | .quad 0x0200000120000010, 0x0204000020000000 | |
709 | .quad 0x0000040000000000, 0x0204040120000010 | |
710 | .quad 0x0204000020000000, 0x0000000100000010 | |
711 | .quad 0x0204040120000010, 0x0004000000000000 | |
712 | .quad 0x0200040020000000, 0x0004040100000010 | |
713 | .quad 0x0004000000000000, 0x0200000120000010 | |
714 | .quad 0x0004000100000010, 0x0200040020000000 | |
715 | .quad 0x0200000020000000, 0x0000040100000010 | |
716 | .quad 0x0000000000000000, 0x0004000100000010 | |
717 | .quad 0x0200040120000010, 0x0000040000000000 | |
718 | .quad 0x0004040000000000, 0x0200040120000010 | |
719 | .quad 0x0000000100000010, 0x0204000120000010 | |
720 | .quad 0x0204000120000010, 0x0000000000000000 | |
721 | .quad 0x0004040100000010, 0x0204040020000000 | |
722 | .quad 0x0000040100000010, 0x0004040000000000 | |
723 | .quad 0x0204040020000000, 0x0200000020000000 | |
724 | .quad 0x0200040020000000, 0x0000000100000010 | |
725 | .quad 0x0204000120000010, 0x0004040000000000 | |
726 | .quad 0x0204040120000010, 0x0004000000000000 | |
727 | .quad 0x0000040100000010, 0x0200000120000010 | |
728 | .quad 0x0004000000000000, 0x0200040020000000 | |
729 | .quad 0x0200000020000000, 0x0000040100000010 | |
730 | .quad 0x0200000120000010, 0x0204040120000010 | |
731 | .quad 0x0004040000000000, 0x0204000020000000 | |
732 | .quad 0x0004040100000010, 0x0204040020000000 | |
733 | .quad 0x0000000000000000, 0x0204000120000010 | |
734 | .quad 0x0000000100000010, 0x0000040000000000 | |
735 | .quad 0x0204000020000000, 0x0004040100000010 | |
736 | .quad 0x0000040000000000, 0x0004000100000010 | |
737 | .quad 0x0200040120000010, 0x0000000000000000 | |
738 | .quad 0x0204040020000000, 0x0200000020000000 | |
739 | .quad 0x0004000100000010, 0x0200040120000010 | |
740 | .L_s7: /* table s7: 64 quadword entries; the code reaches it as s1 + 6*64*8, so it must directly follow table s6 */ | |
741 | .quad 0x0002000000200000, 0x2002000004200002 | |
742 | .quad 0x2000000004000802, 0x0000000000000000 | |
743 | .quad 0x0000000000000800, 0x2000000004000802 | |
744 | .quad 0x2002000000200802, 0x0002000004200800 | |
745 | .quad 0x2002000004200802, 0x0002000000200000 | |
746 | .quad 0x0000000000000000, 0x2000000004000002 | |
747 | .quad 0x2000000000000002, 0x0000000004000000 | |
748 | .quad 0x2002000004200002, 0x2000000000000802 | |
749 | .quad 0x0000000004000800, 0x2002000000200802 | |
750 | .quad 0x2002000000200002, 0x0000000004000800 | |
751 | .quad 0x2000000004000002, 0x0002000004200000 | |
752 | .quad 0x0002000004200800, 0x2002000000200002 | |
753 | .quad 0x0002000004200000, 0x0000000000000800 | |
754 | .quad 0x2000000000000802, 0x2002000004200802 | |
755 | .quad 0x0002000000200800, 0x2000000000000002 | |
756 | .quad 0x0000000004000000, 0x0002000000200800 | |
757 | .quad 0x0000000004000000, 0x0002000000200800 | |
758 | .quad 0x0002000000200000, 0x2000000004000802 | |
759 | .quad 0x2000000004000802, 0x2002000004200002 | |
760 | .quad 0x2002000004200002, 0x2000000000000002 | |
761 | .quad 0x2002000000200002, 0x0000000004000000 | |
762 | .quad 0x0000000004000800, 0x0002000000200000 | |
763 | .quad 0x0002000004200800, 0x2000000000000802 | |
764 | .quad 0x2002000000200802, 0x0002000004200800 | |
765 | .quad 0x2000000000000802, 0x2000000004000002 | |
766 | .quad 0x2002000004200802, 0x0002000004200000 | |
767 | .quad 0x0002000000200800, 0x0000000000000000 | |
768 | .quad 0x2000000000000002, 0x2002000004200802 | |
769 | .quad 0x0000000000000000, 0x2002000000200802 | |
770 | .quad 0x0002000004200000, 0x0000000000000800 | |
771 | .quad 0x2000000004000002, 0x0000000004000800 | |
772 | .quad 0x0000000000000800, 0x2002000000200002 | |
773 | .L_s8: /* table s8: 64 quadword entries; the code reaches it as s1 + 7*64*8, so it must directly follow table s7 */ | |
774 | .quad 0x0100010410001000, 0x0000010000001000 | |
775 | .quad 0x0000000000040000, 0x0100010410041000 | |
776 | .quad 0x0100000010000000, 0x0100010410001000 | |
777 | .quad 0x0000000400000000, 0x0100000010000000 | |
778 | .quad 0x0000000400040000, 0x0100000010040000 | |
779 | .quad 0x0100010410041000, 0x0000010000041000 | |
780 | .quad 0x0100010010041000, 0x0000010400041000 | |
781 | .quad 0x0000010000001000, 0x0000000400000000 | |
782 | .quad 0x0100000010040000, 0x0100000410000000 | |
783 | .quad 0x0100010010001000, 0x0000010400001000 | |
784 | .quad 0x0000010000041000, 0x0000000400040000 | |
785 | .quad 0x0100000410040000, 0x0100010010041000 | |
786 | .quad 0x0000010400001000, 0x0000000000000000 | |
787 | .quad 0x0000000000000000, 0x0100000410040000 | |
788 | .quad 0x0100000410000000, 0x0100010010001000 | |
789 | .quad 0x0000010400041000, 0x0000000000040000 | |
790 | .quad 0x0000010400041000, 0x0000000000040000 | |
791 | .quad 0x0100010010041000, 0x0000010000001000 | |
792 | .quad 0x0000000400000000, 0x0100000410040000 | |
793 | .quad 0x0000010000001000, 0x0000010400041000 | |
794 | .quad 0x0100010010001000, 0x0000000400000000 | |
795 | .quad 0x0100000410000000, 0x0100000010040000 | |
796 | .quad 0x0100000410040000, 0x0100000010000000 | |
797 | .quad 0x0000000000040000, 0x0100010410001000 | |
798 | .quad 0x0000000000000000, 0x0100010410041000 | |
799 | .quad 0x0000000400040000, 0x0100000410000000 | |
800 | .quad 0x0100000010040000, 0x0100010010001000 | |
801 | .quad 0x0100010410001000, 0x0000000000000000 | |
802 | .quad 0x0100010410041000, 0x0000010000041000 | |
803 | .quad 0x0000010000041000, 0x0000010400001000 | |
804 | .quad 0x0000010400001000, 0x0000000400040000 | |
805 | .quad 0x0100000010000000, 0x0100010010041000 |