# salsa20-x86_64-asm_64.S — Salsa20 stream cipher, x86_64 implementation
# AT&T/GAS syntax; Linux kernel linkage macros (ENTRY/ENDPROC).
# Originally generated with qhasm; git-blame table markup stripped.
1 | #include <linux/linkage.h> |
2 | ||
3 | # enter salsa20_encrypt_bytes | |
4 | ENTRY(salsa20_encrypt_bytes) | |
9a7dafbb TSH |
5 | mov %rsp,%r11 |
6 | and $31,%r11 | |
7 | add $256,%r11 | |
8 | sub %r11,%rsp | |
9 | # x = arg1 | |
10 | mov %rdi,%r8 | |
11 | # m = arg2 | |
12 | mov %rsi,%rsi | |
13 | # out = arg3 | |
14 | mov %rdx,%rdi | |
15 | # bytes = arg4 | |
16 | mov %rcx,%rdx | |
17 | # unsigned>? bytes - 0 | |
18 | cmp $0,%rdx | |
19 | # comment:fp stack unchanged by jump | |
20 | # goto done if !unsigned> | |
21 | jbe ._done | |
22 | # comment:fp stack unchanged by fallthrough | |
23 | # start: | |
24 | ._start: | |
25 | # r11_stack = r11 | |
26 | movq %r11,0(%rsp) | |
27 | # r12_stack = r12 | |
28 | movq %r12,8(%rsp) | |
29 | # r13_stack = r13 | |
30 | movq %r13,16(%rsp) | |
31 | # r14_stack = r14 | |
32 | movq %r14,24(%rsp) | |
33 | # r15_stack = r15 | |
34 | movq %r15,32(%rsp) | |
35 | # rbx_stack = rbx | |
36 | movq %rbx,40(%rsp) | |
37 | # rbp_stack = rbp | |
38 | movq %rbp,48(%rsp) | |
39 | # in0 = *(uint64 *) (x + 0) | |
40 | movq 0(%r8),%rcx | |
41 | # in2 = *(uint64 *) (x + 8) | |
42 | movq 8(%r8),%r9 | |
43 | # in4 = *(uint64 *) (x + 16) | |
44 | movq 16(%r8),%rax | |
45 | # in6 = *(uint64 *) (x + 24) | |
46 | movq 24(%r8),%r10 | |
47 | # in8 = *(uint64 *) (x + 32) | |
48 | movq 32(%r8),%r11 | |
49 | # in10 = *(uint64 *) (x + 40) | |
50 | movq 40(%r8),%r12 | |
51 | # in12 = *(uint64 *) (x + 48) | |
52 | movq 48(%r8),%r13 | |
53 | # in14 = *(uint64 *) (x + 56) | |
54 | movq 56(%r8),%r14 | |
55 | # j0 = in0 | |
56 | movq %rcx,56(%rsp) | |
57 | # j2 = in2 | |
58 | movq %r9,64(%rsp) | |
59 | # j4 = in4 | |
60 | movq %rax,72(%rsp) | |
61 | # j6 = in6 | |
62 | movq %r10,80(%rsp) | |
63 | # j8 = in8 | |
64 | movq %r11,88(%rsp) | |
65 | # j10 = in10 | |
66 | movq %r12,96(%rsp) | |
67 | # j12 = in12 | |
68 | movq %r13,104(%rsp) | |
69 | # j14 = in14 | |
70 | movq %r14,112(%rsp) | |
71 | # x_backup = x | |
72 | movq %r8,120(%rsp) | |
73 | # bytesatleast1: | |
74 | ._bytesatleast1: | |
75 | # unsigned<? bytes - 64 | |
76 | cmp $64,%rdx | |
77 | # comment:fp stack unchanged by jump | |
78 | # goto nocopy if !unsigned< | |
79 | jae ._nocopy | |
80 | # ctarget = out | |
81 | movq %rdi,128(%rsp) | |
82 | # out = &tmp | |
83 | leaq 192(%rsp),%rdi | |
84 | # i = bytes | |
85 | mov %rdx,%rcx | |
86 | # while (i) { *out++ = *m++; --i } | |
87 | rep movsb | |
88 | # out = &tmp | |
89 | leaq 192(%rsp),%rdi | |
90 | # m = &tmp | |
91 | leaq 192(%rsp),%rsi | |
92 | # comment:fp stack unchanged by fallthrough | |
93 | # nocopy: | |
94 | ._nocopy: | |
95 | # out_backup = out | |
96 | movq %rdi,136(%rsp) | |
97 | # m_backup = m | |
98 | movq %rsi,144(%rsp) | |
99 | # bytes_backup = bytes | |
100 | movq %rdx,152(%rsp) | |
101 | # x1 = j0 | |
102 | movq 56(%rsp),%rdi | |
103 | # x0 = x1 | |
104 | mov %rdi,%rdx | |
105 | # (uint64) x1 >>= 32 | |
106 | shr $32,%rdi | |
107 | # x3 = j2 | |
108 | movq 64(%rsp),%rsi | |
109 | # x2 = x3 | |
110 | mov %rsi,%rcx | |
111 | # (uint64) x3 >>= 32 | |
112 | shr $32,%rsi | |
113 | # x5 = j4 | |
114 | movq 72(%rsp),%r8 | |
115 | # x4 = x5 | |
116 | mov %r8,%r9 | |
117 | # (uint64) x5 >>= 32 | |
118 | shr $32,%r8 | |
119 | # x5_stack = x5 | |
120 | movq %r8,160(%rsp) | |
121 | # x7 = j6 | |
122 | movq 80(%rsp),%r8 | |
123 | # x6 = x7 | |
124 | mov %r8,%rax | |
125 | # (uint64) x7 >>= 32 | |
126 | shr $32,%r8 | |
127 | # x9 = j8 | |
128 | movq 88(%rsp),%r10 | |
129 | # x8 = x9 | |
130 | mov %r10,%r11 | |
131 | # (uint64) x9 >>= 32 | |
132 | shr $32,%r10 | |
133 | # x11 = j10 | |
134 | movq 96(%rsp),%r12 | |
135 | # x10 = x11 | |
136 | mov %r12,%r13 | |
137 | # x10_stack = x10 | |
138 | movq %r13,168(%rsp) | |
139 | # (uint64) x11 >>= 32 | |
140 | shr $32,%r12 | |
141 | # x13 = j12 | |
142 | movq 104(%rsp),%r13 | |
143 | # x12 = x13 | |
144 | mov %r13,%r14 | |
145 | # (uint64) x13 >>= 32 | |
146 | shr $32,%r13 | |
147 | # x15 = j14 | |
148 | movq 112(%rsp),%r15 | |
149 | # x14 = x15 | |
150 | mov %r15,%rbx | |
151 | # (uint64) x15 >>= 32 | |
152 | shr $32,%r15 | |
153 | # x15_stack = x15 | |
154 | movq %r15,176(%rsp) | |
155 | # i = 20 | |
156 | mov $20,%r15 | |
157 | # mainloop: | |
158 | ._mainloop: | |
159 | # i_backup = i | |
160 | movq %r15,184(%rsp) | |
161 | # x5 = x5_stack | |
162 | movq 160(%rsp),%r15 | |
163 | # a = x12 + x0 | |
164 | lea (%r14,%rdx),%rbp | |
165 | # (uint32) a <<<= 7 | |
166 | rol $7,%ebp | |
167 | # x4 ^= a | |
168 | xor %rbp,%r9 | |
169 | # b = x1 + x5 | |
170 | lea (%rdi,%r15),%rbp | |
171 | # (uint32) b <<<= 7 | |
172 | rol $7,%ebp | |
173 | # x9 ^= b | |
174 | xor %rbp,%r10 | |
175 | # a = x0 + x4 | |
176 | lea (%rdx,%r9),%rbp | |
177 | # (uint32) a <<<= 9 | |
178 | rol $9,%ebp | |
179 | # x8 ^= a | |
180 | xor %rbp,%r11 | |
181 | # b = x5 + x9 | |
182 | lea (%r15,%r10),%rbp | |
183 | # (uint32) b <<<= 9 | |
184 | rol $9,%ebp | |
185 | # x13 ^= b | |
186 | xor %rbp,%r13 | |
187 | # a = x4 + x8 | |
188 | lea (%r9,%r11),%rbp | |
189 | # (uint32) a <<<= 13 | |
190 | rol $13,%ebp | |
191 | # x12 ^= a | |
192 | xor %rbp,%r14 | |
193 | # b = x9 + x13 | |
194 | lea (%r10,%r13),%rbp | |
195 | # (uint32) b <<<= 13 | |
196 | rol $13,%ebp | |
197 | # x1 ^= b | |
198 | xor %rbp,%rdi | |
199 | # a = x8 + x12 | |
200 | lea (%r11,%r14),%rbp | |
201 | # (uint32) a <<<= 18 | |
202 | rol $18,%ebp | |
203 | # x0 ^= a | |
204 | xor %rbp,%rdx | |
205 | # b = x13 + x1 | |
206 | lea (%r13,%rdi),%rbp | |
207 | # (uint32) b <<<= 18 | |
208 | rol $18,%ebp | |
209 | # x5 ^= b | |
210 | xor %rbp,%r15 | |
211 | # x10 = x10_stack | |
212 | movq 168(%rsp),%rbp | |
213 | # x5_stack = x5 | |
214 | movq %r15,160(%rsp) | |
215 | # c = x6 + x10 | |
216 | lea (%rax,%rbp),%r15 | |
217 | # (uint32) c <<<= 7 | |
218 | rol $7,%r15d | |
219 | # x14 ^= c | |
220 | xor %r15,%rbx | |
221 | # c = x10 + x14 | |
222 | lea (%rbp,%rbx),%r15 | |
223 | # (uint32) c <<<= 9 | |
224 | rol $9,%r15d | |
225 | # x2 ^= c | |
226 | xor %r15,%rcx | |
227 | # c = x14 + x2 | |
228 | lea (%rbx,%rcx),%r15 | |
229 | # (uint32) c <<<= 13 | |
230 | rol $13,%r15d | |
231 | # x6 ^= c | |
232 | xor %r15,%rax | |
233 | # c = x2 + x6 | |
234 | lea (%rcx,%rax),%r15 | |
235 | # (uint32) c <<<= 18 | |
236 | rol $18,%r15d | |
237 | # x10 ^= c | |
238 | xor %r15,%rbp | |
239 | # x15 = x15_stack | |
240 | movq 176(%rsp),%r15 | |
241 | # x10_stack = x10 | |
242 | movq %rbp,168(%rsp) | |
243 | # d = x11 + x15 | |
244 | lea (%r12,%r15),%rbp | |
245 | # (uint32) d <<<= 7 | |
246 | rol $7,%ebp | |
247 | # x3 ^= d | |
248 | xor %rbp,%rsi | |
249 | # d = x15 + x3 | |
250 | lea (%r15,%rsi),%rbp | |
251 | # (uint32) d <<<= 9 | |
252 | rol $9,%ebp | |
253 | # x7 ^= d | |
254 | xor %rbp,%r8 | |
255 | # d = x3 + x7 | |
256 | lea (%rsi,%r8),%rbp | |
257 | # (uint32) d <<<= 13 | |
258 | rol $13,%ebp | |
259 | # x11 ^= d | |
260 | xor %rbp,%r12 | |
261 | # d = x7 + x11 | |
262 | lea (%r8,%r12),%rbp | |
263 | # (uint32) d <<<= 18 | |
264 | rol $18,%ebp | |
265 | # x15 ^= d | |
266 | xor %rbp,%r15 | |
267 | # x15_stack = x15 | |
268 | movq %r15,176(%rsp) | |
269 | # x5 = x5_stack | |
270 | movq 160(%rsp),%r15 | |
271 | # a = x3 + x0 | |
272 | lea (%rsi,%rdx),%rbp | |
273 | # (uint32) a <<<= 7 | |
274 | rol $7,%ebp | |
275 | # x1 ^= a | |
276 | xor %rbp,%rdi | |
277 | # b = x4 + x5 | |
278 | lea (%r9,%r15),%rbp | |
279 | # (uint32) b <<<= 7 | |
280 | rol $7,%ebp | |
281 | # x6 ^= b | |
282 | xor %rbp,%rax | |
283 | # a = x0 + x1 | |
284 | lea (%rdx,%rdi),%rbp | |
285 | # (uint32) a <<<= 9 | |
286 | rol $9,%ebp | |
287 | # x2 ^= a | |
288 | xor %rbp,%rcx | |
289 | # b = x5 + x6 | |
290 | lea (%r15,%rax),%rbp | |
291 | # (uint32) b <<<= 9 | |
292 | rol $9,%ebp | |
293 | # x7 ^= b | |
294 | xor %rbp,%r8 | |
295 | # a = x1 + x2 | |
296 | lea (%rdi,%rcx),%rbp | |
297 | # (uint32) a <<<= 13 | |
298 | rol $13,%ebp | |
299 | # x3 ^= a | |
300 | xor %rbp,%rsi | |
301 | # b = x6 + x7 | |
302 | lea (%rax,%r8),%rbp | |
303 | # (uint32) b <<<= 13 | |
304 | rol $13,%ebp | |
305 | # x4 ^= b | |
306 | xor %rbp,%r9 | |
307 | # a = x2 + x3 | |
308 | lea (%rcx,%rsi),%rbp | |
309 | # (uint32) a <<<= 18 | |
310 | rol $18,%ebp | |
311 | # x0 ^= a | |
312 | xor %rbp,%rdx | |
313 | # b = x7 + x4 | |
314 | lea (%r8,%r9),%rbp | |
315 | # (uint32) b <<<= 18 | |
316 | rol $18,%ebp | |
317 | # x5 ^= b | |
318 | xor %rbp,%r15 | |
319 | # x10 = x10_stack | |
320 | movq 168(%rsp),%rbp | |
321 | # x5_stack = x5 | |
322 | movq %r15,160(%rsp) | |
323 | # c = x9 + x10 | |
324 | lea (%r10,%rbp),%r15 | |
325 | # (uint32) c <<<= 7 | |
326 | rol $7,%r15d | |
327 | # x11 ^= c | |
328 | xor %r15,%r12 | |
329 | # c = x10 + x11 | |
330 | lea (%rbp,%r12),%r15 | |
331 | # (uint32) c <<<= 9 | |
332 | rol $9,%r15d | |
333 | # x8 ^= c | |
334 | xor %r15,%r11 | |
335 | # c = x11 + x8 | |
336 | lea (%r12,%r11),%r15 | |
337 | # (uint32) c <<<= 13 | |
338 | rol $13,%r15d | |
339 | # x9 ^= c | |
340 | xor %r15,%r10 | |
341 | # c = x8 + x9 | |
342 | lea (%r11,%r10),%r15 | |
343 | # (uint32) c <<<= 18 | |
344 | rol $18,%r15d | |
345 | # x10 ^= c | |
346 | xor %r15,%rbp | |
347 | # x15 = x15_stack | |
348 | movq 176(%rsp),%r15 | |
349 | # x10_stack = x10 | |
350 | movq %rbp,168(%rsp) | |
351 | # d = x14 + x15 | |
352 | lea (%rbx,%r15),%rbp | |
353 | # (uint32) d <<<= 7 | |
354 | rol $7,%ebp | |
355 | # x12 ^= d | |
356 | xor %rbp,%r14 | |
357 | # d = x15 + x12 | |
358 | lea (%r15,%r14),%rbp | |
359 | # (uint32) d <<<= 9 | |
360 | rol $9,%ebp | |
361 | # x13 ^= d | |
362 | xor %rbp,%r13 | |
363 | # d = x12 + x13 | |
364 | lea (%r14,%r13),%rbp | |
365 | # (uint32) d <<<= 13 | |
366 | rol $13,%ebp | |
367 | # x14 ^= d | |
368 | xor %rbp,%rbx | |
369 | # d = x13 + x14 | |
370 | lea (%r13,%rbx),%rbp | |
371 | # (uint32) d <<<= 18 | |
372 | rol $18,%ebp | |
373 | # x15 ^= d | |
374 | xor %rbp,%r15 | |
375 | # x15_stack = x15 | |
376 | movq %r15,176(%rsp) | |
377 | # x5 = x5_stack | |
378 | movq 160(%rsp),%r15 | |
379 | # a = x12 + x0 | |
380 | lea (%r14,%rdx),%rbp | |
381 | # (uint32) a <<<= 7 | |
382 | rol $7,%ebp | |
383 | # x4 ^= a | |
384 | xor %rbp,%r9 | |
385 | # b = x1 + x5 | |
386 | lea (%rdi,%r15),%rbp | |
387 | # (uint32) b <<<= 7 | |
388 | rol $7,%ebp | |
389 | # x9 ^= b | |
390 | xor %rbp,%r10 | |
391 | # a = x0 + x4 | |
392 | lea (%rdx,%r9),%rbp | |
393 | # (uint32) a <<<= 9 | |
394 | rol $9,%ebp | |
395 | # x8 ^= a | |
396 | xor %rbp,%r11 | |
397 | # b = x5 + x9 | |
398 | lea (%r15,%r10),%rbp | |
399 | # (uint32) b <<<= 9 | |
400 | rol $9,%ebp | |
401 | # x13 ^= b | |
402 | xor %rbp,%r13 | |
403 | # a = x4 + x8 | |
404 | lea (%r9,%r11),%rbp | |
405 | # (uint32) a <<<= 13 | |
406 | rol $13,%ebp | |
407 | # x12 ^= a | |
408 | xor %rbp,%r14 | |
409 | # b = x9 + x13 | |
410 | lea (%r10,%r13),%rbp | |
411 | # (uint32) b <<<= 13 | |
412 | rol $13,%ebp | |
413 | # x1 ^= b | |
414 | xor %rbp,%rdi | |
415 | # a = x8 + x12 | |
416 | lea (%r11,%r14),%rbp | |
417 | # (uint32) a <<<= 18 | |
418 | rol $18,%ebp | |
419 | # x0 ^= a | |
420 | xor %rbp,%rdx | |
421 | # b = x13 + x1 | |
422 | lea (%r13,%rdi),%rbp | |
423 | # (uint32) b <<<= 18 | |
424 | rol $18,%ebp | |
425 | # x5 ^= b | |
426 | xor %rbp,%r15 | |
427 | # x10 = x10_stack | |
428 | movq 168(%rsp),%rbp | |
429 | # x5_stack = x5 | |
430 | movq %r15,160(%rsp) | |
431 | # c = x6 + x10 | |
432 | lea (%rax,%rbp),%r15 | |
433 | # (uint32) c <<<= 7 | |
434 | rol $7,%r15d | |
435 | # x14 ^= c | |
436 | xor %r15,%rbx | |
437 | # c = x10 + x14 | |
438 | lea (%rbp,%rbx),%r15 | |
439 | # (uint32) c <<<= 9 | |
440 | rol $9,%r15d | |
441 | # x2 ^= c | |
442 | xor %r15,%rcx | |
443 | # c = x14 + x2 | |
444 | lea (%rbx,%rcx),%r15 | |
445 | # (uint32) c <<<= 13 | |
446 | rol $13,%r15d | |
447 | # x6 ^= c | |
448 | xor %r15,%rax | |
449 | # c = x2 + x6 | |
450 | lea (%rcx,%rax),%r15 | |
451 | # (uint32) c <<<= 18 | |
452 | rol $18,%r15d | |
453 | # x10 ^= c | |
454 | xor %r15,%rbp | |
455 | # x15 = x15_stack | |
456 | movq 176(%rsp),%r15 | |
457 | # x10_stack = x10 | |
458 | movq %rbp,168(%rsp) | |
459 | # d = x11 + x15 | |
460 | lea (%r12,%r15),%rbp | |
461 | # (uint32) d <<<= 7 | |
462 | rol $7,%ebp | |
463 | # x3 ^= d | |
464 | xor %rbp,%rsi | |
465 | # d = x15 + x3 | |
466 | lea (%r15,%rsi),%rbp | |
467 | # (uint32) d <<<= 9 | |
468 | rol $9,%ebp | |
469 | # x7 ^= d | |
470 | xor %rbp,%r8 | |
471 | # d = x3 + x7 | |
472 | lea (%rsi,%r8),%rbp | |
473 | # (uint32) d <<<= 13 | |
474 | rol $13,%ebp | |
475 | # x11 ^= d | |
476 | xor %rbp,%r12 | |
477 | # d = x7 + x11 | |
478 | lea (%r8,%r12),%rbp | |
479 | # (uint32) d <<<= 18 | |
480 | rol $18,%ebp | |
481 | # x15 ^= d | |
482 | xor %rbp,%r15 | |
483 | # x15_stack = x15 | |
484 | movq %r15,176(%rsp) | |
485 | # x5 = x5_stack | |
486 | movq 160(%rsp),%r15 | |
487 | # a = x3 + x0 | |
488 | lea (%rsi,%rdx),%rbp | |
489 | # (uint32) a <<<= 7 | |
490 | rol $7,%ebp | |
491 | # x1 ^= a | |
492 | xor %rbp,%rdi | |
493 | # b = x4 + x5 | |
494 | lea (%r9,%r15),%rbp | |
495 | # (uint32) b <<<= 7 | |
496 | rol $7,%ebp | |
497 | # x6 ^= b | |
498 | xor %rbp,%rax | |
499 | # a = x0 + x1 | |
500 | lea (%rdx,%rdi),%rbp | |
501 | # (uint32) a <<<= 9 | |
502 | rol $9,%ebp | |
503 | # x2 ^= a | |
504 | xor %rbp,%rcx | |
505 | # b = x5 + x6 | |
506 | lea (%r15,%rax),%rbp | |
507 | # (uint32) b <<<= 9 | |
508 | rol $9,%ebp | |
509 | # x7 ^= b | |
510 | xor %rbp,%r8 | |
511 | # a = x1 + x2 | |
512 | lea (%rdi,%rcx),%rbp | |
513 | # (uint32) a <<<= 13 | |
514 | rol $13,%ebp | |
515 | # x3 ^= a | |
516 | xor %rbp,%rsi | |
517 | # b = x6 + x7 | |
518 | lea (%rax,%r8),%rbp | |
519 | # (uint32) b <<<= 13 | |
520 | rol $13,%ebp | |
521 | # x4 ^= b | |
522 | xor %rbp,%r9 | |
523 | # a = x2 + x3 | |
524 | lea (%rcx,%rsi),%rbp | |
525 | # (uint32) a <<<= 18 | |
526 | rol $18,%ebp | |
527 | # x0 ^= a | |
528 | xor %rbp,%rdx | |
529 | # b = x7 + x4 | |
530 | lea (%r8,%r9),%rbp | |
531 | # (uint32) b <<<= 18 | |
532 | rol $18,%ebp | |
533 | # x5 ^= b | |
534 | xor %rbp,%r15 | |
535 | # x10 = x10_stack | |
536 | movq 168(%rsp),%rbp | |
537 | # x5_stack = x5 | |
538 | movq %r15,160(%rsp) | |
539 | # c = x9 + x10 | |
540 | lea (%r10,%rbp),%r15 | |
541 | # (uint32) c <<<= 7 | |
542 | rol $7,%r15d | |
543 | # x11 ^= c | |
544 | xor %r15,%r12 | |
545 | # c = x10 + x11 | |
546 | lea (%rbp,%r12),%r15 | |
547 | # (uint32) c <<<= 9 | |
548 | rol $9,%r15d | |
549 | # x8 ^= c | |
550 | xor %r15,%r11 | |
551 | # c = x11 + x8 | |
552 | lea (%r12,%r11),%r15 | |
553 | # (uint32) c <<<= 13 | |
554 | rol $13,%r15d | |
555 | # x9 ^= c | |
556 | xor %r15,%r10 | |
557 | # c = x8 + x9 | |
558 | lea (%r11,%r10),%r15 | |
559 | # (uint32) c <<<= 18 | |
560 | rol $18,%r15d | |
561 | # x10 ^= c | |
562 | xor %r15,%rbp | |
563 | # x15 = x15_stack | |
564 | movq 176(%rsp),%r15 | |
565 | # x10_stack = x10 | |
566 | movq %rbp,168(%rsp) | |
567 | # d = x14 + x15 | |
568 | lea (%rbx,%r15),%rbp | |
569 | # (uint32) d <<<= 7 | |
570 | rol $7,%ebp | |
571 | # x12 ^= d | |
572 | xor %rbp,%r14 | |
573 | # d = x15 + x12 | |
574 | lea (%r15,%r14),%rbp | |
575 | # (uint32) d <<<= 9 | |
576 | rol $9,%ebp | |
577 | # x13 ^= d | |
578 | xor %rbp,%r13 | |
579 | # d = x12 + x13 | |
580 | lea (%r14,%r13),%rbp | |
581 | # (uint32) d <<<= 13 | |
582 | rol $13,%ebp | |
583 | # x14 ^= d | |
584 | xor %rbp,%rbx | |
585 | # d = x13 + x14 | |
586 | lea (%r13,%rbx),%rbp | |
587 | # (uint32) d <<<= 18 | |
588 | rol $18,%ebp | |
589 | # x15 ^= d | |
590 | xor %rbp,%r15 | |
591 | # x15_stack = x15 | |
592 | movq %r15,176(%rsp) | |
593 | # i = i_backup | |
594 | movq 184(%rsp),%r15 | |
595 | # unsigned>? i -= 4 | |
596 | sub $4,%r15 | |
597 | # comment:fp stack unchanged by jump | |
598 | # goto mainloop if unsigned> | |
599 | ja ._mainloop | |
600 | # (uint32) x2 += j2 | |
601 | addl 64(%rsp),%ecx | |
602 | # x3 <<= 32 | |
603 | shl $32,%rsi | |
604 | # x3 += j2 | |
605 | addq 64(%rsp),%rsi | |
606 | # (uint64) x3 >>= 32 | |
607 | shr $32,%rsi | |
608 | # x3 <<= 32 | |
609 | shl $32,%rsi | |
610 | # x2 += x3 | |
611 | add %rsi,%rcx | |
612 | # (uint32) x6 += j6 | |
613 | addl 80(%rsp),%eax | |
614 | # x7 <<= 32 | |
615 | shl $32,%r8 | |
616 | # x7 += j6 | |
617 | addq 80(%rsp),%r8 | |
618 | # (uint64) x7 >>= 32 | |
619 | shr $32,%r8 | |
620 | # x7 <<= 32 | |
621 | shl $32,%r8 | |
622 | # x6 += x7 | |
623 | add %r8,%rax | |
624 | # (uint32) x8 += j8 | |
625 | addl 88(%rsp),%r11d | |
626 | # x9 <<= 32 | |
627 | shl $32,%r10 | |
628 | # x9 += j8 | |
629 | addq 88(%rsp),%r10 | |
630 | # (uint64) x9 >>= 32 | |
631 | shr $32,%r10 | |
632 | # x9 <<= 32 | |
633 | shl $32,%r10 | |
634 | # x8 += x9 | |
635 | add %r10,%r11 | |
636 | # (uint32) x12 += j12 | |
637 | addl 104(%rsp),%r14d | |
638 | # x13 <<= 32 | |
639 | shl $32,%r13 | |
640 | # x13 += j12 | |
641 | addq 104(%rsp),%r13 | |
642 | # (uint64) x13 >>= 32 | |
643 | shr $32,%r13 | |
644 | # x13 <<= 32 | |
645 | shl $32,%r13 | |
646 | # x12 += x13 | |
647 | add %r13,%r14 | |
648 | # (uint32) x0 += j0 | |
649 | addl 56(%rsp),%edx | |
650 | # x1 <<= 32 | |
651 | shl $32,%rdi | |
652 | # x1 += j0 | |
653 | addq 56(%rsp),%rdi | |
654 | # (uint64) x1 >>= 32 | |
655 | shr $32,%rdi | |
656 | # x1 <<= 32 | |
657 | shl $32,%rdi | |
658 | # x0 += x1 | |
659 | add %rdi,%rdx | |
660 | # x5 = x5_stack | |
661 | movq 160(%rsp),%rdi | |
662 | # (uint32) x4 += j4 | |
663 | addl 72(%rsp),%r9d | |
664 | # x5 <<= 32 | |
665 | shl $32,%rdi | |
666 | # x5 += j4 | |
667 | addq 72(%rsp),%rdi | |
668 | # (uint64) x5 >>= 32 | |
669 | shr $32,%rdi | |
670 | # x5 <<= 32 | |
671 | shl $32,%rdi | |
672 | # x4 += x5 | |
673 | add %rdi,%r9 | |
674 | # x10 = x10_stack | |
675 | movq 168(%rsp),%r8 | |
676 | # (uint32) x10 += j10 | |
677 | addl 96(%rsp),%r8d | |
678 | # x11 <<= 32 | |
679 | shl $32,%r12 | |
680 | # x11 += j10 | |
681 | addq 96(%rsp),%r12 | |
682 | # (uint64) x11 >>= 32 | |
683 | shr $32,%r12 | |
684 | # x11 <<= 32 | |
685 | shl $32,%r12 | |
686 | # x10 += x11 | |
687 | add %r12,%r8 | |
688 | # x15 = x15_stack | |
689 | movq 176(%rsp),%rdi | |
690 | # (uint32) x14 += j14 | |
691 | addl 112(%rsp),%ebx | |
692 | # x15 <<= 32 | |
693 | shl $32,%rdi | |
694 | # x15 += j14 | |
695 | addq 112(%rsp),%rdi | |
696 | # (uint64) x15 >>= 32 | |
697 | shr $32,%rdi | |
698 | # x15 <<= 32 | |
699 | shl $32,%rdi | |
700 | # x14 += x15 | |
701 | add %rdi,%rbx | |
702 | # out = out_backup | |
703 | movq 136(%rsp),%rdi | |
704 | # m = m_backup | |
705 | movq 144(%rsp),%rsi | |
706 | # x0 ^= *(uint64 *) (m + 0) | |
707 | xorq 0(%rsi),%rdx | |
708 | # *(uint64 *) (out + 0) = x0 | |
709 | movq %rdx,0(%rdi) | |
710 | # x2 ^= *(uint64 *) (m + 8) | |
711 | xorq 8(%rsi),%rcx | |
712 | # *(uint64 *) (out + 8) = x2 | |
713 | movq %rcx,8(%rdi) | |
714 | # x4 ^= *(uint64 *) (m + 16) | |
715 | xorq 16(%rsi),%r9 | |
716 | # *(uint64 *) (out + 16) = x4 | |
717 | movq %r9,16(%rdi) | |
718 | # x6 ^= *(uint64 *) (m + 24) | |
719 | xorq 24(%rsi),%rax | |
720 | # *(uint64 *) (out + 24) = x6 | |
721 | movq %rax,24(%rdi) | |
722 | # x8 ^= *(uint64 *) (m + 32) | |
723 | xorq 32(%rsi),%r11 | |
724 | # *(uint64 *) (out + 32) = x8 | |
725 | movq %r11,32(%rdi) | |
726 | # x10 ^= *(uint64 *) (m + 40) | |
727 | xorq 40(%rsi),%r8 | |
728 | # *(uint64 *) (out + 40) = x10 | |
729 | movq %r8,40(%rdi) | |
730 | # x12 ^= *(uint64 *) (m + 48) | |
731 | xorq 48(%rsi),%r14 | |
732 | # *(uint64 *) (out + 48) = x12 | |
733 | movq %r14,48(%rdi) | |
734 | # x14 ^= *(uint64 *) (m + 56) | |
735 | xorq 56(%rsi),%rbx | |
736 | # *(uint64 *) (out + 56) = x14 | |
737 | movq %rbx,56(%rdi) | |
738 | # bytes = bytes_backup | |
739 | movq 152(%rsp),%rdx | |
740 | # in8 = j8 | |
741 | movq 88(%rsp),%rcx | |
742 | # in8 += 1 | |
743 | add $1,%rcx | |
744 | # j8 = in8 | |
745 | movq %rcx,88(%rsp) | |
746 | # unsigned>? unsigned<? bytes - 64 | |
747 | cmp $64,%rdx | |
748 | # comment:fp stack unchanged by jump | |
749 | # goto bytesatleast65 if unsigned> | |
750 | ja ._bytesatleast65 | |
751 | # comment:fp stack unchanged by jump | |
752 | # goto bytesatleast64 if !unsigned< | |
753 | jae ._bytesatleast64 | |
754 | # m = out | |
755 | mov %rdi,%rsi | |
756 | # out = ctarget | |
757 | movq 128(%rsp),%rdi | |
758 | # i = bytes | |
759 | mov %rdx,%rcx | |
760 | # while (i) { *out++ = *m++; --i } | |
761 | rep movsb | |
762 | # comment:fp stack unchanged by fallthrough | |
763 | # bytesatleast64: | |
764 | ._bytesatleast64: | |
765 | # x = x_backup | |
766 | movq 120(%rsp),%rdi | |
767 | # in8 = j8 | |
768 | movq 88(%rsp),%rsi | |
769 | # *(uint64 *) (x + 32) = in8 | |
770 | movq %rsi,32(%rdi) | |
771 | # r11 = r11_stack | |
772 | movq 0(%rsp),%r11 | |
773 | # r12 = r12_stack | |
774 | movq 8(%rsp),%r12 | |
775 | # r13 = r13_stack | |
776 | movq 16(%rsp),%r13 | |
777 | # r14 = r14_stack | |
778 | movq 24(%rsp),%r14 | |
779 | # r15 = r15_stack | |
780 | movq 32(%rsp),%r15 | |
781 | # rbx = rbx_stack | |
782 | movq 40(%rsp),%rbx | |
783 | # rbp = rbp_stack | |
784 | movq 48(%rsp),%rbp | |
785 | # comment:fp stack unchanged by fallthrough | |
786 | # done: | |
787 | ._done: | |
788 | # leave | |
789 | add %r11,%rsp | |
790 | mov %rdi,%rax | |
791 | mov %rsi,%rdx | |
792 | ret | |
793 | # bytesatleast65: | |
794 | ._bytesatleast65: | |
795 | # bytes -= 64 | |
796 | sub $64,%rdx | |
797 | # out += 64 | |
798 | add $64,%rdi | |
799 | # m += 64 | |
800 | add $64,%rsi | |
801 | # comment:fp stack unchanged by jump | |
802 | # goto bytesatleast1 | |
803 | jmp ._bytesatleast1 | |
04443808 JK |
804 | ENDPROC(salsa20_encrypt_bytes) |
805 | ||
806 | # enter salsa20_keysetup | |
807 | ENTRY(salsa20_keysetup) | |
9a7dafbb TSH |
808 | mov %rsp,%r11 |
809 | and $31,%r11 | |
810 | add $256,%r11 | |
811 | sub %r11,%rsp | |
812 | # k = arg2 | |
813 | mov %rsi,%rsi | |
814 | # kbits = arg3 | |
815 | mov %rdx,%rdx | |
816 | # x = arg1 | |
817 | mov %rdi,%rdi | |
818 | # in0 = *(uint64 *) (k + 0) | |
819 | movq 0(%rsi),%r8 | |
820 | # in2 = *(uint64 *) (k + 8) | |
821 | movq 8(%rsi),%r9 | |
822 | # *(uint64 *) (x + 4) = in0 | |
823 | movq %r8,4(%rdi) | |
824 | # *(uint64 *) (x + 12) = in2 | |
825 | movq %r9,12(%rdi) | |
826 | # unsigned<? kbits - 256 | |
827 | cmp $256,%rdx | |
828 | # comment:fp stack unchanged by jump | |
829 | # goto kbits128 if unsigned< | |
830 | jb ._kbits128 | |
831 | # kbits256: | |
832 | ._kbits256: | |
833 | # in10 = *(uint64 *) (k + 16) | |
834 | movq 16(%rsi),%rdx | |
835 | # in12 = *(uint64 *) (k + 24) | |
836 | movq 24(%rsi),%rsi | |
837 | # *(uint64 *) (x + 44) = in10 | |
838 | movq %rdx,44(%rdi) | |
839 | # *(uint64 *) (x + 52) = in12 | |
840 | movq %rsi,52(%rdi) | |
841 | # in0 = 1634760805 | |
842 | mov $1634760805,%rsi | |
843 | # in4 = 857760878 | |
844 | mov $857760878,%rdx | |
845 | # in10 = 2036477234 | |
846 | mov $2036477234,%rcx | |
847 | # in14 = 1797285236 | |
848 | mov $1797285236,%r8 | |
849 | # *(uint32 *) (x + 0) = in0 | |
850 | movl %esi,0(%rdi) | |
851 | # *(uint32 *) (x + 20) = in4 | |
852 | movl %edx,20(%rdi) | |
853 | # *(uint32 *) (x + 40) = in10 | |
854 | movl %ecx,40(%rdi) | |
855 | # *(uint32 *) (x + 60) = in14 | |
856 | movl %r8d,60(%rdi) | |
857 | # comment:fp stack unchanged by jump | |
858 | # goto keysetupdone | |
859 | jmp ._keysetupdone | |
860 | # kbits128: | |
861 | ._kbits128: | |
862 | # in10 = *(uint64 *) (k + 0) | |
863 | movq 0(%rsi),%rdx | |
864 | # in12 = *(uint64 *) (k + 8) | |
865 | movq 8(%rsi),%rsi | |
866 | # *(uint64 *) (x + 44) = in10 | |
867 | movq %rdx,44(%rdi) | |
868 | # *(uint64 *) (x + 52) = in12 | |
869 | movq %rsi,52(%rdi) | |
870 | # in0 = 1634760805 | |
871 | mov $1634760805,%rsi | |
872 | # in4 = 824206446 | |
873 | mov $824206446,%rdx | |
874 | # in10 = 2036477238 | |
875 | mov $2036477238,%rcx | |
876 | # in14 = 1797285236 | |
877 | mov $1797285236,%r8 | |
878 | # *(uint32 *) (x + 0) = in0 | |
879 | movl %esi,0(%rdi) | |
880 | # *(uint32 *) (x + 20) = in4 | |
881 | movl %edx,20(%rdi) | |
882 | # *(uint32 *) (x + 40) = in10 | |
883 | movl %ecx,40(%rdi) | |
884 | # *(uint32 *) (x + 60) = in14 | |
885 | movl %r8d,60(%rdi) | |
886 | # keysetupdone: | |
887 | ._keysetupdone: | |
888 | # leave | |
889 | add %r11,%rsp | |
890 | mov %rdi,%rax | |
891 | mov %rsi,%rdx | |
892 | ret | |
04443808 JK |
893 | ENDPROC(salsa20_keysetup) |
894 | ||
895 | # enter salsa20_ivsetup | |
896 | ENTRY(salsa20_ivsetup) | |
9a7dafbb TSH |
897 | mov %rsp,%r11 |
898 | and $31,%r11 | |
899 | add $256,%r11 | |
900 | sub %r11,%rsp | |
901 | # iv = arg2 | |
902 | mov %rsi,%rsi | |
903 | # x = arg1 | |
904 | mov %rdi,%rdi | |
905 | # in6 = *(uint64 *) (iv + 0) | |
906 | movq 0(%rsi),%rsi | |
907 | # in8 = 0 | |
908 | mov $0,%r8 | |
909 | # *(uint64 *) (x + 24) = in6 | |
910 | movq %rsi,24(%rdi) | |
911 | # *(uint64 *) (x + 32) = in8 | |
912 | movq %r8,32(%rdi) | |
913 | # leave | |
914 | add %r11,%rsp | |
915 | mov %rdi,%rax | |
916 | mov %rsi,%rdx | |
917 | ret | |
04443808 | 918 | ENDPROC(salsa20_ivsetup) |