# arch/x86/crypto/salsa20-x86_64-asm_64.S
#include <linux/linkage.h>

# enter salsa20_encrypt_bytes
ENTRY(salsa20_encrypt_bytes)
        mov %rsp,%r11
        and $31,%r11
        add $256,%r11
        sub %r11,%rsp
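
# The four instructions above align the stack and reserve scratch space.  A
# rough C-style sketch of the same arithmetic (rsp being the incoming stack
# pointer):
#
#       r11 = (rsp & 31) + 256;   /* misalignment + 256 scratch bytes */
#       rsp = rsp - r11;          /* rsp is now 32-byte aligned */
#
# %r11 keeps the adjustment so the epilogue can undo it with "add %r11,%rsp".
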
        # x = arg1
        mov %rdi,%r8
        # m = arg2
        mov %rsi,%rsi
        # out = arg3
        mov %rdx,%rdi
        # bytes = arg4
        mov %rcx,%rdx
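
# The moves above map the System V AMD64 argument registers %rdi, %rsi, %rdx,
# %rcx onto the names used in the comments below.  A plausible C prototype for
# this entry point, as a sketch only (the exact types in the C glue code may
# differ):
#
#       void salsa20_encrypt_bytes(uint32_t x[16], const uint8_t *m,
#                                  uint8_t *out, uint64_t bytes);
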
        # unsigned>? bytes - 0
        cmp $0,%rdx
        # comment:fp stack unchanged by jump
        # goto done if !unsigned>
        jbe ._done
        # comment:fp stack unchanged by fallthrough
# start:
._start:
        # r11_stack = r11
        movq %r11,0(%rsp)
        # r12_stack = r12
        movq %r12,8(%rsp)
        # r13_stack = r13
        movq %r13,16(%rsp)
        # r14_stack = r14
        movq %r14,24(%rsp)
        # r15_stack = r15
        movq %r15,32(%rsp)
        # rbx_stack = rbx
        movq %rbx,40(%rsp)
        # rbp_stack = rbp
        movq %rbp,48(%rsp)
        # in0 = *(uint64 *) (x + 0)
        movq 0(%r8),%rcx
        # in2 = *(uint64 *) (x + 8)
        movq 8(%r8),%r9
        # in4 = *(uint64 *) (x + 16)
        movq 16(%r8),%rax
        # in6 = *(uint64 *) (x + 24)
        movq 24(%r8),%r10
        # in8 = *(uint64 *) (x + 32)
        movq 32(%r8),%r11
        # in10 = *(uint64 *) (x + 40)
        movq 40(%r8),%r12
        # in12 = *(uint64 *) (x + 48)
        movq 48(%r8),%r13
        # in14 = *(uint64 *) (x + 56)
        movq 56(%r8),%r14
        # j0 = in0
        movq %rcx,56(%rsp)
        # j2 = in2
        movq %r9,64(%rsp)
        # j4 = in4
        movq %rax,72(%rsp)
        # j6 = in6
        movq %r10,80(%rsp)
        # j8 = in8
        movq %r11,88(%rsp)
        # j10 = in10
        movq %r12,96(%rsp)
        # j12 = in12
        movq %r13,104(%rsp)
        # j14 = in14
        movq %r14,112(%rsp)
        # x_backup = x
        movq %r8,120(%rsp)
# bytesatleast1:
._bytesatleast1:
        # unsigned<? bytes - 64
        cmp $64,%rdx
        # comment:fp stack unchanged by jump
        # goto nocopy if !unsigned<
        jae ._nocopy
        # ctarget = out
        movq %rdi,128(%rsp)
        # out = &tmp
        leaq 192(%rsp),%rdi
        # i = bytes
        mov %rdx,%rcx
        # while (i) { *out++ = *m++; --i }
        rep movsb
        # out = &tmp
        leaq 192(%rsp),%rdi
        # m = &tmp
        leaq 192(%rsp),%rsi
        # comment:fp stack unchanged by fallthrough
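
# The copy above only runs for a short final block: the remaining bytes are
# staged in the 64-byte tmp area at 192(%rsp), so the block code below can
# always read and write a full 64 bytes.  Roughly, in C (tmp and ctarget name
# the stack slots used here):
#
#       if (bytes < 64) {
#               ctarget = out;
#               memcpy(tmp, m, bytes);
#               m = out = tmp;
#       }
#       /* ... generate and XOR one 64-byte block ... */
#       if (bytes < 64)
#               memcpy(ctarget, tmp, bytes);  /* done just before ._bytesatleast64 */
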
# nocopy:
._nocopy:
        # out_backup = out
        movq %rdi,136(%rsp)
        # m_backup = m
        movq %rsi,144(%rsp)
        # bytes_backup = bytes
        movq %rdx,152(%rsp)
        # x1 = j0
        movq 56(%rsp),%rdi
        # x0 = x1
        mov %rdi,%rdx
        # (uint64) x1 >>= 32
        shr $32,%rdi
        # x3 = j2
        movq 64(%rsp),%rsi
        # x2 = x3
        mov %rsi,%rcx
        # (uint64) x3 >>= 32
        shr $32,%rsi
        # x5 = j4
        movq 72(%rsp),%r8
        # x4 = x5
        mov %r8,%r9
        # (uint64) x5 >>= 32
        shr $32,%r8
        # x5_stack = x5
        movq %r8,160(%rsp)
        # x7 = j6
        movq 80(%rsp),%r8
        # x6 = x7
        mov %r8,%rax
        # (uint64) x7 >>= 32
        shr $32,%r8
        # x9 = j8
        movq 88(%rsp),%r10
        # x8 = x9
        mov %r10,%r11
        # (uint64) x9 >>= 32
        shr $32,%r10
        # x11 = j10
        movq 96(%rsp),%r12
        # x10 = x11
        mov %r12,%r13
        # x10_stack = x10
        movq %r13,168(%rsp)
        # (uint64) x11 >>= 32
        shr $32,%r12
        # x13 = j12
        movq 104(%rsp),%r13
        # x12 = x13
        mov %r13,%r14
        # (uint64) x13 >>= 32
        shr $32,%r13
        # x15 = j14
        movq 112(%rsp),%r15
        # x14 = x15
        mov %r15,%rbx
        # (uint64) x15 >>= 32
        shr $32,%r15
        # x15_stack = x15
        movq %r15,176(%rsp)
        # i = 20
        mov $20,%r15
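
# Each pass through ._mainloop below runs two Salsa20 double-rounds (a column
# round followed by a row round); i starts at 20 and drops by 4 per pass,
# giving the usual 20 rounds.  A compact C sketch of the quarter-round pattern
# that the a/b/c/d comments below spell out (x is uint32_t x[16]):
#
#       #define R(v, n)  (((v) << (n)) | ((v) >> (32 - (n))))
#       #define QR(a, b, c, d) do {     \
#               b ^= R(a + d, 7);       \
#               c ^= R(b + a, 9);       \
#               d ^= R(c + b, 13);      \
#               a ^= R(d + c, 18);      \
#       } while (0)
#
#       /* column round */
#       QR(x[0],  x[4],  x[8],  x[12]);
#       QR(x[5],  x[9],  x[13], x[1]);
#       QR(x[10], x[14], x[2],  x[6]);
#       QR(x[15], x[3],  x[7],  x[11]);
#       /* row round */
#       QR(x[0],  x[1],  x[2],  x[3]);
#       QR(x[5],  x[6],  x[7],  x[4]);
#       QR(x[10], x[11], x[8],  x[9]);
#       QR(x[15], x[12], x[13], x[14]);
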
# mainloop:
._mainloop:
        # i_backup = i
        movq %r15,184(%rsp)
        # x5 = x5_stack
        movq 160(%rsp),%r15
        # a = x12 + x0
        lea (%r14,%rdx),%rbp
        # (uint32) a <<<= 7
        rol $7,%ebp
        # x4 ^= a
        xor %rbp,%r9
        # b = x1 + x5
        lea (%rdi,%r15),%rbp
        # (uint32) b <<<= 7
        rol $7,%ebp
        # x9 ^= b
        xor %rbp,%r10
        # a = x0 + x4
        lea (%rdx,%r9),%rbp
        # (uint32) a <<<= 9
        rol $9,%ebp
        # x8 ^= a
        xor %rbp,%r11
        # b = x5 + x9
        lea (%r15,%r10),%rbp
        # (uint32) b <<<= 9
        rol $9,%ebp
        # x13 ^= b
        xor %rbp,%r13
        # a = x4 + x8
        lea (%r9,%r11),%rbp
        # (uint32) a <<<= 13
        rol $13,%ebp
        # x12 ^= a
        xor %rbp,%r14
        # b = x9 + x13
        lea (%r10,%r13),%rbp
        # (uint32) b <<<= 13
        rol $13,%ebp
        # x1 ^= b
        xor %rbp,%rdi
        # a = x8 + x12
        lea (%r11,%r14),%rbp
        # (uint32) a <<<= 18
        rol $18,%ebp
        # x0 ^= a
        xor %rbp,%rdx
        # b = x13 + x1
        lea (%r13,%rdi),%rbp
        # (uint32) b <<<= 18
        rol $18,%ebp
        # x5 ^= b
        xor %rbp,%r15
        # x10 = x10_stack
        movq 168(%rsp),%rbp
        # x5_stack = x5
        movq %r15,160(%rsp)
        # c = x6 + x10
        lea (%rax,%rbp),%r15
        # (uint32) c <<<= 7
        rol $7,%r15d
        # x14 ^= c
        xor %r15,%rbx
        # c = x10 + x14
        lea (%rbp,%rbx),%r15
        # (uint32) c <<<= 9
        rol $9,%r15d
        # x2 ^= c
        xor %r15,%rcx
        # c = x14 + x2
        lea (%rbx,%rcx),%r15
        # (uint32) c <<<= 13
        rol $13,%r15d
        # x6 ^= c
        xor %r15,%rax
        # c = x2 + x6
        lea (%rcx,%rax),%r15
        # (uint32) c <<<= 18
        rol $18,%r15d
        # x10 ^= c
        xor %r15,%rbp
        # x15 = x15_stack
        movq 176(%rsp),%r15
        # x10_stack = x10
        movq %rbp,168(%rsp)
        # d = x11 + x15
        lea (%r12,%r15),%rbp
        # (uint32) d <<<= 7
        rol $7,%ebp
        # x3 ^= d
        xor %rbp,%rsi
        # d = x15 + x3
        lea (%r15,%rsi),%rbp
        # (uint32) d <<<= 9
        rol $9,%ebp
        # x7 ^= d
        xor %rbp,%r8
        # d = x3 + x7
        lea (%rsi,%r8),%rbp
        # (uint32) d <<<= 13
        rol $13,%ebp
        # x11 ^= d
        xor %rbp,%r12
        # d = x7 + x11
        lea (%r8,%r12),%rbp
        # (uint32) d <<<= 18
        rol $18,%ebp
        # x15 ^= d
        xor %rbp,%r15
        # x15_stack = x15
        movq %r15,176(%rsp)
        # x5 = x5_stack
        movq 160(%rsp),%r15
        # a = x3 + x0
        lea (%rsi,%rdx),%rbp
        # (uint32) a <<<= 7
        rol $7,%ebp
        # x1 ^= a
        xor %rbp,%rdi
        # b = x4 + x5
        lea (%r9,%r15),%rbp
        # (uint32) b <<<= 7
        rol $7,%ebp
        # x6 ^= b
        xor %rbp,%rax
        # a = x0 + x1
        lea (%rdx,%rdi),%rbp
        # (uint32) a <<<= 9
        rol $9,%ebp
        # x2 ^= a
        xor %rbp,%rcx
        # b = x5 + x6
        lea (%r15,%rax),%rbp
        # (uint32) b <<<= 9
        rol $9,%ebp
        # x7 ^= b
        xor %rbp,%r8
        # a = x1 + x2
        lea (%rdi,%rcx),%rbp
        # (uint32) a <<<= 13
        rol $13,%ebp
        # x3 ^= a
        xor %rbp,%rsi
        # b = x6 + x7
        lea (%rax,%r8),%rbp
        # (uint32) b <<<= 13
        rol $13,%ebp
        # x4 ^= b
        xor %rbp,%r9
        # a = x2 + x3
        lea (%rcx,%rsi),%rbp
        # (uint32) a <<<= 18
        rol $18,%ebp
        # x0 ^= a
        xor %rbp,%rdx
        # b = x7 + x4
        lea (%r8,%r9),%rbp
        # (uint32) b <<<= 18
        rol $18,%ebp
        # x5 ^= b
        xor %rbp,%r15
        # x10 = x10_stack
        movq 168(%rsp),%rbp
        # x5_stack = x5
        movq %r15,160(%rsp)
        # c = x9 + x10
        lea (%r10,%rbp),%r15
        # (uint32) c <<<= 7
        rol $7,%r15d
        # x11 ^= c
        xor %r15,%r12
        # c = x10 + x11
        lea (%rbp,%r12),%r15
        # (uint32) c <<<= 9
        rol $9,%r15d
        # x8 ^= c
        xor %r15,%r11
        # c = x11 + x8
        lea (%r12,%r11),%r15
        # (uint32) c <<<= 13
        rol $13,%r15d
        # x9 ^= c
        xor %r15,%r10
        # c = x8 + x9
        lea (%r11,%r10),%r15
        # (uint32) c <<<= 18
        rol $18,%r15d
        # x10 ^= c
        xor %r15,%rbp
        # x15 = x15_stack
        movq 176(%rsp),%r15
        # x10_stack = x10
        movq %rbp,168(%rsp)
        # d = x14 + x15
        lea (%rbx,%r15),%rbp
        # (uint32) d <<<= 7
        rol $7,%ebp
        # x12 ^= d
        xor %rbp,%r14
        # d = x15 + x12
        lea (%r15,%r14),%rbp
        # (uint32) d <<<= 9
        rol $9,%ebp
        # x13 ^= d
        xor %rbp,%r13
        # d = x12 + x13
        lea (%r14,%r13),%rbp
        # (uint32) d <<<= 13
        rol $13,%ebp
        # x14 ^= d
        xor %rbp,%rbx
        # d = x13 + x14
        lea (%r13,%rbx),%rbp
        # (uint32) d <<<= 18
        rol $18,%ebp
        # x15 ^= d
        xor %rbp,%r15
        # x15_stack = x15
        movq %r15,176(%rsp)
        # x5 = x5_stack
        movq 160(%rsp),%r15
        # a = x12 + x0
        lea (%r14,%rdx),%rbp
        # (uint32) a <<<= 7
        rol $7,%ebp
        # x4 ^= a
        xor %rbp,%r9
        # b = x1 + x5
        lea (%rdi,%r15),%rbp
        # (uint32) b <<<= 7
        rol $7,%ebp
        # x9 ^= b
        xor %rbp,%r10
        # a = x0 + x4
        lea (%rdx,%r9),%rbp
        # (uint32) a <<<= 9
        rol $9,%ebp
        # x8 ^= a
        xor %rbp,%r11
        # b = x5 + x9
        lea (%r15,%r10),%rbp
        # (uint32) b <<<= 9
        rol $9,%ebp
        # x13 ^= b
        xor %rbp,%r13
        # a = x4 + x8
        lea (%r9,%r11),%rbp
        # (uint32) a <<<= 13
        rol $13,%ebp
        # x12 ^= a
        xor %rbp,%r14
        # b = x9 + x13
        lea (%r10,%r13),%rbp
        # (uint32) b <<<= 13
        rol $13,%ebp
        # x1 ^= b
        xor %rbp,%rdi
        # a = x8 + x12
        lea (%r11,%r14),%rbp
        # (uint32) a <<<= 18
        rol $18,%ebp
        # x0 ^= a
        xor %rbp,%rdx
        # b = x13 + x1
        lea (%r13,%rdi),%rbp
        # (uint32) b <<<= 18
        rol $18,%ebp
        # x5 ^= b
        xor %rbp,%r15
        # x10 = x10_stack
        movq 168(%rsp),%rbp
        # x5_stack = x5
        movq %r15,160(%rsp)
        # c = x6 + x10
        lea (%rax,%rbp),%r15
        # (uint32) c <<<= 7
        rol $7,%r15d
        # x14 ^= c
        xor %r15,%rbx
        # c = x10 + x14
        lea (%rbp,%rbx),%r15
        # (uint32) c <<<= 9
        rol $9,%r15d
        # x2 ^= c
        xor %r15,%rcx
        # c = x14 + x2
        lea (%rbx,%rcx),%r15
        # (uint32) c <<<= 13
        rol $13,%r15d
        # x6 ^= c
        xor %r15,%rax
        # c = x2 + x6
        lea (%rcx,%rax),%r15
        # (uint32) c <<<= 18
        rol $18,%r15d
        # x10 ^= c
        xor %r15,%rbp
        # x15 = x15_stack
        movq 176(%rsp),%r15
        # x10_stack = x10
        movq %rbp,168(%rsp)
        # d = x11 + x15
        lea (%r12,%r15),%rbp
        # (uint32) d <<<= 7
        rol $7,%ebp
        # x3 ^= d
        xor %rbp,%rsi
        # d = x15 + x3
        lea (%r15,%rsi),%rbp
        # (uint32) d <<<= 9
        rol $9,%ebp
        # x7 ^= d
        xor %rbp,%r8
        # d = x3 + x7
        lea (%rsi,%r8),%rbp
        # (uint32) d <<<= 13
        rol $13,%ebp
        # x11 ^= d
        xor %rbp,%r12
        # d = x7 + x11
        lea (%r8,%r12),%rbp
        # (uint32) d <<<= 18
        rol $18,%ebp
        # x15 ^= d
        xor %rbp,%r15
        # x15_stack = x15
        movq %r15,176(%rsp)
        # x5 = x5_stack
        movq 160(%rsp),%r15
        # a = x3 + x0
        lea (%rsi,%rdx),%rbp
        # (uint32) a <<<= 7
        rol $7,%ebp
        # x1 ^= a
        xor %rbp,%rdi
        # b = x4 + x5
        lea (%r9,%r15),%rbp
        # (uint32) b <<<= 7
        rol $7,%ebp
        # x6 ^= b
        xor %rbp,%rax
        # a = x0 + x1
        lea (%rdx,%rdi),%rbp
        # (uint32) a <<<= 9
        rol $9,%ebp
        # x2 ^= a
        xor %rbp,%rcx
        # b = x5 + x6
        lea (%r15,%rax),%rbp
        # (uint32) b <<<= 9
        rol $9,%ebp
        # x7 ^= b
        xor %rbp,%r8
        # a = x1 + x2
        lea (%rdi,%rcx),%rbp
        # (uint32) a <<<= 13
        rol $13,%ebp
        # x3 ^= a
        xor %rbp,%rsi
        # b = x6 + x7
        lea (%rax,%r8),%rbp
        # (uint32) b <<<= 13
        rol $13,%ebp
        # x4 ^= b
        xor %rbp,%r9
        # a = x2 + x3
        lea (%rcx,%rsi),%rbp
        # (uint32) a <<<= 18
        rol $18,%ebp
        # x0 ^= a
        xor %rbp,%rdx
        # b = x7 + x4
        lea (%r8,%r9),%rbp
        # (uint32) b <<<= 18
        rol $18,%ebp
        # x5 ^= b
        xor %rbp,%r15
        # x10 = x10_stack
        movq 168(%rsp),%rbp
        # x5_stack = x5
        movq %r15,160(%rsp)
        # c = x9 + x10
        lea (%r10,%rbp),%r15
        # (uint32) c <<<= 7
        rol $7,%r15d
        # x11 ^= c
        xor %r15,%r12
        # c = x10 + x11
        lea (%rbp,%r12),%r15
        # (uint32) c <<<= 9
        rol $9,%r15d
        # x8 ^= c
        xor %r15,%r11
        # c = x11 + x8
        lea (%r12,%r11),%r15
        # (uint32) c <<<= 13
        rol $13,%r15d
        # x9 ^= c
        xor %r15,%r10
        # c = x8 + x9
        lea (%r11,%r10),%r15
        # (uint32) c <<<= 18
        rol $18,%r15d
        # x10 ^= c
        xor %r15,%rbp
        # x15 = x15_stack
        movq 176(%rsp),%r15
        # x10_stack = x10
        movq %rbp,168(%rsp)
        # d = x14 + x15
        lea (%rbx,%r15),%rbp
        # (uint32) d <<<= 7
        rol $7,%ebp
        # x12 ^= d
        xor %rbp,%r14
        # d = x15 + x12
        lea (%r15,%r14),%rbp
        # (uint32) d <<<= 9
        rol $9,%ebp
        # x13 ^= d
        xor %rbp,%r13
        # d = x12 + x13
        lea (%r14,%r13),%rbp
        # (uint32) d <<<= 13
        rol $13,%ebp
        # x14 ^= d
        xor %rbp,%rbx
        # d = x13 + x14
        lea (%r13,%rbx),%rbp
        # (uint32) d <<<= 18
        rol $18,%ebp
        # x15 ^= d
        xor %rbp,%r15
        # x15_stack = x15
        movq %r15,176(%rsp)
        # i = i_backup
        movq 184(%rsp),%r15
        # unsigned>? i -= 4
        sub $4,%r15
        # comment:fp stack unchanged by jump
        # goto mainloop if unsigned>
        ja ._mainloop
        # (uint32) x2 += j2
        addl 64(%rsp),%ecx
        # x3 <<= 32
        shl $32,%rsi
        # x3 += j2
        addq 64(%rsp),%rsi
        # (uint64) x3 >>= 32
        shr $32,%rsi
        # x3 <<= 32
        shl $32,%rsi
        # x2 += x3
        add %rsi,%rcx
        # (uint32) x6 += j6
        addl 80(%rsp),%eax
        # x7 <<= 32
        shl $32,%r8
        # x7 += j6
        addq 80(%rsp),%r8
        # (uint64) x7 >>= 32
        shr $32,%r8
        # x7 <<= 32
        shl $32,%r8
        # x6 += x7
        add %r8,%rax
        # (uint32) x8 += j8
        addl 88(%rsp),%r11d
        # x9 <<= 32
        shl $32,%r10
        # x9 += j8
        addq 88(%rsp),%r10
        # (uint64) x9 >>= 32
        shr $32,%r10
        # x9 <<= 32
        shl $32,%r10
        # x8 += x9
        add %r10,%r11
        # (uint32) x12 += j12
        addl 104(%rsp),%r14d
        # x13 <<= 32
        shl $32,%r13
        # x13 += j12
        addq 104(%rsp),%r13
        # (uint64) x13 >>= 32
        shr $32,%r13
        # x13 <<= 32
        shl $32,%r13
        # x12 += x13
        add %r13,%r14
        # (uint32) x0 += j0
        addl 56(%rsp),%edx
        # x1 <<= 32
        shl $32,%rdi
        # x1 += j0
        addq 56(%rsp),%rdi
        # (uint64) x1 >>= 32
        shr $32,%rdi
        # x1 <<= 32
        shl $32,%rdi
        # x0 += x1
        add %rdi,%rdx
        # x5 = x5_stack
        movq 160(%rsp),%rdi
        # (uint32) x4 += j4
        addl 72(%rsp),%r9d
        # x5 <<= 32
        shl $32,%rdi
        # x5 += j4
        addq 72(%rsp),%rdi
        # (uint64) x5 >>= 32
        shr $32,%rdi
        # x5 <<= 32
        shl $32,%rdi
        # x4 += x5
        add %rdi,%r9
        # x10 = x10_stack
        movq 168(%rsp),%r8
        # (uint32) x10 += j10
        addl 96(%rsp),%r8d
        # x11 <<= 32
        shl $32,%r12
        # x11 += j10
        addq 96(%rsp),%r12
        # (uint64) x11 >>= 32
        shr $32,%r12
        # x11 <<= 32
        shl $32,%r12
        # x10 += x11
        add %r12,%r8
        # x15 = x15_stack
        movq 176(%rsp),%rdi
        # (uint32) x14 += j14
        addl 112(%rsp),%ebx
        # x15 <<= 32
        shl $32,%rdi
        # x15 += j14
        addq 112(%rsp),%rdi
        # (uint64) x15 >>= 32
        shr $32,%rdi
        # x15 <<= 32
        shl $32,%rdi
        # x14 += x15
        add %rdi,%rbx
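
# The block above is Salsa20's feedforward: the saved input words j0..j15 are
# added back into x0..x15.  Each 64-bit register holds a pair of 32-bit state
# words, so the low word is added with a 32-bit addl while the high word is
# recombined through the shl/addq/shr/shl sequence.  The idea for one pair, as
# a C sketch (x_lo/x_hi are the split halves, j_pair the packed stack copy;
# names are illustrative only):
#
#       uint32_t lo = x_lo + (uint32_t)j_pair;
#       uint64_t hi = (uint64_t)(uint32_t)(x_hi + (uint32_t)(j_pair >> 32)) << 32;
#       uint64_t packed = hi + lo;   /* ready to XOR against the input */
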
        # out = out_backup
        movq 136(%rsp),%rdi
        # m = m_backup
        movq 144(%rsp),%rsi
        # x0 ^= *(uint64 *) (m + 0)
        xorq 0(%rsi),%rdx
        # *(uint64 *) (out + 0) = x0
        movq %rdx,0(%rdi)
        # x2 ^= *(uint64 *) (m + 8)
        xorq 8(%rsi),%rcx
        # *(uint64 *) (out + 8) = x2
        movq %rcx,8(%rdi)
        # x4 ^= *(uint64 *) (m + 16)
        xorq 16(%rsi),%r9
        # *(uint64 *) (out + 16) = x4
        movq %r9,16(%rdi)
        # x6 ^= *(uint64 *) (m + 24)
        xorq 24(%rsi),%rax
        # *(uint64 *) (out + 24) = x6
        movq %rax,24(%rdi)
        # x8 ^= *(uint64 *) (m + 32)
        xorq 32(%rsi),%r11
        # *(uint64 *) (out + 32) = x8
        movq %r11,32(%rdi)
        # x10 ^= *(uint64 *) (m + 40)
        xorq 40(%rsi),%r8
        # *(uint64 *) (out + 40) = x10
        movq %r8,40(%rdi)
        # x12 ^= *(uint64 *) (m + 48)
        xorq 48(%rsi),%r14
        # *(uint64 *) (out + 48) = x12
        movq %r14,48(%rdi)
        # x14 ^= *(uint64 *) (m + 56)
        xorq 56(%rsi),%rbx
        # *(uint64 *) (out + 56) = x14
        movq %rbx,56(%rdi)
        # bytes = bytes_backup
        movq 152(%rsp),%rdx
        # in8 = j8
        movq 88(%rsp),%rcx
        # in8 += 1
        add $1,%rcx
        # j8 = in8
        movq %rcx,88(%rsp)
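
# j8 spans state words 8 and 9, which Salsa20 uses as its 64-bit block
# counter; the add above bumps it by one after every 64-byte block.
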
        # unsigned>? unsigned<? bytes - 64
        cmp $64,%rdx
        # comment:fp stack unchanged by jump
        # goto bytesatleast65 if unsigned>
        ja ._bytesatleast65
        # comment:fp stack unchanged by jump
        # goto bytesatleast64 if !unsigned<
        jae ._bytesatleast64
        # m = out
        mov %rdi,%rsi
        # out = ctarget
        movq 128(%rsp),%rdi
        # i = bytes
        mov %rdx,%rcx
        # while (i) { *out++ = *m++; --i }
        rep movsb
        # comment:fp stack unchanged by fallthrough
# bytesatleast64:
._bytesatleast64:
        # x = x_backup
        movq 120(%rsp),%rdi
        # in8 = j8
        movq 88(%rsp),%rsi
        # *(uint64 *) (x + 32) = in8
        movq %rsi,32(%rdi)
        # r11 = r11_stack
        movq 0(%rsp),%r11
        # r12 = r12_stack
        movq 8(%rsp),%r12
        # r13 = r13_stack
        movq 16(%rsp),%r13
        # r14 = r14_stack
        movq 24(%rsp),%r14
        # r15 = r15_stack
        movq 32(%rsp),%r15
        # rbx = rbx_stack
        movq 40(%rsp),%rbx
        # rbp = rbp_stack
        movq 48(%rsp),%rbp
        # comment:fp stack unchanged by fallthrough
# done:
._done:
        # leave
        add %r11,%rsp
        mov %rdi,%rax
        mov %rsi,%rdx
        ret
# bytesatleast65:
._bytesatleast65:
        # bytes -= 64
        sub $64,%rdx
        # out += 64
        add $64,%rdi
        # m += 64
        add $64,%rsi
        # comment:fp stack unchanged by jump
        # goto bytesatleast1
        jmp ._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)

# enter salsa20_keysetup
ENTRY(salsa20_keysetup)
        mov %rsp,%r11
        and $31,%r11
        add $256,%r11
        sub %r11,%rsp
        # k = arg2
        mov %rsi,%rsi
        # kbits = arg3
        mov %rdx,%rdx
        # x = arg1
        mov %rdi,%rdi
        # in0 = *(uint64 *) (k + 0)
        movq 0(%rsi),%r8
        # in2 = *(uint64 *) (k + 8)
        movq 8(%rsi),%r9
        # *(uint64 *) (x + 4) = in0
        movq %r8,4(%rdi)
        # *(uint64 *) (x + 12) = in2
        movq %r9,12(%rdi)
        # unsigned<? kbits - 256
        cmp $256,%rdx
        # comment:fp stack unchanged by jump
        # goto kbits128 if unsigned<
        jb ._kbits128
# kbits256:
._kbits256:
        # in10 = *(uint64 *) (k + 16)
        movq 16(%rsi),%rdx
        # in12 = *(uint64 *) (k + 24)
        movq 24(%rsi),%rsi
        # *(uint64 *) (x + 44) = in10
        movq %rdx,44(%rdi)
        # *(uint64 *) (x + 52) = in12
        movq %rsi,52(%rdi)
        # in0 = 1634760805
        mov $1634760805,%rsi
        # in4 = 857760878
        mov $857760878,%rdx
        # in10 = 2036477234
        mov $2036477234,%rcx
        # in14 = 1797285236
        mov $1797285236,%r8
        # *(uint32 *) (x + 0) = in0
        movl %esi,0(%rdi)
        # *(uint32 *) (x + 20) = in4
        movl %edx,20(%rdi)
        # *(uint32 *) (x + 40) = in10
        movl %ecx,40(%rdi)
        # *(uint32 *) (x + 60) = in14
        movl %r8d,60(%rdi)
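
# The four immediates stored above are the little-endian words of the Salsa20
# 256-bit-key constant "expand 32-byte k", placed in the diagonal words 0, 5,
# 10 and 15 (state bytes 0, 20, 40, 60):
#       1634760805 = 0x61707865 = "expa"
#        857760878 = 0x3320646e = "nd 3"
#       2036477234 = 0x79622d32 = "2-by"
#       1797285236 = 0x6b206574 = "te k"
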
        # comment:fp stack unchanged by jump
        # goto keysetupdone
        jmp ._keysetupdone
# kbits128:
._kbits128:
        # in10 = *(uint64 *) (k + 0)
        movq 0(%rsi),%rdx
        # in12 = *(uint64 *) (k + 8)
        movq 8(%rsi),%rsi
        # *(uint64 *) (x + 44) = in10
        movq %rdx,44(%rdi)
        # *(uint64 *) (x + 52) = in12
        movq %rsi,52(%rdi)
        # in0 = 1634760805
        mov $1634760805,%rsi
        # in4 = 824206446
        mov $824206446,%rdx
        # in10 = 2036477238
        mov $2036477238,%rcx
        # in14 = 1797285236
        mov $1797285236,%r8
        # *(uint32 *) (x + 0) = in0
        movl %esi,0(%rdi)
        # *(uint32 *) (x + 20) = in4
        movl %edx,20(%rdi)
        # *(uint32 *) (x + 40) = in10
        movl %ecx,40(%rdi)
        # *(uint32 *) (x + 60) = in14
        movl %r8d,60(%rdi)
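
# The 128-bit path reuses the same 16 key bytes for both key halves and swaps
# in the middle two constants of "expand 16-byte k":
#        824206446 = 0x3120646e = "nd 1"
#       2036477238 = 0x79622d36 = "6-by"
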
# keysetupdone:
._keysetupdone:
        # leave
        add %r11,%rsp
        mov %rdi,%rax
        mov %rsi,%rdx
        ret
ENDPROC(salsa20_keysetup)

# enter salsa20_ivsetup
ENTRY(salsa20_ivsetup)
        mov %rsp,%r11
        and $31,%r11
        add $256,%r11
        sub %r11,%rsp
        # iv = arg2
        mov %rsi,%rsi
        # x = arg1
        mov %rdi,%rdi
        # in6 = *(uint64 *) (iv + 0)
        movq 0(%rsi),%rsi
        # in8 = 0
        mov $0,%r8
        # *(uint64 *) (x + 24) = in6
        movq %rsi,24(%rdi)
        # *(uint64 *) (x + 32) = in8
        movq %r8,32(%rdi)
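
# salsa20_ivsetup thus stores the 8-byte IV in state words 6 and 7 (bytes
# 24..31) and clears the 64-bit block counter in words 8 and 9 (bytes 32..39),
# the same counter that salsa20_encrypt_bytes increments per block.
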
        # leave
        add %r11,%rsp
        mov %rdi,%rax
        mov %rsi,%rdx
        ret
ENDPROC(salsa20_ivsetup)