# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.

#include <linux/linkage.h>

.text

9 | # enter salsa20_encrypt_bytes | |
10 | ENTRY(salsa20_encrypt_bytes) | |
974e4b75 TSH |
11 | mov %esp,%eax |
12 | and $31,%eax | |
13 | add $256,%eax | |
14 | sub %eax,%esp | |
15 | # eax_stack = eax | |
16 | movl %eax,80(%esp) | |
17 | # ebx_stack = ebx | |
18 | movl %ebx,84(%esp) | |
19 | # esi_stack = esi | |
20 | movl %esi,88(%esp) | |
21 | # edi_stack = edi | |
22 | movl %edi,92(%esp) | |
23 | # ebp_stack = ebp | |
24 | movl %ebp,96(%esp) | |
25 | # x = arg1 | |
26 | movl 4(%esp,%eax),%edx | |
27 | # m = arg2 | |
28 | movl 8(%esp,%eax),%esi | |
29 | # out = arg3 | |
30 | movl 12(%esp,%eax),%edi | |
31 | # bytes = arg4 | |
32 | movl 16(%esp,%eax),%ebx | |
33 | # bytes -= 0 | |
34 | sub $0,%ebx | |
35 | # goto done if unsigned<= | |
36 | jbe ._done | |
37 | ._start: | |
38 | # in0 = *(uint32 *) (x + 0) | |
39 | movl 0(%edx),%eax | |
40 | # in1 = *(uint32 *) (x + 4) | |
41 | movl 4(%edx),%ecx | |
42 | # in2 = *(uint32 *) (x + 8) | |
43 | movl 8(%edx),%ebp | |
44 | # j0 = in0 | |
45 | movl %eax,164(%esp) | |
46 | # in3 = *(uint32 *) (x + 12) | |
47 | movl 12(%edx),%eax | |
48 | # j1 = in1 | |
49 | movl %ecx,168(%esp) | |
50 | # in4 = *(uint32 *) (x + 16) | |
51 | movl 16(%edx),%ecx | |
52 | # j2 = in2 | |
53 | movl %ebp,172(%esp) | |
54 | # in5 = *(uint32 *) (x + 20) | |
55 | movl 20(%edx),%ebp | |
56 | # j3 = in3 | |
57 | movl %eax,176(%esp) | |
58 | # in6 = *(uint32 *) (x + 24) | |
59 | movl 24(%edx),%eax | |
60 | # j4 = in4 | |
61 | movl %ecx,180(%esp) | |
62 | # in7 = *(uint32 *) (x + 28) | |
63 | movl 28(%edx),%ecx | |
64 | # j5 = in5 | |
65 | movl %ebp,184(%esp) | |
66 | # in8 = *(uint32 *) (x + 32) | |
67 | movl 32(%edx),%ebp | |
68 | # j6 = in6 | |
69 | movl %eax,188(%esp) | |
70 | # in9 = *(uint32 *) (x + 36) | |
71 | movl 36(%edx),%eax | |
72 | # j7 = in7 | |
73 | movl %ecx,192(%esp) | |
74 | # in10 = *(uint32 *) (x + 40) | |
75 | movl 40(%edx),%ecx | |
76 | # j8 = in8 | |
77 | movl %ebp,196(%esp) | |
78 | # in11 = *(uint32 *) (x + 44) | |
79 | movl 44(%edx),%ebp | |
80 | # j9 = in9 | |
81 | movl %eax,200(%esp) | |
82 | # in12 = *(uint32 *) (x + 48) | |
83 | movl 48(%edx),%eax | |
84 | # j10 = in10 | |
85 | movl %ecx,204(%esp) | |
86 | # in13 = *(uint32 *) (x + 52) | |
87 | movl 52(%edx),%ecx | |
88 | # j11 = in11 | |
89 | movl %ebp,208(%esp) | |
90 | # in14 = *(uint32 *) (x + 56) | |
91 | movl 56(%edx),%ebp | |
92 | # j12 = in12 | |
93 | movl %eax,212(%esp) | |
94 | # in15 = *(uint32 *) (x + 60) | |
95 | movl 60(%edx),%eax | |
96 | # j13 = in13 | |
97 | movl %ecx,216(%esp) | |
98 | # j14 = in14 | |
99 | movl %ebp,220(%esp) | |
100 | # j15 = in15 | |
101 | movl %eax,224(%esp) | |
102 | # x_backup = x | |
103 | movl %edx,64(%esp) | |
104 | ._bytesatleast1: | |
105 | # bytes - 64 | |
106 | cmp $64,%ebx | |
107 | # goto nocopy if unsigned>= | |
108 | jae ._nocopy | |
109 | # ctarget = out | |
110 | movl %edi,228(%esp) | |
111 | # out = &tmp | |
112 | leal 0(%esp),%edi | |
113 | # i = bytes | |
114 | mov %ebx,%ecx | |
115 | # while (i) { *out++ = *m++; --i } | |
116 | rep movsb | |
117 | # out = &tmp | |
118 | leal 0(%esp),%edi | |
119 | # m = &tmp | |
120 | leal 0(%esp),%esi | |
121 | ._nocopy: | |
122 | # out_backup = out | |
123 | movl %edi,72(%esp) | |
124 | # m_backup = m | |
125 | movl %esi,68(%esp) | |
126 | # bytes_backup = bytes | |
127 | movl %ebx,76(%esp) | |
128 | # in0 = j0 | |
129 | movl 164(%esp),%eax | |
130 | # in1 = j1 | |
131 | movl 168(%esp),%ecx | |
132 | # in2 = j2 | |
133 | movl 172(%esp),%edx | |
134 | # in3 = j3 | |
135 | movl 176(%esp),%ebx | |
136 | # x0 = in0 | |
137 | movl %eax,100(%esp) | |
138 | # x1 = in1 | |
139 | movl %ecx,104(%esp) | |
140 | # x2 = in2 | |
141 | movl %edx,108(%esp) | |
142 | # x3 = in3 | |
143 | movl %ebx,112(%esp) | |
144 | # in4 = j4 | |
145 | movl 180(%esp),%eax | |
146 | # in5 = j5 | |
147 | movl 184(%esp),%ecx | |
148 | # in6 = j6 | |
149 | movl 188(%esp),%edx | |
150 | # in7 = j7 | |
151 | movl 192(%esp),%ebx | |
152 | # x4 = in4 | |
153 | movl %eax,116(%esp) | |
154 | # x5 = in5 | |
155 | movl %ecx,120(%esp) | |
156 | # x6 = in6 | |
157 | movl %edx,124(%esp) | |
158 | # x7 = in7 | |
159 | movl %ebx,128(%esp) | |
160 | # in8 = j8 | |
161 | movl 196(%esp),%eax | |
162 | # in9 = j9 | |
163 | movl 200(%esp),%ecx | |
164 | # in10 = j10 | |
165 | movl 204(%esp),%edx | |
166 | # in11 = j11 | |
167 | movl 208(%esp),%ebx | |
168 | # x8 = in8 | |
169 | movl %eax,132(%esp) | |
170 | # x9 = in9 | |
171 | movl %ecx,136(%esp) | |
172 | # x10 = in10 | |
173 | movl %edx,140(%esp) | |
174 | # x11 = in11 | |
175 | movl %ebx,144(%esp) | |
176 | # in12 = j12 | |
177 | movl 212(%esp),%eax | |
178 | # in13 = j13 | |
179 | movl 216(%esp),%ecx | |
180 | # in14 = j14 | |
181 | movl 220(%esp),%edx | |
182 | # in15 = j15 | |
183 | movl 224(%esp),%ebx | |
184 | # x12 = in12 | |
185 | movl %eax,148(%esp) | |
186 | # x13 = in13 | |
187 | movl %ecx,152(%esp) | |
188 | # x14 = in14 | |
189 | movl %edx,156(%esp) | |
190 | # x15 = in15 | |
191 | movl %ebx,160(%esp) | |
192 | # i = 20 | |
193 | mov $20,%ebp | |
194 | # p = x0 | |
195 | movl 100(%esp),%eax | |
196 | # s = x5 | |
197 | movl 120(%esp),%ecx | |
198 | # t = x10 | |
199 | movl 140(%esp),%edx | |
200 | # w = x15 | |
201 | movl 160(%esp),%ebx | |
202 | ._mainloop: | |
203 | # x0 = p | |
204 | movl %eax,100(%esp) | |
205 | # x10 = t | |
206 | movl %edx,140(%esp) | |
207 | # p += x12 | |
208 | addl 148(%esp),%eax | |
209 | # x5 = s | |
210 | movl %ecx,120(%esp) | |
211 | # t += x6 | |
212 | addl 124(%esp),%edx | |
213 | # x15 = w | |
214 | movl %ebx,160(%esp) | |
215 | # r = x1 | |
216 | movl 104(%esp),%esi | |
217 | # r += s | |
218 | add %ecx,%esi | |
219 | # v = x11 | |
220 | movl 144(%esp),%edi | |
221 | # v += w | |
222 | add %ebx,%edi | |
223 | # p <<<= 7 | |
224 | rol $7,%eax | |
225 | # p ^= x4 | |
226 | xorl 116(%esp),%eax | |
227 | # t <<<= 7 | |
228 | rol $7,%edx | |
229 | # t ^= x14 | |
230 | xorl 156(%esp),%edx | |
231 | # r <<<= 7 | |
232 | rol $7,%esi | |
233 | # r ^= x9 | |
234 | xorl 136(%esp),%esi | |
235 | # v <<<= 7 | |
236 | rol $7,%edi | |
237 | # v ^= x3 | |
238 | xorl 112(%esp),%edi | |
239 | # x4 = p | |
240 | movl %eax,116(%esp) | |
241 | # x14 = t | |
242 | movl %edx,156(%esp) | |
243 | # p += x0 | |
244 | addl 100(%esp),%eax | |
245 | # x9 = r | |
246 | movl %esi,136(%esp) | |
247 | # t += x10 | |
248 | addl 140(%esp),%edx | |
249 | # x3 = v | |
250 | movl %edi,112(%esp) | |
251 | # p <<<= 9 | |
252 | rol $9,%eax | |
253 | # p ^= x8 | |
254 | xorl 132(%esp),%eax | |
255 | # t <<<= 9 | |
256 | rol $9,%edx | |
257 | # t ^= x2 | |
258 | xorl 108(%esp),%edx | |
259 | # s += r | |
260 | add %esi,%ecx | |
261 | # s <<<= 9 | |
262 | rol $9,%ecx | |
263 | # s ^= x13 | |
264 | xorl 152(%esp),%ecx | |
265 | # w += v | |
266 | add %edi,%ebx | |
267 | # w <<<= 9 | |
268 | rol $9,%ebx | |
269 | # w ^= x7 | |
270 | xorl 128(%esp),%ebx | |
271 | # x8 = p | |
272 | movl %eax,132(%esp) | |
273 | # x2 = t | |
274 | movl %edx,108(%esp) | |
275 | # p += x4 | |
276 | addl 116(%esp),%eax | |
277 | # x13 = s | |
278 | movl %ecx,152(%esp) | |
279 | # t += x14 | |
280 | addl 156(%esp),%edx | |
281 | # x7 = w | |
282 | movl %ebx,128(%esp) | |
283 | # p <<<= 13 | |
284 | rol $13,%eax | |
285 | # p ^= x12 | |
286 | xorl 148(%esp),%eax | |
287 | # t <<<= 13 | |
288 | rol $13,%edx | |
289 | # t ^= x6 | |
290 | xorl 124(%esp),%edx | |
291 | # r += s | |
292 | add %ecx,%esi | |
293 | # r <<<= 13 | |
294 | rol $13,%esi | |
295 | # r ^= x1 | |
296 | xorl 104(%esp),%esi | |
297 | # v += w | |
298 | add %ebx,%edi | |
299 | # v <<<= 13 | |
300 | rol $13,%edi | |
301 | # v ^= x11 | |
302 | xorl 144(%esp),%edi | |
303 | # x12 = p | |
304 | movl %eax,148(%esp) | |
305 | # x6 = t | |
306 | movl %edx,124(%esp) | |
307 | # p += x8 | |
308 | addl 132(%esp),%eax | |
309 | # x1 = r | |
310 | movl %esi,104(%esp) | |
311 | # t += x2 | |
312 | addl 108(%esp),%edx | |
313 | # x11 = v | |
314 | movl %edi,144(%esp) | |
315 | # p <<<= 18 | |
316 | rol $18,%eax | |
317 | # p ^= x0 | |
318 | xorl 100(%esp),%eax | |
319 | # t <<<= 18 | |
320 | rol $18,%edx | |
321 | # t ^= x10 | |
322 | xorl 140(%esp),%edx | |
323 | # s += r | |
324 | add %esi,%ecx | |
325 | # s <<<= 18 | |
326 | rol $18,%ecx | |
327 | # s ^= x5 | |
328 | xorl 120(%esp),%ecx | |
329 | # w += v | |
330 | add %edi,%ebx | |
331 | # w <<<= 18 | |
332 | rol $18,%ebx | |
333 | # w ^= x15 | |
334 | xorl 160(%esp),%ebx | |
335 | # x0 = p | |
336 | movl %eax,100(%esp) | |
337 | # x10 = t | |
338 | movl %edx,140(%esp) | |
339 | # p += x3 | |
340 | addl 112(%esp),%eax | |
341 | # p <<<= 7 | |
342 | rol $7,%eax | |
343 | # x5 = s | |
344 | movl %ecx,120(%esp) | |
345 | # t += x9 | |
346 | addl 136(%esp),%edx | |
347 | # x15 = w | |
348 | movl %ebx,160(%esp) | |
349 | # r = x4 | |
350 | movl 116(%esp),%esi | |
351 | # r += s | |
352 | add %ecx,%esi | |
353 | # v = x14 | |
354 | movl 156(%esp),%edi | |
355 | # v += w | |
356 | add %ebx,%edi | |
357 | # p ^= x1 | |
358 | xorl 104(%esp),%eax | |
359 | # t <<<= 7 | |
360 | rol $7,%edx | |
361 | # t ^= x11 | |
362 | xorl 144(%esp),%edx | |
363 | # r <<<= 7 | |
364 | rol $7,%esi | |
365 | # r ^= x6 | |
366 | xorl 124(%esp),%esi | |
367 | # v <<<= 7 | |
368 | rol $7,%edi | |
369 | # v ^= x12 | |
370 | xorl 148(%esp),%edi | |
371 | # x1 = p | |
372 | movl %eax,104(%esp) | |
373 | # x11 = t | |
374 | movl %edx,144(%esp) | |
375 | # p += x0 | |
376 | addl 100(%esp),%eax | |
377 | # x6 = r | |
378 | movl %esi,124(%esp) | |
379 | # t += x10 | |
380 | addl 140(%esp),%edx | |
381 | # x12 = v | |
382 | movl %edi,148(%esp) | |
383 | # p <<<= 9 | |
384 | rol $9,%eax | |
385 | # p ^= x2 | |
386 | xorl 108(%esp),%eax | |
387 | # t <<<= 9 | |
388 | rol $9,%edx | |
389 | # t ^= x8 | |
390 | xorl 132(%esp),%edx | |
391 | # s += r | |
392 | add %esi,%ecx | |
393 | # s <<<= 9 | |
394 | rol $9,%ecx | |
395 | # s ^= x7 | |
396 | xorl 128(%esp),%ecx | |
397 | # w += v | |
398 | add %edi,%ebx | |
399 | # w <<<= 9 | |
400 | rol $9,%ebx | |
401 | # w ^= x13 | |
402 | xorl 152(%esp),%ebx | |
403 | # x2 = p | |
404 | movl %eax,108(%esp) | |
405 | # x8 = t | |
406 | movl %edx,132(%esp) | |
407 | # p += x1 | |
408 | addl 104(%esp),%eax | |
409 | # x7 = s | |
410 | movl %ecx,128(%esp) | |
411 | # t += x11 | |
412 | addl 144(%esp),%edx | |
413 | # x13 = w | |
414 | movl %ebx,152(%esp) | |
415 | # p <<<= 13 | |
416 | rol $13,%eax | |
417 | # p ^= x3 | |
418 | xorl 112(%esp),%eax | |
419 | # t <<<= 13 | |
420 | rol $13,%edx | |
421 | # t ^= x9 | |
422 | xorl 136(%esp),%edx | |
423 | # r += s | |
424 | add %ecx,%esi | |
425 | # r <<<= 13 | |
426 | rol $13,%esi | |
427 | # r ^= x4 | |
428 | xorl 116(%esp),%esi | |
429 | # v += w | |
430 | add %ebx,%edi | |
431 | # v <<<= 13 | |
432 | rol $13,%edi | |
433 | # v ^= x14 | |
434 | xorl 156(%esp),%edi | |
435 | # x3 = p | |
436 | movl %eax,112(%esp) | |
437 | # x9 = t | |
438 | movl %edx,136(%esp) | |
439 | # p += x2 | |
440 | addl 108(%esp),%eax | |
441 | # x4 = r | |
442 | movl %esi,116(%esp) | |
443 | # t += x8 | |
444 | addl 132(%esp),%edx | |
445 | # x14 = v | |
446 | movl %edi,156(%esp) | |
447 | # p <<<= 18 | |
448 | rol $18,%eax | |
449 | # p ^= x0 | |
450 | xorl 100(%esp),%eax | |
451 | # t <<<= 18 | |
452 | rol $18,%edx | |
453 | # t ^= x10 | |
454 | xorl 140(%esp),%edx | |
455 | # s += r | |
456 | add %esi,%ecx | |
457 | # s <<<= 18 | |
458 | rol $18,%ecx | |
459 | # s ^= x5 | |
460 | xorl 120(%esp),%ecx | |
461 | # w += v | |
462 | add %edi,%ebx | |
463 | # w <<<= 18 | |
464 | rol $18,%ebx | |
465 | # w ^= x15 | |
466 | xorl 160(%esp),%ebx | |
467 | # x0 = p | |
468 | movl %eax,100(%esp) | |
469 | # x10 = t | |
470 | movl %edx,140(%esp) | |
471 | # p += x12 | |
472 | addl 148(%esp),%eax | |
473 | # x5 = s | |
474 | movl %ecx,120(%esp) | |
475 | # t += x6 | |
476 | addl 124(%esp),%edx | |
477 | # x15 = w | |
478 | movl %ebx,160(%esp) | |
479 | # r = x1 | |
480 | movl 104(%esp),%esi | |
481 | # r += s | |
482 | add %ecx,%esi | |
483 | # v = x11 | |
484 | movl 144(%esp),%edi | |
485 | # v += w | |
486 | add %ebx,%edi | |
487 | # p <<<= 7 | |
488 | rol $7,%eax | |
489 | # p ^= x4 | |
490 | xorl 116(%esp),%eax | |
491 | # t <<<= 7 | |
492 | rol $7,%edx | |
493 | # t ^= x14 | |
494 | xorl 156(%esp),%edx | |
495 | # r <<<= 7 | |
496 | rol $7,%esi | |
497 | # r ^= x9 | |
498 | xorl 136(%esp),%esi | |
499 | # v <<<= 7 | |
500 | rol $7,%edi | |
501 | # v ^= x3 | |
502 | xorl 112(%esp),%edi | |
503 | # x4 = p | |
504 | movl %eax,116(%esp) | |
505 | # x14 = t | |
506 | movl %edx,156(%esp) | |
507 | # p += x0 | |
508 | addl 100(%esp),%eax | |
509 | # x9 = r | |
510 | movl %esi,136(%esp) | |
511 | # t += x10 | |
512 | addl 140(%esp),%edx | |
513 | # x3 = v | |
514 | movl %edi,112(%esp) | |
515 | # p <<<= 9 | |
516 | rol $9,%eax | |
517 | # p ^= x8 | |
518 | xorl 132(%esp),%eax | |
519 | # t <<<= 9 | |
520 | rol $9,%edx | |
521 | # t ^= x2 | |
522 | xorl 108(%esp),%edx | |
523 | # s += r | |
524 | add %esi,%ecx | |
525 | # s <<<= 9 | |
526 | rol $9,%ecx | |
527 | # s ^= x13 | |
528 | xorl 152(%esp),%ecx | |
529 | # w += v | |
530 | add %edi,%ebx | |
531 | # w <<<= 9 | |
532 | rol $9,%ebx | |
533 | # w ^= x7 | |
534 | xorl 128(%esp),%ebx | |
535 | # x8 = p | |
536 | movl %eax,132(%esp) | |
537 | # x2 = t | |
538 | movl %edx,108(%esp) | |
539 | # p += x4 | |
540 | addl 116(%esp),%eax | |
541 | # x13 = s | |
542 | movl %ecx,152(%esp) | |
543 | # t += x14 | |
544 | addl 156(%esp),%edx | |
545 | # x7 = w | |
546 | movl %ebx,128(%esp) | |
547 | # p <<<= 13 | |
548 | rol $13,%eax | |
549 | # p ^= x12 | |
550 | xorl 148(%esp),%eax | |
551 | # t <<<= 13 | |
552 | rol $13,%edx | |
553 | # t ^= x6 | |
554 | xorl 124(%esp),%edx | |
555 | # r += s | |
556 | add %ecx,%esi | |
557 | # r <<<= 13 | |
558 | rol $13,%esi | |
559 | # r ^= x1 | |
560 | xorl 104(%esp),%esi | |
561 | # v += w | |
562 | add %ebx,%edi | |
563 | # v <<<= 13 | |
564 | rol $13,%edi | |
565 | # v ^= x11 | |
566 | xorl 144(%esp),%edi | |
567 | # x12 = p | |
568 | movl %eax,148(%esp) | |
569 | # x6 = t | |
570 | movl %edx,124(%esp) | |
571 | # p += x8 | |
572 | addl 132(%esp),%eax | |
573 | # x1 = r | |
574 | movl %esi,104(%esp) | |
575 | # t += x2 | |
576 | addl 108(%esp),%edx | |
577 | # x11 = v | |
578 | movl %edi,144(%esp) | |
579 | # p <<<= 18 | |
580 | rol $18,%eax | |
581 | # p ^= x0 | |
582 | xorl 100(%esp),%eax | |
583 | # t <<<= 18 | |
584 | rol $18,%edx | |
585 | # t ^= x10 | |
586 | xorl 140(%esp),%edx | |
587 | # s += r | |
588 | add %esi,%ecx | |
589 | # s <<<= 18 | |
590 | rol $18,%ecx | |
591 | # s ^= x5 | |
592 | xorl 120(%esp),%ecx | |
593 | # w += v | |
594 | add %edi,%ebx | |
595 | # w <<<= 18 | |
596 | rol $18,%ebx | |
597 | # w ^= x15 | |
598 | xorl 160(%esp),%ebx | |
599 | # x0 = p | |
600 | movl %eax,100(%esp) | |
601 | # x10 = t | |
602 | movl %edx,140(%esp) | |
603 | # p += x3 | |
604 | addl 112(%esp),%eax | |
605 | # p <<<= 7 | |
606 | rol $7,%eax | |
607 | # x5 = s | |
608 | movl %ecx,120(%esp) | |
609 | # t += x9 | |
610 | addl 136(%esp),%edx | |
611 | # x15 = w | |
612 | movl %ebx,160(%esp) | |
613 | # r = x4 | |
614 | movl 116(%esp),%esi | |
615 | # r += s | |
616 | add %ecx,%esi | |
617 | # v = x14 | |
618 | movl 156(%esp),%edi | |
619 | # v += w | |
620 | add %ebx,%edi | |
621 | # p ^= x1 | |
622 | xorl 104(%esp),%eax | |
623 | # t <<<= 7 | |
624 | rol $7,%edx | |
625 | # t ^= x11 | |
626 | xorl 144(%esp),%edx | |
627 | # r <<<= 7 | |
628 | rol $7,%esi | |
629 | # r ^= x6 | |
630 | xorl 124(%esp),%esi | |
631 | # v <<<= 7 | |
632 | rol $7,%edi | |
633 | # v ^= x12 | |
634 | xorl 148(%esp),%edi | |
635 | # x1 = p | |
636 | movl %eax,104(%esp) | |
637 | # x11 = t | |
638 | movl %edx,144(%esp) | |
639 | # p += x0 | |
640 | addl 100(%esp),%eax | |
641 | # x6 = r | |
642 | movl %esi,124(%esp) | |
643 | # t += x10 | |
644 | addl 140(%esp),%edx | |
645 | # x12 = v | |
646 | movl %edi,148(%esp) | |
647 | # p <<<= 9 | |
648 | rol $9,%eax | |
649 | # p ^= x2 | |
650 | xorl 108(%esp),%eax | |
651 | # t <<<= 9 | |
652 | rol $9,%edx | |
653 | # t ^= x8 | |
654 | xorl 132(%esp),%edx | |
655 | # s += r | |
656 | add %esi,%ecx | |
657 | # s <<<= 9 | |
658 | rol $9,%ecx | |
659 | # s ^= x7 | |
660 | xorl 128(%esp),%ecx | |
661 | # w += v | |
662 | add %edi,%ebx | |
663 | # w <<<= 9 | |
664 | rol $9,%ebx | |
665 | # w ^= x13 | |
666 | xorl 152(%esp),%ebx | |
667 | # x2 = p | |
668 | movl %eax,108(%esp) | |
669 | # x8 = t | |
670 | movl %edx,132(%esp) | |
671 | # p += x1 | |
672 | addl 104(%esp),%eax | |
673 | # x7 = s | |
674 | movl %ecx,128(%esp) | |
675 | # t += x11 | |
676 | addl 144(%esp),%edx | |
677 | # x13 = w | |
678 | movl %ebx,152(%esp) | |
679 | # p <<<= 13 | |
680 | rol $13,%eax | |
681 | # p ^= x3 | |
682 | xorl 112(%esp),%eax | |
683 | # t <<<= 13 | |
684 | rol $13,%edx | |
685 | # t ^= x9 | |
686 | xorl 136(%esp),%edx | |
687 | # r += s | |
688 | add %ecx,%esi | |
689 | # r <<<= 13 | |
690 | rol $13,%esi | |
691 | # r ^= x4 | |
692 | xorl 116(%esp),%esi | |
693 | # v += w | |
694 | add %ebx,%edi | |
695 | # v <<<= 13 | |
696 | rol $13,%edi | |
697 | # v ^= x14 | |
698 | xorl 156(%esp),%edi | |
699 | # x3 = p | |
700 | movl %eax,112(%esp) | |
701 | # x9 = t | |
702 | movl %edx,136(%esp) | |
703 | # p += x2 | |
704 | addl 108(%esp),%eax | |
705 | # x4 = r | |
706 | movl %esi,116(%esp) | |
707 | # t += x8 | |
708 | addl 132(%esp),%edx | |
709 | # x14 = v | |
710 | movl %edi,156(%esp) | |
711 | # p <<<= 18 | |
712 | rol $18,%eax | |
713 | # p ^= x0 | |
714 | xorl 100(%esp),%eax | |
715 | # t <<<= 18 | |
716 | rol $18,%edx | |
717 | # t ^= x10 | |
718 | xorl 140(%esp),%edx | |
719 | # s += r | |
720 | add %esi,%ecx | |
721 | # s <<<= 18 | |
722 | rol $18,%ecx | |
723 | # s ^= x5 | |
724 | xorl 120(%esp),%ecx | |
725 | # w += v | |
726 | add %edi,%ebx | |
727 | # w <<<= 18 | |
728 | rol $18,%ebx | |
729 | # w ^= x15 | |
730 | xorl 160(%esp),%ebx | |
731 | # i -= 4 | |
732 | sub $4,%ebp | |
733 | # goto mainloop if unsigned > | |
734 | ja ._mainloop | |
735 | # x0 = p | |
736 | movl %eax,100(%esp) | |
737 | # x5 = s | |
738 | movl %ecx,120(%esp) | |
739 | # x10 = t | |
740 | movl %edx,140(%esp) | |
741 | # x15 = w | |
742 | movl %ebx,160(%esp) | |
743 | # out = out_backup | |
744 | movl 72(%esp),%edi | |
745 | # m = m_backup | |
746 | movl 68(%esp),%esi | |
747 | # in0 = x0 | |
748 | movl 100(%esp),%eax | |
749 | # in1 = x1 | |
750 | movl 104(%esp),%ecx | |
751 | # in0 += j0 | |
752 | addl 164(%esp),%eax | |
753 | # in1 += j1 | |
754 | addl 168(%esp),%ecx | |
755 | # in0 ^= *(uint32 *) (m + 0) | |
756 | xorl 0(%esi),%eax | |
757 | # in1 ^= *(uint32 *) (m + 4) | |
758 | xorl 4(%esi),%ecx | |
759 | # *(uint32 *) (out + 0) = in0 | |
760 | movl %eax,0(%edi) | |
761 | # *(uint32 *) (out + 4) = in1 | |
762 | movl %ecx,4(%edi) | |
763 | # in2 = x2 | |
764 | movl 108(%esp),%eax | |
765 | # in3 = x3 | |
766 | movl 112(%esp),%ecx | |
767 | # in2 += j2 | |
768 | addl 172(%esp),%eax | |
769 | # in3 += j3 | |
770 | addl 176(%esp),%ecx | |
771 | # in2 ^= *(uint32 *) (m + 8) | |
772 | xorl 8(%esi),%eax | |
773 | # in3 ^= *(uint32 *) (m + 12) | |
774 | xorl 12(%esi),%ecx | |
775 | # *(uint32 *) (out + 8) = in2 | |
776 | movl %eax,8(%edi) | |
777 | # *(uint32 *) (out + 12) = in3 | |
778 | movl %ecx,12(%edi) | |
779 | # in4 = x4 | |
780 | movl 116(%esp),%eax | |
781 | # in5 = x5 | |
782 | movl 120(%esp),%ecx | |
783 | # in4 += j4 | |
784 | addl 180(%esp),%eax | |
785 | # in5 += j5 | |
786 | addl 184(%esp),%ecx | |
787 | # in4 ^= *(uint32 *) (m + 16) | |
788 | xorl 16(%esi),%eax | |
789 | # in5 ^= *(uint32 *) (m + 20) | |
790 | xorl 20(%esi),%ecx | |
791 | # *(uint32 *) (out + 16) = in4 | |
792 | movl %eax,16(%edi) | |
793 | # *(uint32 *) (out + 20) = in5 | |
794 | movl %ecx,20(%edi) | |
795 | # in6 = x6 | |
796 | movl 124(%esp),%eax | |
797 | # in7 = x7 | |
798 | movl 128(%esp),%ecx | |
799 | # in6 += j6 | |
800 | addl 188(%esp),%eax | |
801 | # in7 += j7 | |
802 | addl 192(%esp),%ecx | |
803 | # in6 ^= *(uint32 *) (m + 24) | |
804 | xorl 24(%esi),%eax | |
805 | # in7 ^= *(uint32 *) (m + 28) | |
806 | xorl 28(%esi),%ecx | |
807 | # *(uint32 *) (out + 24) = in6 | |
808 | movl %eax,24(%edi) | |
809 | # *(uint32 *) (out + 28) = in7 | |
810 | movl %ecx,28(%edi) | |
811 | # in8 = x8 | |
812 | movl 132(%esp),%eax | |
813 | # in9 = x9 | |
814 | movl 136(%esp),%ecx | |
815 | # in8 += j8 | |
816 | addl 196(%esp),%eax | |
817 | # in9 += j9 | |
818 | addl 200(%esp),%ecx | |
819 | # in8 ^= *(uint32 *) (m + 32) | |
820 | xorl 32(%esi),%eax | |
821 | # in9 ^= *(uint32 *) (m + 36) | |
822 | xorl 36(%esi),%ecx | |
823 | # *(uint32 *) (out + 32) = in8 | |
824 | movl %eax,32(%edi) | |
825 | # *(uint32 *) (out + 36) = in9 | |
826 | movl %ecx,36(%edi) | |
827 | # in10 = x10 | |
828 | movl 140(%esp),%eax | |
829 | # in11 = x11 | |
830 | movl 144(%esp),%ecx | |
831 | # in10 += j10 | |
832 | addl 204(%esp),%eax | |
833 | # in11 += j11 | |
834 | addl 208(%esp),%ecx | |
835 | # in10 ^= *(uint32 *) (m + 40) | |
836 | xorl 40(%esi),%eax | |
837 | # in11 ^= *(uint32 *) (m + 44) | |
838 | xorl 44(%esi),%ecx | |
839 | # *(uint32 *) (out + 40) = in10 | |
840 | movl %eax,40(%edi) | |
841 | # *(uint32 *) (out + 44) = in11 | |
842 | movl %ecx,44(%edi) | |
843 | # in12 = x12 | |
844 | movl 148(%esp),%eax | |
845 | # in13 = x13 | |
846 | movl 152(%esp),%ecx | |
847 | # in12 += j12 | |
848 | addl 212(%esp),%eax | |
849 | # in13 += j13 | |
850 | addl 216(%esp),%ecx | |
851 | # in12 ^= *(uint32 *) (m + 48) | |
852 | xorl 48(%esi),%eax | |
853 | # in13 ^= *(uint32 *) (m + 52) | |
854 | xorl 52(%esi),%ecx | |
855 | # *(uint32 *) (out + 48) = in12 | |
856 | movl %eax,48(%edi) | |
857 | # *(uint32 *) (out + 52) = in13 | |
858 | movl %ecx,52(%edi) | |
859 | # in14 = x14 | |
860 | movl 156(%esp),%eax | |
861 | # in15 = x15 | |
862 | movl 160(%esp),%ecx | |
863 | # in14 += j14 | |
864 | addl 220(%esp),%eax | |
865 | # in15 += j15 | |
866 | addl 224(%esp),%ecx | |
867 | # in14 ^= *(uint32 *) (m + 56) | |
868 | xorl 56(%esi),%eax | |
869 | # in15 ^= *(uint32 *) (m + 60) | |
870 | xorl 60(%esi),%ecx | |
871 | # *(uint32 *) (out + 56) = in14 | |
872 | movl %eax,56(%edi) | |
873 | # *(uint32 *) (out + 60) = in15 | |
874 | movl %ecx,60(%edi) | |
875 | # bytes = bytes_backup | |
876 | movl 76(%esp),%ebx | |
877 | # in8 = j8 | |
878 | movl 196(%esp),%eax | |
879 | # in9 = j9 | |
880 | movl 200(%esp),%ecx | |
881 | # in8 += 1 | |
882 | add $1,%eax | |
883 | # in9 += 0 + carry | |
884 | adc $0,%ecx | |
885 | # j8 = in8 | |
886 | movl %eax,196(%esp) | |
887 | # j9 = in9 | |
888 | movl %ecx,200(%esp) | |
889 | # bytes - 64 | |
890 | cmp $64,%ebx | |
891 | # goto bytesatleast65 if unsigned> | |
892 | ja ._bytesatleast65 | |
893 | # goto bytesatleast64 if unsigned>= | |
894 | jae ._bytesatleast64 | |
895 | # m = out | |
896 | mov %edi,%esi | |
897 | # out = ctarget | |
898 | movl 228(%esp),%edi | |
899 | # i = bytes | |
900 | mov %ebx,%ecx | |
901 | # while (i) { *out++ = *m++; --i } | |
902 | rep movsb | |
903 | ._bytesatleast64: | |
904 | # x = x_backup | |
905 | movl 64(%esp),%eax | |
906 | # in8 = j8 | |
907 | movl 196(%esp),%ecx | |
908 | # in9 = j9 | |
909 | movl 200(%esp),%edx | |
910 | # *(uint32 *) (x + 32) = in8 | |
911 | movl %ecx,32(%eax) | |
912 | # *(uint32 *) (x + 36) = in9 | |
913 | movl %edx,36(%eax) | |
914 | ._done: | |
915 | # eax = eax_stack | |
916 | movl 80(%esp),%eax | |
917 | # ebx = ebx_stack | |
918 | movl 84(%esp),%ebx | |
919 | # esi = esi_stack | |
920 | movl 88(%esp),%esi | |
921 | # edi = edi_stack | |
922 | movl 92(%esp),%edi | |
923 | # ebp = ebp_stack | |
924 | movl 96(%esp),%ebp | |
925 | # leave | |
926 | add %eax,%esp | |
927 | ret | |
928 | ._bytesatleast65: | |
929 | # bytes -= 64 | |
930 | sub $64,%ebx | |
931 | # out += 64 | |
932 | add $64,%edi | |
933 | # m += 64 | |
934 | add $64,%esi | |
935 | # goto bytesatleast1 | |
936 | jmp ._bytesatleast1 | |
04443808 JK |
937 | ENDPROC(salsa20_encrypt_bytes) |
938 | ||
939 | # enter salsa20_keysetup | |
940 | ENTRY(salsa20_keysetup) | |
974e4b75 TSH |
941 | mov %esp,%eax |
942 | and $31,%eax | |
943 | add $256,%eax | |
944 | sub %eax,%esp | |
945 | # eax_stack = eax | |
946 | movl %eax,64(%esp) | |
947 | # ebx_stack = ebx | |
948 | movl %ebx,68(%esp) | |
949 | # esi_stack = esi | |
950 | movl %esi,72(%esp) | |
951 | # edi_stack = edi | |
952 | movl %edi,76(%esp) | |
953 | # ebp_stack = ebp | |
954 | movl %ebp,80(%esp) | |
955 | # k = arg2 | |
956 | movl 8(%esp,%eax),%ecx | |
957 | # kbits = arg3 | |
958 | movl 12(%esp,%eax),%edx | |
959 | # x = arg1 | |
960 | movl 4(%esp,%eax),%eax | |
961 | # in1 = *(uint32 *) (k + 0) | |
962 | movl 0(%ecx),%ebx | |
963 | # in2 = *(uint32 *) (k + 4) | |
964 | movl 4(%ecx),%esi | |
965 | # in3 = *(uint32 *) (k + 8) | |
966 | movl 8(%ecx),%edi | |
967 | # in4 = *(uint32 *) (k + 12) | |
968 | movl 12(%ecx),%ebp | |
969 | # *(uint32 *) (x + 4) = in1 | |
970 | movl %ebx,4(%eax) | |
971 | # *(uint32 *) (x + 8) = in2 | |
972 | movl %esi,8(%eax) | |
973 | # *(uint32 *) (x + 12) = in3 | |
974 | movl %edi,12(%eax) | |
975 | # *(uint32 *) (x + 16) = in4 | |
976 | movl %ebp,16(%eax) | |
977 | # kbits - 256 | |
978 | cmp $256,%edx | |
979 | # goto kbits128 if unsigned< | |
980 | jb ._kbits128 | |
981 | ._kbits256: | |
982 | # in11 = *(uint32 *) (k + 16) | |
983 | movl 16(%ecx),%edx | |
984 | # in12 = *(uint32 *) (k + 20) | |
985 | movl 20(%ecx),%ebx | |
986 | # in13 = *(uint32 *) (k + 24) | |
987 | movl 24(%ecx),%esi | |
988 | # in14 = *(uint32 *) (k + 28) | |
989 | movl 28(%ecx),%ecx | |
990 | # *(uint32 *) (x + 44) = in11 | |
991 | movl %edx,44(%eax) | |
992 | # *(uint32 *) (x + 48) = in12 | |
993 | movl %ebx,48(%eax) | |
994 | # *(uint32 *) (x + 52) = in13 | |
995 | movl %esi,52(%eax) | |
996 | # *(uint32 *) (x + 56) = in14 | |
997 | movl %ecx,56(%eax) | |
998 | # in0 = 1634760805 | |
999 | mov $1634760805,%ecx | |
1000 | # in5 = 857760878 | |
1001 | mov $857760878,%edx | |
1002 | # in10 = 2036477234 | |
1003 | mov $2036477234,%ebx | |
1004 | # in15 = 1797285236 | |
1005 | mov $1797285236,%esi | |
1006 | # *(uint32 *) (x + 0) = in0 | |
1007 | movl %ecx,0(%eax) | |
1008 | # *(uint32 *) (x + 20) = in5 | |
1009 | movl %edx,20(%eax) | |
1010 | # *(uint32 *) (x + 40) = in10 | |
1011 | movl %ebx,40(%eax) | |
1012 | # *(uint32 *) (x + 60) = in15 | |
1013 | movl %esi,60(%eax) | |
1014 | # goto keysetupdone | |
1015 | jmp ._keysetupdone | |
1016 | ._kbits128: | |
1017 | # in11 = *(uint32 *) (k + 0) | |
1018 | movl 0(%ecx),%edx | |
1019 | # in12 = *(uint32 *) (k + 4) | |
1020 | movl 4(%ecx),%ebx | |
1021 | # in13 = *(uint32 *) (k + 8) | |
1022 | movl 8(%ecx),%esi | |
1023 | # in14 = *(uint32 *) (k + 12) | |
1024 | movl 12(%ecx),%ecx | |
1025 | # *(uint32 *) (x + 44) = in11 | |
1026 | movl %edx,44(%eax) | |
1027 | # *(uint32 *) (x + 48) = in12 | |
1028 | movl %ebx,48(%eax) | |
1029 | # *(uint32 *) (x + 52) = in13 | |
1030 | movl %esi,52(%eax) | |
1031 | # *(uint32 *) (x + 56) = in14 | |
1032 | movl %ecx,56(%eax) | |
1033 | # in0 = 1634760805 | |
1034 | mov $1634760805,%ecx | |
1035 | # in5 = 824206446 | |
1036 | mov $824206446,%edx | |
1037 | # in10 = 2036477238 | |
1038 | mov $2036477238,%ebx | |
1039 | # in15 = 1797285236 | |
1040 | mov $1797285236,%esi | |
1041 | # *(uint32 *) (x + 0) = in0 | |
1042 | movl %ecx,0(%eax) | |
1043 | # *(uint32 *) (x + 20) = in5 | |
1044 | movl %edx,20(%eax) | |
1045 | # *(uint32 *) (x + 40) = in10 | |
1046 | movl %ebx,40(%eax) | |
1047 | # *(uint32 *) (x + 60) = in15 | |
1048 | movl %esi,60(%eax) | |
1049 | ._keysetupdone: | |
1050 | # eax = eax_stack | |
1051 | movl 64(%esp),%eax | |
1052 | # ebx = ebx_stack | |
1053 | movl 68(%esp),%ebx | |
1054 | # esi = esi_stack | |
1055 | movl 72(%esp),%esi | |
1056 | # edi = edi_stack | |
1057 | movl 76(%esp),%edi | |
1058 | # ebp = ebp_stack | |
1059 | movl 80(%esp),%ebp | |
1060 | # leave | |
1061 | add %eax,%esp | |
1062 | ret | |
04443808 JK |
1063 | ENDPROC(salsa20_keysetup) |
1064 | ||
1065 | # enter salsa20_ivsetup | |
1066 | ENTRY(salsa20_ivsetup) | |
974e4b75 TSH |
1067 | mov %esp,%eax |
1068 | and $31,%eax | |
1069 | add $256,%eax | |
1070 | sub %eax,%esp | |
1071 | # eax_stack = eax | |
1072 | movl %eax,64(%esp) | |
1073 | # ebx_stack = ebx | |
1074 | movl %ebx,68(%esp) | |
1075 | # esi_stack = esi | |
1076 | movl %esi,72(%esp) | |
1077 | # edi_stack = edi | |
1078 | movl %edi,76(%esp) | |
1079 | # ebp_stack = ebp | |
1080 | movl %ebp,80(%esp) | |
1081 | # iv = arg2 | |
1082 | movl 8(%esp,%eax),%ecx | |
1083 | # x = arg1 | |
1084 | movl 4(%esp,%eax),%eax | |
1085 | # in6 = *(uint32 *) (iv + 0) | |
1086 | movl 0(%ecx),%edx | |
1087 | # in7 = *(uint32 *) (iv + 4) | |
1088 | movl 4(%ecx),%ecx | |
1089 | # in8 = 0 | |
1090 | mov $0,%ebx | |
1091 | # in9 = 0 | |
1092 | mov $0,%esi | |
1093 | # *(uint32 *) (x + 24) = in6 | |
1094 | movl %edx,24(%eax) | |
1095 | # *(uint32 *) (x + 28) = in7 | |
1096 | movl %ecx,28(%eax) | |
1097 | # *(uint32 *) (x + 32) = in8 | |
1098 | movl %ebx,32(%eax) | |
1099 | # *(uint32 *) (x + 36) = in9 | |
1100 | movl %esi,36(%eax) | |
1101 | # eax = eax_stack | |
1102 | movl 64(%esp),%eax | |
1103 | # ebx = ebx_stack | |
1104 | movl 68(%esp),%ebx | |
1105 | # esi = esi_stack | |
1106 | movl 72(%esp),%esi | |
1107 | # edi = edi_stack | |
1108 | movl 76(%esp),%edi | |
1109 | # ebp = ebp_stack | |
1110 | movl 80(%esp),%ebp | |
1111 | # leave | |
1112 | add %eax,%esp | |
1113 | ret | |
04443808 | 1114 | ENDPROC(salsa20_ivsetup) |