/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10 | ||
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3
17 | ||
/*
 * One ordinary AES round on \state: SubBytes+ShiftRows+AddRoundKey (aese)
 * followed by MixColumns (aesmc), or the inverse pair for decryption.
 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

/* Two consecutive ordinary rounds on the single block held in q0. */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

/*
 * Final two rounds on q0: the last round has no (inverse) MixColumns, and
 * the final round key \key3 is applied with a plain veor.
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm
49 | ||
/*
 * Same as the single-block dround/fround macros, but operating on three
 * blocks (q0-q2) at once, with the per-block round sequences interleaved
 * (presumably to hide the aese/aesmc result latency — NOTE(review):
 * micro-architectural rationale inferred, not stated in this file).
 */
	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm

	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm

	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
91 | ||
/*
 * Run the full AES round sequence on the block(s) in q0(-q2).
 * Expects: q8/q9 = first two round keys, q14 = final round key,
 * ip = address of the 3rd round key, r3 = number of rounds (10/12/14).
 * Remaining round keys are streamed in through q10-q13 while the
 * previous pair is being consumed.  Returns with bx lr.
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
114 | ||
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip
	 * Arguments:
	 *   q0  : first in/output block
	 *   q1  : second in/output block (_3x version only)
	 *   q2  : third in/output block (_3x version only)
	 *   q8  : first round key
	 *   q9  : second round key
	 *   q14 : final round key
	 *   r2  : address of round key array
	 *   r3  : number of rounds
	 */
/* Encrypt the single block in q0 (internal ABI, see comment above L134). */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ ip -> 3rd round key
.Laes_encrypt_tweak:				@ entry with ip preset by caller
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
134 | ||
/* Decrypt the single block in q0 (internal ABI). */
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ ip -> 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
140 | ||
/* Encrypt the three blocks in q0-q2 (internal ABI). */
	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ ip -> 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
146 | ||
/* Decrypt the three blocks in q0-q2 (internal ABI). */
	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ ip -> 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
152 | ||
/*
 * Load the round-key schedule at \rk: first two round keys into q8/q9,
 * last round key (at \rk + 16 * \rounds) into q14.  Clobbers ip.
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm
158 | ||
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (5th arg, on stack)
	prepare_key	r2, r3
.Lecbencloop3x:					@ fast path: 3 blocks at a time
	subs		r4, r4, #3
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbencloop3x
.Lecbenc1x:					@ handle 0-2 remaining blocks
	adds		r4, r4, #3
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
190 | ||
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ r4 = blocks (5th arg, on stack)
	prepare_key	r2, r3
.Lecbdecloop3x:					@ fast path: 3 blocks at a time
	subs		r4, r4, #3
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbdecloop3x
.Lecbdec1x:					@ handle 0-2 remaining blocks
	adds		r4, r4, #3
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
216 | ||
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q0}, [r5]		@ q0 carries the chaining value
	prepare_key	r2, r3
.Lcbcencloop:					@ CBC enc is inherently serial
	vld1.8		{q1}, [r1, :64]!	@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ write back final ct block as iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
238 | ||
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:					@ decryption can run 3-way
	subs		r4, r4, #3
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	vmov		q3, q0			@ stash ct blocks for chaining
	vmov		q4, q1
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6		@ xor each pt with previous ct
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5			@ last ct becomes next iv
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:					@ handle 0-2 remaining blocks
	adds		r4, r4, #3
	beq		.Lcbcdecout
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1, :64]!	@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ write back iv (kept in q6)
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
276 | ||
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop		@ LSW may wrap: take slow path
.Lctrloop3x:					@ fast path: 3 blocks, 32-bit ctr
	subs		r4, r4, #3
	bmi		.Lctr1x
	add		r6, r6, #1
	vmov		q0, q6
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ patch LSW of ctr into q1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ patch LSW of ctr into q2
	vld1.8		{q3-q4}, [r1, :64]!
	vld1.8		{q5}, [r1, :64]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vmov		s27, ip			@ update LSW of ctr in q6
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3
	beq		.Lctrout
.Lctrloop:					@ slow path: one block, full carry
	vmov		q0, q6
	bl		aes_encrypt
	subs		r4, r4, #1
	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
	vld1.8		{q3}, [r1, :64]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0, :64]!

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctrcarry		@ LSW wrapped: propagate carry
	teq		r4, #0
	bne		.Lctrloop
.Lctrout:
	vst1.8		{q6}, [r5]		@ return next ctr value
	pop		{r4-r6, pc}

.Lctrhalfblock:					@ partial final block: no ctr update
	vld1.8		{d1}, [r1, :64]
	veor		d0, d0, d1
	vst1.8		{d0}, [r0, :64]
	pop		{r4-r6, pc}

.Lctrcarry:					@ ripple carry into upper ctr words
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f
	.endr
0:	teq		r4, #0
	beq		.Lctrout
	b		.Lctrloop
ENDPROC(ce_aes_ctr_encrypt)
355 | ||
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 */
362 | ||
/*
 * Derive the next XTS tweak in \out from \in: multiply by x in GF(2^128)
 * (double both 64-bit lanes, then fold the carry out of the high lane back
 * into the low lane via the 0x87 reduction constant).  \const must hold
 * .Lxts_mul_x (= { 1, 0x87 }); \tmp is clobbered.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ per-lane carry mask (0 or ~0)
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in		@ shift both lanes left by 1
	vext.8		\tmp, \tmp, \tmp, #8	@ swap lanes of the carry term
	veor		\out, \out, \tmp
	.endm
370 | ||
371 | .align 3 | |
372 | .Lxts_mul_x: | |
373 | .quad 1, 0x87 | |
374 | ||
375 | ce_aes_xts_init: | |
376 | vldr d14, .Lxts_mul_x | |
377 | vldr d15, .Lxts_mul_x + 8 | |
378 | ||
379 | ldrd r4, r5, [sp, #16] @ load args | |
380 | ldr r6, [sp, #28] | |
381 | vld1.8 {q0}, [r5] @ load iv | |
382 | teq r6, #1 @ start of a block? | |
383 | bxne lr | |
384 | ||
385 | @ Encrypt the IV in q0 with the second AES key. This should only | |
386 | @ be done at the start of a block. | |
387 | ldr r6, [sp, #24] @ load AES key 2 | |
388 | prepare_key r6, r3 | |
389 | add ip, r6, #32 @ 3rd round key of key 2 | |
390 | b .Laes_encrypt_tweak @ tail call | |
391 | ENDPROC(ce_aes_xts_init) | |
392 | ||
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x		@ resuming: tweak already current

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3		@ pre-whiten with tweaks
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3		@ post-whiten with same tweaks
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5			@ carry last tweak forward
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:					@ handle 0-2 remaining blocks
	adds		r4, r4, #3
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]		@ return current tweak as iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
442 | ||
443 | ||
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0			@ q3 = current tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x		@ resuming: tweak already current

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3		@ pre-whiten with tweaks
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3		@ post-whiten with same tweaks
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5			@ carry last tweak forward
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:					@ handle 0-2 remaining blocks
	adds		r4, r4, #3
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	@ (removed a dead 'add ip, r2, #32' here: aes_decrypt recomputes
	@ ip from r2 itself, and r2 is not modified in this loop)
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ return current tweak as iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
494 | ||
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *			       AES sbox substitution on each byte in
	 *			       'input'
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0			@ broadcast input word
	veor		q0, q0, q0		@ zero state: aese with an all-zero
	aese.8		q0, q1			@ block reduces to SubBytes only
	vmov		r0, s0			@ return one substituted word
	bx		lr
ENDPROC(ce_aes_sub)
507 | ||
	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *					   operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0			@ InvMixColumns on the round key
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)