/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3

	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm

	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm

	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm

	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

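	@ do_block dispatches on the key size: the round count in r3 is 10,
	@ 12 or 14, so after "cmp r3, #12", blo selects the AES-128 exit,
	@ beq the AES-192 exit, and the fall-through path handles AES-256.
	@ Round keys are loaded in interleaved pairs so each load can issue
	@ ahead of the rounds that consume it.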
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm

	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip
	 * Arguments:
	 *   q0  : first in/output block
	 *   q1  : second in/output block (_3x version only)
	 *   q2  : third in/output block (_3x version only)
	 *   q8  : first round key
	 *   q9  : second round key
	 *   q14 : final round key
	 *   r2  : address of round key array
	 *   r3  : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)

	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)

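	@ prepare_key leaves ip pointing at the last of the rounds + 1 round
	@ keys: each round key is 16 bytes, so rk + (rounds << 4) addresses
	@ round key number 'rounds'. q8/q9 receive the first two round keys
	@ and q14 the final one, as the core transforms above expect.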
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)

ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)

	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1, :64]!	@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)

ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	vmov		q3, q0
	vmov		q4, q1
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3
	beq		.Lcbcdecout
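	@ Single block tail: fold the CBC xor into the final AddRoundKey by
	@ combining the last round key (saved in q15) with the previous
	@ ciphertext block, so that aes_decrypt's closing veor with q14
	@ performs both the key addition and the CBC chaining in one step.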
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1, :64]!	@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ keep iv in q6
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)

	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
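	@ cmn adds the block count to the byte-swapped low counter word
	@ without storing the result: a carry out means the 32-bit counter
	@ will wrap during this call, so take the single block path below,
	@ which propagates carries into the upper counter words.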
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x
	add		r6, r6, #1
	vmov		q0, q6
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip
	vld1.8		{q3-q4}, [r1, :64]!
	vld1.8		{q5}, [r1, :64]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vmov		s27, ip
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt
	subs		r4, r4, #1
	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
	vld1.8		{q3}, [r1, :64]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0, :64]!

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctrcarry
	teq		r4, #0
	bne		.Lctrloop
.Lctrout:
	vst1.8		{q6}, [r5]
	pop		{r4-r6, pc}

.Lctrhalfblock:
	vld1.8		{d1}, [r1, :64]
	veor		d0, d0, d1
	vst1.8		{d0}, [r0, :64]
	pop		{r4-r6, pc}

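	@ The 16-byte counter lives in q6 as words s24..s27, with s27 the
	@ least significant big-endian word. On a carry out of s27, ripple
	@ the increment through s26, s25 and s24 in turn, byte-swapping each
	@ word around the add, and stop as soon as no further carry occurs.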
.Lctrcarry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f
	.endr
0:	teq		r4, #0
	beq		.Lctrout
	b		.Lctrloop
ENDPROC(ce_aes_ctr_encrypt)

	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 */

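	@ next_tweak computes tweak * x in GF(2^128) with the XTS reduction
	@ polynomial x^128 + x^7 + x^2 + x + 1: the vadd doubles each 64-bit
	@ half, while the arithmetic shift turns the top bit of each half
	@ into a mask that, once vext has swapped the halves, xors the carry
	@ (constant 1) into the high half and the reduction term (constant
	@ 0x87) into the low half.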
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm

	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)

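	@ On return from ce_aes_xts_init, r6 is nonzero when 'first' was 1
	@ (it then holds the key 2 pointer) and zero on subsequent calls, so
	@ the entry points below test r6 to decide whether the tweak loaded
	@ from iv[] must be advanced before use.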
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)

ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	add		ip, r2, #32		@ 3rd round key
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)

	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 */
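	@ With q0 zeroed, aese computes SubBytes(ShiftRows(q1)), and because
	@ vdup replicated the input word into all four columns, ShiftRows
	@ leaves the state unchanged, so s0 ends up holding the per-byte
	@ sbox lookup of the original argument.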
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)