Merge branch 'linus' into genirq
[deliverable/linux.git] / arch / x86 / crypto / aes-x86_64-asm_64.S
1 /* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
2 *
3 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
4 *
5 * License:
6 * This code can be distributed under the terms of the GNU General Public
7 * License (GPL) Version 2 provided that the above header down to and
8 * including this sentence is retained in full.
9 */
10
11 .extern crypto_ft_tab
12 .extern crypto_it_tab
13 .extern crypto_fl_tab
14 .extern crypto_il_tab
15
16 .text
17
18 #include <asm/asm-offsets.h>
19
/*
 * Register aliases used by the macros in this file.
 *
 * Rn names a 64-bit register.  A suffix selects a narrower view of the
 * same register: E = 32-bit, X = 16-bit, H = byte at bits 15:8,
 * L = byte at bits 7:0.  The macros below form these names by token
 * pasting (for example r5 ## E), so a macro argument must be one of the
 * plain Rn names defined here.
 *
 * Only R1-R4 have X/H/L views, R5-R7 have only an E view, and R8-R11
 * have no narrower view at all; a macro parameter that is pasted with
 * X, H or L must therefore be bound to one of R1-R4.
 *
 * BASE is added to the first-argument pointer whenever the prologue
 * reads from it.  NOTE(review): crypto_tfm_ctx_offset is presumably the
 * offset of the cipher context inside struct crypto_tfm, provided by
 * asm-offsets.h; its definition is not visible in this file.
 */
20 #define BASE crypto_tfm_ctx_offset
21
22 #define R1 %rax
23 #define R1E %eax
24 #define R1X %ax
25 #define R1H %ah
26 #define R1L %al
27 #define R2 %rbx
28 #define R2E %ebx
29 #define R2X %bx
30 #define R2H %bh
31 #define R2L %bl
32 #define R3 %rcx
33 #define R3E %ecx
34 #define R3X %cx
35 #define R3H %ch
36 #define R3L %cl
37 #define R4 %rdx
38 #define R4E %edx
39 #define R4X %dx
40 #define R4H %dh
41 #define R4L %dl
42 #define R5 %rsi
43 #define R5E %esi
44 #define R6 %rdi
45 #define R6E %edi
46 #define R7 %rbp
47 #define R7E %ebp
48 #define R8 %r8
49 #define R9 %r9
50 #define R10 %r10
51 #define R11 %r11
52
/*
 * prologue - emit the function label and set up state for the rounds.
 *
 * FUNC       symbol to define (global, typed as a function, .align 8)
 * KEY        extra byte offset added to BASE when addressing key words
 * B128/B192  labels to branch to for the shorter round sequences
 * r1,r2      r1 is copied into r2 (register save; the epilogue macro
 *            copies it back), and r1 is then reused as a state word
 * r3,r4      r3 is copied into r4 (second register save)
 * r5,r1,r6,r7
 *            receive (E views) the four 32-bit words loaded from
 *            0, 4, 8 and 12(r7); the last load overwrites the input
 *            pointer r7 itself
 * r8         pointer the key data is addressed from
 * r9         set to r8 + BASE + KEY + 48 + 4; all later key accesses
 *            are relative to this pointer
 * r10,r11    r10 is copied into r11 before r10 is reused to hold the
 *            32-bit value at BASE+0(r8)
 *
 * After the loads, the four state words are XORed with the four key
 * words at -48, -44, -40 and -36(r9), i.e. the 16 bytes that start at
 * r8 + BASE + KEY + 4.
 *
 * The value loaded from BASE+0(r8) is then compared with 24:
 *   below 24  -> jump to B128 with r9 unchanged
 *   equal 24  -> r9 += 32, jump to B192
 *   above 24  -> r9 += 64, fall through
 * leaq does not modify the flags, so the je still tests the result of
 * the cmpl even though a leaq sits between them.
 * NOTE(review): the compared value is presumably the key length in
 * bytes (16/24/32); the field layout is not visible in this file.
 */
53 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
54 .global FUNC; \
55 .type FUNC,@function; \
56 .align 8; \
57 FUNC: movq r1,r2; \
58 movq r3,r4; \
59 leaq BASE+KEY+48+4(r8),r9; \
60 movq r10,r11; \
61 movl (r7),r5 ## E; \
62 movl 4(r7),r1 ## E; \
63 movl 8(r7),r6 ## E; \
64 movl 12(r7),r7 ## E; \
65 movl BASE+0(r8),r10 ## E; \
66 xorl -48(r9),r5 ## E; \
67 xorl -44(r9),r1 ## E; \
68 xorl -40(r9),r6 ## E; \
69 xorl -36(r9),r7 ## E; \
70 cmpl $24,r10 ## E; \
71 jb B128; \
72 leaq 32(r9),r9; \
73 je B192; \
74 leaq 32(r9),r9;
75
/*
 * epilogue - restore saved registers, write the result block, return.
 *
 * r1,r2      r1 is copied back into r2 (undoes the first prologue save)
 * r3,r4      r3 is copied back into r4 (undoes the second save)
 * r5..r8     the four 32-bit result words (E views), stored in this
 *            order at 0, 4, 8 and 12(r9)
 * r9         destination pointer for the 16-byte result
 *
 * Ends with ret, so nothing placed after this macro is executed on
 * this path.
 */
76 #define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
77 movq r1,r2; \
78 movq r3,r4; \
79 movl r5 ## E,(r9); \
80 movl r6 ## E,4(r9); \
81 movl r7 ## E,8(r9); \
82 movl r8 ## E,12(r9); \
83 ret;
84
/*
 * round - one table-driven round over four 32-bit state words.
 *
 * TAB        base of the lookup table.  It is addressed as four
 *            consecutive sub-tables at TAB, TAB+1024, TAB+2048 and
 *            TAB+3072, each indexed by one byte scaled by 4.
 * OFFSET     displacement from r8 of the four 32-bit round-key words
 *            (OFFSET, OFFSET+4, OFFSET+8, OFFSET+12)
 * r1..r4     input state words.  Each is pasted with X, H and L, so
 *            they must be registers that have those views.  r1 and r2
 *            are destroyed; r3 and r4 are reused as accumulators.
 * r5,r6      accumulators for two of the output words (E view only)
 * r7         scratch index register (E view only)
 * r8         pointer the round key is read from
 * ra..rd     registers that get the round-key words XORed in.  ra/rb
 *            are XORed early (after r5 and r6 have received their
 *            first lookup) and rc/rd late (after r3 and r4 have been
 *            re-initialised), so ra/rb must name r5/r6 and rc/rd must
 *            name r3/r4, in either order; every user in this file
 *            does so.
 *
 * On exit the four output words are in r5, r6, r3 and r4.  Each is the
 * XOR of four table entries, one from each sub-table, and one
 * round-key word.
 *
 * Only bits 7:0 and 15:8 of a register are directly byte-addressable
 * (L and H views).  To reach the upper two bytes the code shifts right
 * by 16.  Before shifting r4 (and later r3), its still-needed low
 * half is parked in r2 (resp. r1): movw copies it over the low half
 * that has already been consumed, and roll $16 swaps the halves so the
 * unconsumed upper bytes of r2/r1 become addressable next.
 *
 * movzbl zero-extends into the full 64-bit register, which is what
 * makes the byte usable as the index in TAB(,rN,4).
 *
 * The instruction order interleaves the four accumulations; do not
 * reorder without re-checking which register is live at each step.
 */
85 #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
86 movzbl r2 ## H,r5 ## E; \
87 movzbl r2 ## L,r6 ## E; \
88 movl TAB+1024(,r5,4),r5 ## E;\
89 movw r4 ## X,r2 ## X; \
90 movl TAB(,r6,4),r6 ## E; \
91 roll $16,r2 ## E; \
92 shrl $16,r4 ## E; \
93 movzbl r4 ## H,r7 ## E; \
94 movzbl r4 ## L,r4 ## E; \
95 xorl OFFSET(r8),ra ## E; \
96 xorl OFFSET+4(r8),rb ## E; \
97 xorl TAB+3072(,r7,4),r5 ## E;\
98 xorl TAB+2048(,r4,4),r6 ## E;\
99 movzbl r1 ## L,r7 ## E; \
100 movzbl r1 ## H,r4 ## E; \
101 movl TAB+1024(,r4,4),r4 ## E;\
102 movw r3 ## X,r1 ## X; \
103 roll $16,r1 ## E; \
104 shrl $16,r3 ## E; \
105 xorl TAB(,r7,4),r5 ## E; \
106 movzbl r3 ## H,r7 ## E; \
107 movzbl r3 ## L,r3 ## E; \
108 xorl TAB+3072(,r7,4),r4 ## E;\
109 xorl TAB+2048(,r3,4),r5 ## E;\
110 movzbl r1 ## H,r7 ## E; \
111 movzbl r1 ## L,r3 ## E; \
112 shrl $16,r1 ## E; \
113 xorl TAB+3072(,r7,4),r6 ## E;\
114 movl TAB+2048(,r3,4),r3 ## E;\
115 movzbl r1 ## H,r7 ## E; \
116 movzbl r1 ## L,r1 ## E; \
117 xorl TAB+1024(,r7,4),r6 ## E;\
118 xorl TAB(,r1,4),r3 ## E; \
119 movzbl r2 ## H,r1 ## E; \
120 movzbl r2 ## L,r7 ## E; \
121 shrl $16,r2 ## E; \
122 xorl TAB+3072(,r1,4),r3 ## E;\
123 xorl TAB+2048(,r7,4),r4 ## E;\
124 movzbl r2 ## H,r1 ## E; \
125 movzbl r2 ## L,r2 ## E; \
126 xorl OFFSET+8(r8),rc ## E; \
127 xorl OFFSET+12(r8),rd ## E; \
128 xorl TAB+1024(,r1,4),r3 ## E;\
129 xorl TAB(,r2,4),r4 ## E;
130
/*
 * move_regs - copy two 32-bit values: r3 -> r1 and r4 -> r2.
 *
 * Used after round() to move the two output words it leaves in the
 * r5/r6 accumulators back into the registers the next round reads its
 * first two input words from.  Being 32-bit moves, they also clear the
 * upper 32 bits of the destination registers.
 */
131 #define move_regs(r1,r2,r3,r4) \
132 movl r3 ## E,r1 ## E; \
133 movl r4 ## E,r2 ## E;
134
/*
 * entry - prologue() bound to the concrete registers used by this file.
 *
 *   R2 (%rbx) is saved in R8 (%r8), R7 (%rbp) is saved in R9 (%r9)
 *   R6 (%rdi) is the pointer the key data is addressed from
 *   R4 (%rdx) is the pointer the 16-byte input block is loaded from
 *   R5 (%rsi) is copied to R11 (%r11) and then reused for the value
 *             compared against 24
 *   R10       becomes the key pointer the round macros read from
 *   the loaded and key-XORed state words end up in R1, R2, R3, R4
 *
 * With the prototypes given in the comments above the two functions
 * (tfm, out, in), this matches %rdi = tfm, %rsi = out, %rdx = in.
 *
 * return - epilogue() bound to the same registers: restores %rbx from
 * %r8 and %rbp from %r9, stores R5, R6, R3, R4 (32-bit views) to the
 * 16 bytes at (R11), then executes ret.  Note that this is a
 * preprocessor macro that happens to be named "return".
 */
135 #define entry(FUNC,KEY,B128,B192) \
136 prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
137
138 #define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
139
/*
 * Round wrappers: round() bound to concrete registers.
 *
 * In all four, R10 is the round-key pointer, R7 is the scratch
 * register, and the round-key words at OFFSET, +4, +8, +12 are XORed
 * into R5, R6, R3, R4 respectively.
 *
 * encrypt_*  pass the state as (R1,R2,R3,R4) with accumulators (R5,R6).
 * decrypt_*  pass the same registers pairwise swapped: state as
 *            (R2,R1,R4,R3) with accumulators (R6,R5), which changes
 *            which input byte feeds which output word.
 *
 * *_round    additionally runs move_regs(R1,R2,R5,R6), so the state is
 *            back in R1..R4 and another round can follow directly.
 * *_final    omits the move and leaves the result in R5, R6, R3, R4,
 *            which is exactly what the "return" macro stores.
 */
140 #define encrypt_round(TAB,OFFSET) \
141 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
142 move_regs(R1,R2,R5,R6)
143
144 #define encrypt_final(TAB,OFFSET) \
145 round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
146
147 #define decrypt_round(TAB,OFFSET) \
148 round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
149 move_regs(R1,R2,R5,R6)
150
151 #define decrypt_final(TAB,OFFSET) \
152 round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
153
154 /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
155
/*
 * aes_enc_blk: fully unrolled; key words are addressed with KEY = 0.
 *
 * The body is 13 encrypt_round invocations using crypto_ft_tab followed
 * by one encrypt_final using crypto_fl_tab.  The entry macro picks the
 * starting point from the value it compares against 24:
 *   above 24  -> falls through, all 14 round macros run
 *   equal 24  -> jumps to enc192, the first two are skipped (12 run)
 *   below 24  -> jumps to enc128, the first four are skipped (10 run)
 *
 * The entry macro advances R10 by 64, 32 or 0 bytes for these three
 * cases, so the first round executed always reads its key words from
 * BASE+20 off the first argument, and the final round always uses
 * OFFSET 112 relative to the adjusted R10.
 */
156 entry(aes_enc_blk,0,enc128,enc192)
157 encrypt_round(crypto_ft_tab,-96)
158 encrypt_round(crypto_ft_tab,-80)
159 enc192: encrypt_round(crypto_ft_tab,-64)
160 encrypt_round(crypto_ft_tab,-48)
161 enc128: encrypt_round(crypto_ft_tab,-32)
162 encrypt_round(crypto_ft_tab,-16)
163 encrypt_round(crypto_ft_tab, 0)
164 encrypt_round(crypto_ft_tab, 16)
165 encrypt_round(crypto_ft_tab, 32)
166 encrypt_round(crypto_ft_tab, 48)
167 encrypt_round(crypto_ft_tab, 64)
168 encrypt_round(crypto_ft_tab, 80)
169 encrypt_round(crypto_ft_tab, 96)
170 encrypt_final(crypto_fl_tab,112)
171 return
172
173 /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
174
/*
 * aes_dec_blk: fully unrolled; key words are addressed with KEY = 240,
 * i.e. 240 bytes further into the context than the ones aes_enc_blk
 * uses.
 *
 * The body is 13 decrypt_round invocations using crypto_it_tab followed
 * by one decrypt_final using crypto_il_tab.  The entry macro picks the
 * starting point from the value it compares against 24:
 *   above 24  -> falls through, all 14 round macros run
 *   equal 24  -> jumps to dec192, the first two are skipped (12 run)
 *   below 24  -> jumps to dec128, the first four are skipped (10 run)
 *
 * The entry macro advances R10 by 64, 32 or 0 bytes for these three
 * cases, so the first round executed always reads its key words from
 * BASE+240+20 off the first argument, and the final round always uses
 * OFFSET 112 relative to the adjusted R10.
 */
175 entry(aes_dec_blk,240,dec128,dec192)
176 decrypt_round(crypto_it_tab,-96)
177 decrypt_round(crypto_it_tab,-80)
178 dec192: decrypt_round(crypto_it_tab,-64)
179 decrypt_round(crypto_it_tab,-48)
180 dec128: decrypt_round(crypto_it_tab,-32)
181 decrypt_round(crypto_it_tab,-16)
182 decrypt_round(crypto_it_tab, 0)
183 decrypt_round(crypto_it_tab, 16)
184 decrypt_round(crypto_it_tab, 32)
185 decrypt_round(crypto_it_tab, 48)
186 decrypt_round(crypto_it_tab, 64)
187 decrypt_round(crypto_it_tab, 80)
188 decrypt_round(crypto_it_tab, 96)
189 decrypt_final(crypto_il_tab,112)
190 return
This page took 0.041272 seconds and 5 git commands to generate.