Commit | Line | Data |
---|---|---|
a2a892a2 AS |
1 | /* AES (Rijndael) implementation (FIPS PUB 197) for x86_64 |
2 | * | |
3 | * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de> | |
4 | * | |
5 | * License: | |
6 | * This code can be distributed under the terms of the GNU General Public | |
7 | * License (GPL) Version 2 provided that the above header down to and | |
8 | * including this sentence is retained in full. | |
9 | */ | |
10 | ||
81190b32 SS |
11 | .extern crypto_ft_tab |
12 | .extern crypto_it_tab | |
13 | .extern crypto_fl_tab | |
14 | .extern crypto_il_tab | |
a2a892a2 AS |
15 | |
16 | .text | |
17 | ||
3f299743 | 18 | #include <linux/linkage.h> |
6c2bb98b HX |
19 | #include <asm/asm-offsets.h> |
20 | ||
/*
 * Register aliases used by the macros below.
 *
 * R1..R4 name %rax, %rbx, %rcx and %rdx and come with a full set of
 * sub-register aliases, selected by suffix:
 *   (none) 64-bit, E 32-bit, X 16-bit, H bits 8..15, L bits 0..7.
 * R5..R7 (%rsi, %rdi, %rbp) only have 64-bit and 32-bit (E) aliases.
 * R8..R11 (%r8..%r11) only have the 64-bit name.
 *
 * The macros build the sub-register name by pasting the suffix onto the
 * alias name with ##, so a macro argument that is used with a suffix must
 * be one of these alias names and must have that suffix defined here.
 */
a2a892a2 AS |
21 | #define R1 %rax |
22 | #define R1E %eax | |
23 | #define R1X %ax | |
24 | #define R1H %ah | |
25 | #define R1L %al | |
26 | #define R2 %rbx | |
27 | #define R2E %ebx | |
28 | #define R2X %bx | |
29 | #define R2H %bh | |
30 | #define R2L %bl | |
31 | #define R3 %rcx | |
32 | #define R3E %ecx | |
33 | #define R3X %cx | |
34 | #define R3H %ch | |
35 | #define R3L %cl | |
36 | #define R4 %rdx | |
37 | #define R4E %edx | |
38 | #define R4X %dx | |
39 | #define R4H %dh | |
40 | #define R4L %dl | |
41 | #define R5 %rsi | |
42 | #define R5E %esi | |
43 | #define R6 %rdi | |
44 | #define R6E %edi | |
45 | #define R7 %rbp | |
46 | #define R7E %ebp | |
47 | #define R8 %r8 | |
48 | #define R9 %r9 | |
49 | #define R10 %r10 | |
50 | #define R11 %r11 | |
51 | ||
/*
 * prologue - open function FUNC, load the input block and pick the entry
 * point into the unrolled round sequence.
 *
 * FUNC        symbol passed to ENTRY() (presumably from <linux/linkage.h>;
 *             its definition is not visible in this file).
 * KEY         byte offset added to r8 when forming the pointer in r9.
 * B128, B192  labels jumped to when the value read from 480(r8) is below
 *             24 or equal to 24 respectively.
 * r1..r11     register alias names; r1, r5, r6, r7 and r10 are also used
 *             with the E suffix and so need a 32-bit alias.
 *
 * Steps, in order:
 *   - copy r1 to r2, r3 to r4 and r10 to r11 (the copies of r1 and r3 are
 *     what epilogue() moves back before returning);
 *   - r9 = r8 + KEY + 48;
 *   - load four 32-bit words from 0/4/8/12(r7) into r5E, r1E, r6E, r7E
 *     (r7 is the base pointer and is overwritten by the last load);
 *   - load the 32-bit value at 480(r8) into r10E (presumably the key
 *     length in bytes - TODO confirm against the context structure);
 *   - xor the four loaded words with the words at -48..-36(r9), i.e. the
 *     first 16 bytes at KEY(r8);
 *   - compare r10E with 24: below -> B128 with r9 unchanged; equal ->
 *     B192 with r9 advanced by 32; otherwise fall through with r9
 *     advanced by 64.
 *
 * leaq does not modify the flags, so the je after the first leaq still
 * tests the result of the cmpl.
 */
6c2bb98b | 52 | #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ |
3f299743 JK |
53 | ENTRY(FUNC); \ |
54 | movq r1,r2; \ | |
a2a892a2 | 55 | movq r3,r4; \ |
07bf44f8 | 56 | leaq KEY+48(r8),r9; \ |
a2a892a2 AS |
57 | movq r10,r11; \ |
58 | movl (r7),r5 ## E; \ | |
59 | movl 4(r7),r1 ## E; \ | |
60 | movl 8(r7),r6 ## E; \ | |
61 | movl 12(r7),r7 ## E; \ | |
07bf44f8 | 62 | movl 480(r8),r10 ## E; \ |
a2a892a2 AS |
63 | xorl -48(r9),r5 ## E; \ |
64 | xorl -44(r9),r1 ## E; \ | |
65 | xorl -40(r9),r6 ## E; \ | |
66 | xorl -36(r9),r7 ## E; \ | |
67 | cmpl $24,r10 ## E; \ | |
68 | jb B128; \ | |
69 | leaq 32(r9),r9; \ | |
70 | je B192; \ | |
71 | leaq 32(r9),r9; | |
72 | ||
/*
 * epilogue - store the result block, restore saved registers and return.
 *
 * FUNC     symbol passed to ENDPROC() (presumably from <linux/linkage.h>;
 *          its definition is not visible in this file).
 * r1,r2    r1 is copied back into r2.
 * r3,r4    r3 is copied back into r4.
 * r5..r8   registers whose 32-bit (E) halves hold the four result words.
 * r9       destination pointer; the words are written to 0/4/8/12(r9).
 *
 * The two movq copies mirror the first two copies made by prologue(),
 * with source and destination swapped.
 */
3f299743 | 73 | #define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ |
a2a892a2 AS |
74 | movq r1,r2; \ |
75 | movq r3,r4; \ | |
76 | movl r5 ## E,(r9); \ | |
77 | movl r6 ## E,4(r9); \ | |
78 | movl r7 ## E,8(r9); \ | |
79 | movl r8 ## E,12(r9); \ | |
3f299743 JK |
80 | ret; \ |
81 | ENDPROC(FUNC); | |
a2a892a2 AS |
82 | |
/*
 * round - one table-driven round over a four-word state.
 *
 * TAB      base symbol of the lookup table.  Lookups use TAB, TAB+1024,
 *          TAB+2048 and TAB+3072, each indexed by a zero-extended byte
 *          scaled by 4, i.e. four consecutive sub-tables of 256 32-bit
 *          entries.  The table is addressed absolutely (no base register).
 * OFFSET   displacement from r8 of the 16 bytes xored into the result.
 * r1..r4   input state words.  Used with the H, L, X and E suffixes, so
 *          they must be aliases that define all of them (R1..R4).
 *          All four are overwritten.
 * r5,r6    receive two of the output words.
 * r7       scratch.
 * r8       base pointer for the OFFSET loads.
 * ra..rd   registers that get the words at OFFSET, OFFSET+4, OFFSET+8 and
 *          OFFSET+12(r8) xored in.  Callers pass the aliases that
 *          hold the four output words.
 *
 * On exit the four output words are in r5, r6, r3 and r4; r1, r2 and r7
 * hold leftovers.
 *
 * Each output word is the xor of four table entries, one per sub-table,
 * each selected by one byte of an input word.  Bytes are taken with the
 * H/L sub-registers.  Once the low half of r2 (and later of r1) has been
 * consumed, movw + roll pack the low half of r4 (resp. r3) into it, and
 * shrl leaves the high half of r4 (resp. r3) in its low 16 bits.  This
 * lets r4 and r3 be reused as accumulators while every input byte is
 * still reachable.
 *
 * movzbl into an E register also clears bits 32..63, which is why the
 * 64-bit register can be used directly as the index in the lookups.
 *
 * The instruction order interleaves loads with the byte extraction; do
 * not reorder without re-checking which registers are live at each step.
 */
83 | #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ | |
84 | movzbl r2 ## H,r5 ## E; \ | |
85 | movzbl r2 ## L,r6 ## E; \ | |
86 | movl TAB+1024(,r5,4),r5 ## E;\ | |
87 | movw r4 ## X,r2 ## X; \ | |
88 | movl TAB(,r6,4),r6 ## E; \ | |
89 | roll $16,r2 ## E; \ | |
90 | shrl $16,r4 ## E; \ | |
91 | movzbl r4 ## H,r7 ## E; \ | |
92 | movzbl r4 ## L,r4 ## E; \ | |
93 | xorl OFFSET(r8),ra ## E; \ | |
94 | xorl OFFSET+4(r8),rb ## E; \ | |
95 | xorl TAB+3072(,r7,4),r5 ## E;\ | |
96 | xorl TAB+2048(,r4,4),r6 ## E;\ | |
97 | movzbl r1 ## L,r7 ## E; \ | |
98 | movzbl r1 ## H,r4 ## E; \ | |
99 | movl TAB+1024(,r4,4),r4 ## E;\ | |
100 | movw r3 ## X,r1 ## X; \ | |
101 | roll $16,r1 ## E; \ | |
102 | shrl $16,r3 ## E; \ | |
103 | xorl TAB(,r7,4),r5 ## E; \ | |
104 | movzbl r3 ## H,r7 ## E; \ | |
105 | movzbl r3 ## L,r3 ## E; \ | |
106 | xorl TAB+3072(,r7,4),r4 ## E;\ | |
107 | xorl TAB+2048(,r3,4),r5 ## E;\ | |
108 | movzbl r1 ## H,r7 ## E; \ | |
109 | movzbl r1 ## L,r3 ## E; \ | |
110 | shrl $16,r1 ## E; \ | |
111 | xorl TAB+3072(,r7,4),r6 ## E;\ | |
112 | movl TAB+2048(,r3,4),r3 ## E;\ | |
113 | movzbl r1 ## H,r7 ## E; \ | |
114 | movzbl r1 ## L,r1 ## E; \ | |
115 | xorl TAB+1024(,r7,4),r6 ## E;\ | |
116 | xorl TAB(,r1,4),r3 ## E; \ | |
117 | movzbl r2 ## H,r1 ## E; \ | |
118 | movzbl r2 ## L,r7 ## E; \ | |
119 | shrl $16,r2 ## E; \ | |
120 | xorl TAB+3072(,r1,4),r3 ## E;\ | |
121 | xorl TAB+2048(,r7,4),r4 ## E;\ | |
122 | movzbl r2 ## H,r1 ## E; \ | |
123 | movzbl r2 ## L,r2 ## E; \ | |
124 | xorl OFFSET+8(r8),rc ## E; \ | |
125 | xorl OFFSET+12(r8),rd ## E; \ | |
126 | xorl TAB+1024(,r1,4),r3 ## E;\ | |
127 | xorl TAB(,r2,4),r4 ## E; | |
128 | ||
/*
 * move_regs - copy the 32-bit value of r3 into r1 and of r4 into r2.
 *
 * Used after round() to move the two output words that round() leaves in
 * its accumulator registers back into the first two state registers, so
 * the next round() invocation can take the same arguments.
 */
129 | #define move_regs(r1,r2,r3,r4) \ | |
130 | movl r3 ## E,r1 ## E; \ | |
131 | movl r4 ## E,r2 ## E; | |
132 | ||
/*
 * entry - prologue() bound to the concrete registers used by this file.
 *
 * With this binding the prologue:
 *   - copies %rbx to %r8, %rbp to %r9 and %rsi to %r11;
 *   - sets %r10 = %rdi + KEY + 48;
 *   - loads the 16 bytes at (%rdx) into %eax, %ebx, %ecx, %edx;
 *   - reads the 32-bit value at 480(%rdi) into %esi and branches on it.
 *
 * %rdi, %rsi and %rdx line up with the (tfm, out, in) order of the
 * prototype comments in this file if the System V AMD64 argument
 * registers are assumed.
 */
6c2bb98b HX |
133 | #define entry(FUNC,KEY,B128,B192) \ |
134 | prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) | |
a2a892a2 | 135 | |
/*
 * return - epilogue() bound to the concrete registers used by this file:
 * copies %r8 back to %rbx and %r9 back to %rbp, stores %esi, %edi, %ecx
 * and %edx to 0/4/8/12(%r11), then returns.
 */
3f299743 | 136 | #define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) |
a2a892a2 AS |
137 | |
/*
 * encrypt_round - one non-final encryption round.
 *
 * Runs round() with the state in R1..R4, accumulators R5/R6, scratch R7
 * and R10 as the base for the OFFSET loads.  round() leaves the output in
 * R5, R6, R3, R4; move_regs() then copies R5 and R6 into R1 and R2 so the
 * state is back in R1..R4 for the next round.
 */
138 | #define encrypt_round(TAB,OFFSET) \ | |
139 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ | |
140 | move_regs(R1,R2,R5,R6) | |
141 | ||
/*
 * encrypt_final - last encryption round.
 *
 * Same round() invocation as encrypt_round but without the trailing
 * move_regs(): the output stays in R5, R6, R3, R4, which are the
 * registers the return() macro stores to the destination.
 */
142 | #define encrypt_final(TAB,OFFSET) \ | |
143 | round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) | |
144 | ||
/*
 * decrypt_round - one non-final decryption round.
 *
 * Compared with encrypt_round, the positional arguments of round() are
 * swapped pairwise (R1/R2, R3/R4, R5/R6), which changes which state
 * register each table lookup takes its byte from.  The registers that
 * receive the words at OFFSET..OFFSET+12(R10) are still R5, R6, R3, R4.
 * round() leaves the output in R6, R5, R4, R3 (its r5, r6, r3, r4);
 * move_regs() then copies R5 and R6 into R1 and R2.
 */
145 | #define decrypt_round(TAB,OFFSET) \ | |
146 | round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \ | |
147 | move_regs(R1,R2,R5,R6) | |
148 | ||
/*
 * decrypt_final - last decryption round.
 *
 * Same round() invocation as decrypt_round but without the trailing
 * move_regs(): the output stays in R5, R6, R3, R4, which are the
 * registers the return() macro stores to the destination.
 */
149 | #define decrypt_final(TAB,OFFSET) \ | |
150 | round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) | |
151 | ||
e90b1a2b | 152 | /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
a2a892a2 | 153 | |
/*
 * aes_enc_blk: encrypt one 16-byte block.
 *
 * entry() is invoked with KEY = 0, so the key words are read starting at
 * offset 0 from the first argument register.  The round sequence is fully
 * unrolled; entry() selects where execution joins it from the 32-bit
 * value at 480(%rdi):
 *   below 24    -> .Le128: 9 encrypt_round + encrypt_final (10 rounds)
 *   equal to 24 -> .Le192: 11 encrypt_round + encrypt_final (12 rounds)
 *   otherwise   -> fall through: 13 encrypt_round + encrypt_final (14)
 * entry() advances the key pointer by 0, 32 or 64 bytes to match, so in
 * all three cases the first round executed reads the 16 bytes that follow
 * the ones already xored in by entry().
 *
 * All rounds but the last use crypto_ft_tab; the last uses crypto_fl_tab.
 * return() stores the result and restores %rbx and %rbp.
 */
3f299743 | 154 | entry(aes_enc_blk,0,.Le128,.Le192) |
81190b32 SS |
155 | encrypt_round(crypto_ft_tab,-96) |
156 | encrypt_round(crypto_ft_tab,-80) | |
3f299743 | 157 | .Le192: encrypt_round(crypto_ft_tab,-64) |
81190b32 | 158 | encrypt_round(crypto_ft_tab,-48) |
3f299743 | 159 | .Le128: encrypt_round(crypto_ft_tab,-32) |
81190b32 SS |
160 | encrypt_round(crypto_ft_tab,-16) |
161 | encrypt_round(crypto_ft_tab, 0) | |
162 | encrypt_round(crypto_ft_tab, 16) | |
163 | encrypt_round(crypto_ft_tab, 32) | |
164 | encrypt_round(crypto_ft_tab, 48) | |
165 | encrypt_round(crypto_ft_tab, 64) | |
166 | encrypt_round(crypto_ft_tab, 80) | |
167 | encrypt_round(crypto_ft_tab, 96) | |
168 | encrypt_final(crypto_fl_tab,112) | |
3f299743 | 169 | return(aes_enc_blk) |
a2a892a2 | 170 | |
e90b1a2b | 171 | /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ |
a2a892a2 | 172 | |
/*
 * aes_dec_blk: decrypt one 16-byte block.
 *
 * entry() is invoked with KEY = 240, so the key words are read starting
 * 240 bytes into the structure addressed by the first argument register
 * (presumably the decryption key schedule - TODO confirm against the
 * context layout).  The round sequence is fully unrolled; entry() selects
 * where execution joins it from the 32-bit value at 480(%rdi):
 *   below 24    -> .Ld128: 9 decrypt_round + decrypt_final (10 rounds)
 *   equal to 24 -> .Ld192: 11 decrypt_round + decrypt_final (12 rounds)
 *   otherwise   -> fall through: 13 decrypt_round + decrypt_final (14)
 *
 * All rounds but the last use crypto_it_tab; the last uses crypto_il_tab.
 * return() stores the result and restores %rbx and %rbp.
 */
3f299743 | 173 | entry(aes_dec_blk,240,.Ld128,.Ld192) |
81190b32 SS |
174 | decrypt_round(crypto_it_tab,-96) |
175 | decrypt_round(crypto_it_tab,-80) | |
3f299743 | 176 | .Ld192: decrypt_round(crypto_it_tab,-64) |
81190b32 | 177 | decrypt_round(crypto_it_tab,-48) |
3f299743 | 178 | .Ld128: decrypt_round(crypto_it_tab,-32) |
81190b32 SS |
179 | decrypt_round(crypto_it_tab,-16) |
180 | decrypt_round(crypto_it_tab, 0) | |
181 | decrypt_round(crypto_it_tab, 16) | |
182 | decrypt_round(crypto_it_tab, 32) | |
183 | decrypt_round(crypto_it_tab, 48) | |
184 | decrypt_round(crypto_it_tab, 64) | |
185 | decrypt_round(crypto_it_tab, 80) | |
186 | decrypt_round(crypto_it_tab, 96) | |
187 | decrypt_final(crypto_il_tab,112) | |
3f299743 | 188 | return(aes_dec_blk) |