Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | // ------------------------------------------------------------------------- |
2 | // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. | |
3 | // All rights reserved. | |
4 | // | |
5 | // LICENSE TERMS | |
6 | // | |
7 | // The free distribution and use of this software in both source and binary | |
8 | // form is allowed (with or without changes) provided that: | |
9 | // | |
10 | // 1. distributions of this source code include the above copyright | |
11 | // notice, this list of conditions and the following disclaimer// | |
12 | // | |
13 | // 2. distributions in binary form include the above copyright | |
14 | // notice, this list of conditions and the following disclaimer | |
15 | // in the documentation and/or other associated materials// | |
16 | // | |
17 | // 3. the copyright holder's name is not used to endorse products | |
18 | // built using this software without specific written permission. | |
19 | // | |
20 | // | |
21 | // ALTERNATIVELY, provided that this notice is retained in full, this product | |
22 | // may be distributed under the terms of the GNU General Public License (GPL), | |
23 | // in which case the provisions of the GPL apply INSTEAD OF those given above. | |
24 | // | |
25 | // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> | |
26 | // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> | |
27 | ||
28 | // DISCLAIMER | |
29 | // | |
30 | // This software is provided 'as is' with no explicit or implied warranties | |
31 | // in respect of its properties including, but not limited to, correctness | |
32 | // and fitness for purpose. | |
33 | // ------------------------------------------------------------------------- | |
34 | // Issue Date: 29/07/2002 | |
35 | ||
36 | .file "aes-i586-asm.S" | |
37 | .text | |
38 | ||
3f299743 | 39 | #include <linux/linkage.h> |
6c2bb98b | 40 | #include <asm/asm-offsets.h> |
1da177e4 | 41 | |
6c2bb98b | 42 | #define tlen 1024 // byte length of each of the 4 lookup ('xor') tables (256 32-bit words); also the stride between consecutive tables |
1da177e4 | 43 | |
6c2bb98b | 44 | /* offsets from %esp to the stack-passed parameters with one register pushed onto stack (pushed register at 0, return address at 4); code that has pushed more registers adds 4 per extra push, e.g. in_blk+4 and out_blk+12 */ |
07bf44f8 | 45 | #define ctx 8 |
6c2bb98b HX |
46 | #define out_blk 12 |
47 | #define in_blk 16 | |
1da177e4 | 48 | |
07bf44f8 HY |
49 | /* byte offsets in crypto_aes_ctx structure: klen = key size field, ekey = key schedule used by aes_enc_blk, dkey = key schedule used by aes_dec_blk. NOTE(review): hard-coded values; presumably must track the C struct layout -- confirm */ |
50 | #define klen (480) | |
51 | #define ekey (0) | |
52 | #define dkey (240) | |
1da177e4 LT |
53 | |
54 | // register mapping for encrypt and decrypt subroutines: r0..r5 are logical names for the six working registers | |
55 | ||
56 | #define r0 eax | |
57 | #define r1 ebx | |
58 | #define r2 ecx | |
59 | #define r3 edx | |
60 | #define r4 esi | |
61 | #define r5 edi | |
62 | ||
// 8-bit sub-register names: <reg>l is bits 0-7 and <reg>h is bits 8-15 of <reg>.
// Only eax/ebx/ecx/edx (r0..r3) get entries, so only those can be passed where
// l() or h() is applied (the idx argument of do_col/do_fcol/do_icol).
63 | #define eaxl al | |
64 | #define eaxh ah | |
65 | #define ebxl bl | |
66 | #define ebxh bh | |
67 | #define ecxl cl | |
68 | #define ecxh ch | |
69 | #define edxl dl | |
70 | #define edxh dh | |
71 | ||
// h(reg)/l(reg) go through a second macro level so that an argument which is
// itself a macro (e.g. r0) is expanded to eax before ## pastes on the suffix,
// giving eaxh/eaxl, which the defines above then map to ah/al.
72 | #define _h(reg) reg##h | |
73 | #define h(reg) _h(reg) | |
74 | ||
75 | #define _l(reg) reg##l | |
76 | #define l(reg) _l(reg) | |
77 | ||
78 | // This macro takes a 32-bit word representing a column and uses | |
79 | // each of its four bytes to index into four tables of 256 32-bit | |
80 | // words to obtain values that are then xored into the four | |
81 | // output registers a1, a2, a3 and a4 (one table lookup per register). | |
82 | ||
83 | // Parameters: | |
84 | // table base address of the first of four tables laid out tlen bytes apart | |
85 | // a1 register xored with the table entry selected by bits 0-7 of idx | |
86 | // a2 register xored with the table+tlen entry selected by bits 8-15 of idx | |
87 | // a3 register xored with the table+2*tlen entry selected by bits 16-23 of idx | |
88 | // a4 register xored with the table+3*tlen entry selected by bits 24-31 of idx | |
89 | // idx input register for the round (destroyed); must be one that l() and h() accept | |
90 | // tmp scratch register for the round (clobbered) | |
91 | // (do_col takes no key schedule argument; only do_fcol/do_icol have a sched parameter) | |
92 | ||
93 | #define do_col(table, a1,a2,a3,a4, idx, tmp) \ |
94 | movzx %l(idx),%tmp; /* tmp = bits 0-7 of idx */ \ |
95 | xor table(,%tmp,4),%a1; \ |
96 | movzx %h(idx),%tmp; /* tmp = bits 8-15 of idx */ \ |
97 | shr $16,%idx; /* move bits 16-31 down so l()/h() can reach them */ \ |
98 | xor table+tlen(,%tmp,4),%a2; \ |
99 | movzx %l(idx),%tmp; /* tmp = original bits 16-23 of idx */ \ |
100 | movzx %h(idx),%idx; /* idx = original bits 24-31; the input value is gone */ \ |
101 | xor table+2*tlen(,%tmp,4),%a3; \ |
102 | xor table+3*tlen(,%idx,4),%a4; |
103 | ||
104 | // do_fcol: same byte-indexed table lookups as do_col, but first initialise output registers from the key schedule | |
// (used by the forward rounds): a1 starts from the word at sched+0, a2 from
// sched+12, a3 from sched+8 and a4 from sched+4; each is then xored with its
// table entry (a1: table, a2: table+tlen, a3: table+2*tlen, a4: table+3*tlen).
// sched is a memory operand such as (%ebp), +16(%ebp) or -64(%ebp); it is
// written directly after a constant ("12 sched"), so any displacement it
// carries needs an explicit sign to combine with that constant.
// idx must be a register accepted by l()/h(); tmp is a scratch register.
105 | // NB1: original value of a3 is in idx on exit (copied there just before a3 is reloaded from sched) | |
106 | // NB2: original values of a1,a2,a4 aren't used | |
107 | #define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ |
108 | mov 0 sched,%a1; \ |
109 | movzx %l(idx),%tmp; \ |
110 | mov 12 sched,%a2; \ |
111 | xor table(,%tmp,4),%a1; \ |
112 | mov 4 sched,%a4; \ |
113 | movzx %h(idx),%tmp; \ |
114 | shr $16,%idx; \ |
115 | xor table+tlen(,%tmp,4),%a2; \ |
116 | movzx %l(idx),%tmp; \ |
117 | movzx %h(idx),%idx; \ |
118 | xor table+3*tlen(,%idx,4),%a4; \ |
119 | mov %a3,%idx; \ |
120 | mov 8 sched,%a3; \ |
121 | xor table+2*tlen(,%tmp,4),%a3; |
122 | ||
123 | // do_icol: same byte-indexed table lookups as do_col, but first initialise output registers from the key schedule | |
// (used by the inverse rounds): a1 starts from the word at sched+0, a2 from
// sched+4, a3 from sched+8 and a4 from sched+12; each is then xored with its
// table entry (a1: table, a2: table+tlen, a3: table+2*tlen, a4: table+3*tlen).
// The only difference from do_fcol is that the sched offsets used for a2 and
// a4 are exchanged (4/12 here, 12/4 there).
// sched is a memory operand such as (%ebp), +16(%ebp) or -64(%ebp); it is
// written directly after a constant ("12 sched"), so any displacement it
// carries needs an explicit sign to combine with that constant.
// idx must be a register accepted by l()/h(); tmp is a scratch register.
124 | // NB1: original value of a3 is in idx on exit (copied there just before a3 is reloaded from sched) | |
125 | // NB2: original values of a1,a2,a4 aren't used | |
126 | #define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ |
127 | mov 0 sched,%a1; \ |
128 | movzx %l(idx),%tmp; \ |
129 | mov 4 sched,%a2; \ |
130 | xor table(,%tmp,4),%a1; \ |
131 | mov 12 sched,%a4; \ |
132 | movzx %h(idx),%tmp; \ |
133 | shr $16,%idx; \ |
134 | xor table+tlen(,%tmp,4),%a2; \ |
135 | movzx %l(idx),%tmp; \ |
136 | movzx %h(idx),%idx; \ |
137 | xor table+3*tlen(,%idx,4),%a4; \ |
138 | mov %a3,%idx; \ |
139 | mov 8 sched,%a3; \ |
140 | xor table+2*tlen(,%tmp,4),%a3; |
141 | ||
142 | ||
143 | // save/restore spill a register to, or reload it from, the 32-bit stack slot at 4*slot(%esp). Note the swapped argument order: save(slot, reg) but restore(reg, slot). The slots must already be reserved below %esp by the caller. (original Gladman had conditional saves to MMX regs.) | |
144 | #define save(a1, a2) \ |
145 | mov %a2,4*a1(%esp) | |
146 | ||
147 | #define restore(a1, a2) \ |
148 | mov 4*a2(%esp),%a1 | |
149 | ||
150 | // These macros perform a forward encryption cycle. A fwd_rnd1/fwd_rnd2 pair is | |
151 | // entered with the previous round column values in r0,r1,r4,r5 and | |
152 | // exits with the new values in the same registers, using two stack slots | |
153 | // (slots 0 and 1 of save/restore) for temporary storage. | |
154 | ||
155 | // fwd_rnd1: round column values | |
156 | // on entry: r0,r1,r4,r5 | |
157 | // on exit: r2,r1,r4,r5 | |
// arg is the round-key memory operand (passed to do_fcol as sched) and table
// the lookup table base. r0 serves as idx and r3 as tmp, so both are clobbered.
// r1 and r5 are spilled first because do_fcol overwrites them with new column
// values before their old contents have been used as idx; r4 needs no spill
// since do_fcol leaves its old value in idx (r0).
158 | #define fwd_rnd1(arg, table) \ |
159 | save (0,r1); \ |
160 | save (1,r5); \ |
161 | \ |
162 | /* compute new column values */ \ |
163 | do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ |
164 | do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ |
165 | restore(r0,0); \ |
166 | do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ |
167 | restore(r0,1); \ |
168 | do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ |
169 | ||
170 | // fwd_rnd2: forward round, round column values | |
171 | // on entry: r2,r1,r4,r5 | |
172 | // on exit: r0,r1,r4,r5 | |
// Same sequence as fwd_rnd1 with the roles of r0 and r2 exchanged: r2 serves
// as idx and r3 as tmp, so both are clobbered. arg is the round-key memory
// operand (passed to do_fcol as sched) and table the lookup table base.
// Stack slots 0 and 1 hold the old r1 and r5 until they are used as idx.
173 | #define fwd_rnd2(arg, table) \ |
174 | save (0,r1); \ |
175 | save (1,r5); \ |
176 | \ |
177 | /* compute new column values */ \ |
178 | do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ |
179 | do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ |
180 | restore(r2,0); \ |
181 | do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ |
182 | restore(r2,1); \ |
183 | do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ |
184 | ||
185 | // These macros perform an inverse encryption (decryption) cycle. An inv_rnd1/inv_rnd2 pair is | |
186 | // entered with the previous round column values in r0,r1,r4,r5 and | |
187 | // exits with the new values in the same registers, using two stack slots | |
188 | // (slots 0 and 1 of save/restore) for temporary storage. | |
189 | ||
190 | // inv_rnd1: round column values | |
191 | // on entry: r0,r1,r4,r5 | |
192 | // on exit: r2,r1,r4,r5 | |
// arg is the round-key memory operand (passed to do_icol as sched) and table
// the lookup table base. r0 serves as idx and r3 as tmp, so both are clobbered.
// r1 and r5 are spilled first because do_icol overwrites them with new column
// values before their old contents have been used as idx; r4 needs no spill
// since do_icol leaves its old value in idx (r0).
193 | #define inv_rnd1(arg, table) \ |
194 | save (0,r1); \ |
195 | save (1,r5); \ |
196 | \ |
197 | /* compute new column values */ \ |
198 | do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ |
199 | do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ |
200 | restore(r0,0); \ |
201 | do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ |
202 | restore(r0,1); \ |
203 | do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ |
204 | ||
205 | // inv_rnd2: inverse round, round column values | |
206 | // on entry: r2,r1,r4,r5 | |
207 | // on exit: r0,r1,r4,r5 | |
// Same sequence as inv_rnd1 with the roles of r0 and r2 exchanged: r2 serves
// as idx and r3 as tmp, so both are clobbered. arg is the round-key memory
// operand (passed to do_icol as sched) and table the lookup table base.
// Stack slots 0 and 1 hold the old r1 and r5 until they are used as idx.
208 | #define inv_rnd2(arg, table) \ |
209 | save (0,r1); \ |
210 | save (1,r5); \ |
211 | \ |
212 | /* compute new column values */ \ |
213 | do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ |
214 | do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ |
215 | restore(r2,0); \ |
216 | do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ |
217 | restore(r2,1); \ |
218 | do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ |
219 | ||
220 | // AES (Rijndael) Encryption Subroutine: encrypts one 16-byte block | |
07bf44f8 | 221 | /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ |
// In:      all three arguments are read from the stack (offsets ctx/out_blk/in_blk).
//          NOTE(review): the C-side prototype presumably has to force stack
//          argument passing -- confirm.
// Out:     16 bytes stored at out_blk; nothing is returned in a register.
// Saved:   ebp, ebx, esi, edi are pushed on entry and popped before ret.
// Clobbers: eax, ecx, edx, flags.
// Stack:   four pushed registers plus 8 bytes of scratch for save()/restore().
// Reads 16 bytes at in_blk, the key size field at klen and the round keys
// starting at ekey inside *ctx.
1da177e4 | 222 | |
5157dea8 SS |
223 | .extern crypto_ft_tab |
224 | .extern crypto_fl_tab | |
1da177e4 | 225 | |
3f299743 | 226 | ENTRY(aes_enc_blk) |
1da177e4 | 227 | push %ebp |
07bf44f8 | 228 | mov ctx(%esp),%ebp |
1da177e4 LT |
229 | |
230 | // CAUTION: the order and the values used in these assigns | |
231 | // rely on the register mappings; each push moves %esp by 4, which is why | |
// in_blk is addressed as in_blk+4 after the second push.
232 | ||
233 | 1: push %ebx | |
234 | mov in_blk+4(%esp),%r2 | |
235 | push %esi | |
5157dea8 | 236 | mov klen(%ebp),%r3 // r3 = key size, read while %ebp still points at the start of the context |
1da177e4 LT |
237 | push %edi |
238 | #if ekey != 0 | |
239 | lea ekey(%ebp),%ebp // key pointer | |
240 | #endif | |
241 | ||
242 | // input four columns (r0,r1,r4,r5 = the four 32-bit words at in_blk) and xor in first round key | |
243 | ||
244 | mov (%r2),%r0 | |
245 | mov 4(%r2),%r1 | |
246 | mov 8(%r2),%r4 | |
247 | mov 12(%r2),%r5 | |
248 | xor (%ebp),%r0 | |
249 | xor 4(%ebp),%r1 | |
250 | xor 8(%ebp),%r4 | |
251 | xor 12(%ebp),%r5 | |
252 | ||
e6a3a925 DV |
253 | sub $8,%esp // space for register saves on stack (slots 0 and 1 used by the round macros) |
254 | add $16,%ebp // increment to next round key | |
// The key size is compared with 24: below -> enter at 4 (10 rounds), equal ->
// enter at 3 (12 rounds), otherwise fall through to 2 (14 rounds). %ebp is
// advanced by 32 for each extra pair of rounds so that the fixed round-key
// displacements below (-64 .. +144) always start at the second round key and
// end at the last one. lea is used for the advance because it leaves the
// flags from cmp intact for the following je.
5157dea8 | 255 | cmp $24,%r3 |
e6a3a925 DV |
256 | jb 4f // 10 rounds for 128-bit key |
257 | lea 32(%ebp),%ebp | |
258 | je 3f // 12 rounds for 192-bit key | |
259 | lea 32(%ebp),%ebp | |
260 | ||
5157dea8 SS |
261 | 2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key |
262 | fwd_rnd2( -48(%ebp), crypto_ft_tab) | |
263 | 3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key | |
264 | fwd_rnd2( -16(%ebp), crypto_ft_tab) | |
265 | 4: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key | |
266 | fwd_rnd2( +16(%ebp), crypto_ft_tab) | |
267 | fwd_rnd1( +32(%ebp), crypto_ft_tab) | |
268 | fwd_rnd2( +48(%ebp), crypto_ft_tab) | |
269 | fwd_rnd1( +64(%ebp), crypto_ft_tab) | |
270 | fwd_rnd2( +80(%ebp), crypto_ft_tab) | |
271 | fwd_rnd1( +96(%ebp), crypto_ft_tab) | |
272 | fwd_rnd2(+112(%ebp), crypto_ft_tab) | |
273 | fwd_rnd1(+128(%ebp), crypto_ft_tab) | |
274 | fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table | |
1da177e4 LT |
275 | |
276 | // move final values to the output array. CAUTION: the | |
277 | // order of these assigns rely on the register mappings: the scratch slots are | |
// released first, out_blk is then at out_blk+12 because three registers
// besides %ebp are still pushed, and %edi/%esi/%ebx are each popped only
// after the column they held (r5/r4/r1) has been stored.
278 | ||
279 | add $8,%esp | |
280 | mov out_blk+12(%esp),%ebp | |
281 | mov %r5,12(%ebp) | |
282 | pop %edi | |
283 | mov %r4,8(%ebp) | |
284 | pop %esi | |
285 | mov %r1,4(%ebp) | |
286 | pop %ebx | |
287 | mov %r0,(%ebp) | |
288 | pop %ebp | |
1da177e4 | 289 | ret |
3f299743 | 290 | ENDPROC(aes_enc_blk) |
1da177e4 LT |
291 | |
292 | // AES (Rijndael) Decryption Subroutine: decrypts one 16-byte block | |
07bf44f8 | 293 | /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ |
// In:      all three arguments are read from the stack (offsets ctx/out_blk/in_blk).
//          NOTE(review): the C-side prototype presumably has to force stack
//          argument passing -- confirm.
// Out:     16 bytes stored at out_blk; nothing is returned in a register.
// Saved:   ebp, ebx, esi, edi are pushed on entry and popped before ret.
// Clobbers: eax, ecx, edx, flags.
// Stack:   four pushed registers plus 8 bytes of scratch for save()/restore().
// Reads 16 bytes at in_blk, the key size field at klen and the round keys
// starting at dkey inside *ctx.
1da177e4 | 294 | |
5157dea8 SS |
295 | .extern crypto_it_tab |
296 | .extern crypto_il_tab | |
1da177e4 | 297 | |
3f299743 | 298 | ENTRY(aes_dec_blk) |
1da177e4 | 299 | push %ebp |
07bf44f8 | 300 | mov ctx(%esp),%ebp |
1da177e4 LT |
301 | |
302 | // CAUTION: the order and the values used in these assigns | |
303 | // rely on the register mappings; each push moves %esp by 4, which is why | |
// in_blk is addressed as in_blk+4 after the second push.
304 | ||
305 | 1: push %ebx | |
306 | mov in_blk+4(%esp),%r2 | |
307 | push %esi | |
5157dea8 | 308 | mov klen(%ebp),%r3 // r3 = key size, read before %ebp is advanced to the decryption key schedule |
1da177e4 LT |
309 | push %edi |
310 | #if dkey != 0 | |
311 | lea dkey(%ebp),%ebp // key pointer | |
312 | #endif | |
1da177e4 LT |
313 | |
314 | // input four columns (r0,r1,r4,r5 = the four 32-bit words at in_blk) and xor in first round key | |
315 | ||
316 | mov (%r2),%r0 | |
317 | mov 4(%r2),%r1 | |
318 | mov 8(%r2),%r4 | |
319 | mov 12(%r2),%r5 | |
320 | xor (%ebp),%r0 | |
321 | xor 4(%ebp),%r1 | |
322 | xor 8(%ebp),%r4 | |
323 | xor 12(%ebp),%r5 | |
324 | ||
e6a3a925 | 325 | sub $8,%esp // space for register saves on stack (slots 0 and 1 used by the round macros) |
5157dea8 SS |
326 | add $16,%ebp // increment to next round key |
// The key size is compared with 24: below -> enter at 4 (10 rounds), equal ->
// enter at 3 (12 rounds), otherwise fall through to 2 (14 rounds). %ebp is
// advanced by 32 for each extra pair of rounds so that the fixed round-key
// displacements below (-64 .. +144) always start at the second round key and
// end at the last one. lea is used for the advance because it leaves the
// flags from cmp intact for the following je.
327 | cmp $24,%r3 | |
e6a3a925 | 328 | jb 4f // 10 rounds for 128-bit key |
5157dea8 | 329 | lea 32(%ebp),%ebp |
e6a3a925 | 330 | je 3f // 12 rounds for 192-bit key |
5157dea8 SS |
331 | lea 32(%ebp),%ebp |
332 | ||
333 | 2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key | |
334 | inv_rnd2( -48(%ebp), crypto_it_tab) | |
335 | 3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key | |
336 | inv_rnd2( -16(%ebp), crypto_it_tab) | |
337 | 4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key | |
338 | inv_rnd2( +16(%ebp), crypto_it_tab) | |
339 | inv_rnd1( +32(%ebp), crypto_it_tab) | |
340 | inv_rnd2( +48(%ebp), crypto_it_tab) | |
341 | inv_rnd1( +64(%ebp), crypto_it_tab) | |
342 | inv_rnd2( +80(%ebp), crypto_it_tab) | |
343 | inv_rnd1( +96(%ebp), crypto_it_tab) | |
344 | inv_rnd2(+112(%ebp), crypto_it_tab) | |
345 | inv_rnd1(+128(%ebp), crypto_it_tab) | |
346 | inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table | |
1da177e4 LT |
347 | |
348 | // move final values to the output array. CAUTION: the | |
349 | // order of these assigns rely on the register mappings: the scratch slots are | |
// released first, out_blk is then at out_blk+12 because three registers
// besides %ebp are still pushed, and %edi/%esi/%ebx are each popped only
// after the column they held (r5/r4/r1) has been stored.
350 | ||
351 | add $8,%esp | |
352 | mov out_blk+12(%esp),%ebp | |
353 | mov %r5,12(%ebp) | |
354 | pop %edi | |
355 | mov %r4,8(%ebp) | |
356 | pop %esi | |
357 | mov %r1,4(%ebp) | |
358 | pop %ebx | |
359 | mov %r0,(%ebp) | |
360 | pop %ebp | |
1da177e4 | 361 | ret |
3f299743 | 362 | ENDPROC(aes_dec_blk) |