/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/fpu/api.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

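/*
 * 16-way AVX primitives implemented in the accompanying assembler file;
 * each call transforms CAST5_PARALLEL_BLOCKS (16) blocks at once.
 */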
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
                                    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
                                __be64 *iv);

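/*
 * FPU sections are entered lazily and kept open across walk iterations;
 * glue_fpu_begin() only enables the FPU once at least
 * CAST5_PARALLEL_BLOCKS blocks worth of data are pending.
 */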
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
        return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
                              NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
        return glue_fpu_end(fpu_enabled);
}

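/*
 * Walk the scatterlists and transform in place: take the 16-way AVX path
 * while at least CAST5_PARALLEL_BLOCKS blocks remain, then fall back to
 * the generic one-block routines for the tail.
 */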
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
                     bool enc)
{
        bool fpu_enabled = false;
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes;
        void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
        int err;

        err = blkcipher_walk_virt(desc, walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk->nbytes)) {
                u8 *wsrc = walk->src.virt.addr;
                u8 *wdst = walk->dst.virt.addr;

                /*
                 * Re-select the 16-way routine on every iteration: fn is
                 * switched to the one-block routine for leftovers below,
                 * and a later walk chunk may again be large enough for
                 * the batch path.
                 */
                fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;

                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

                /* Process multi-block batch */
                if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                        do {
                                fn(ctx, wdst, wsrc);

                                wsrc += bsize * CAST5_PARALLEL_BLOCKS;
                                wdst += bsize * CAST5_PARALLEL_BLOCKS;
                                nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                        } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                        if (nbytes < bsize)
                                goto done;
                }

                fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

                /* Handle leftovers */
                do {
                        fn(ctx, wdst, wsrc);

                        wsrc += bsize;
                        wdst += bsize;
                        nbytes -= bsize;
                } while (nbytes >= bsize);

done:
                err = blkcipher_walk_done(desc, walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        return ecb_crypt(desc, &walk, false);
}

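/*
 * CBC encryption is inherently serial (each block chains the previous
 * ciphertext block), so there is no 16-way path here.
 */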
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 *iv = (u64 *)walk->iv;

        do {
                *dst = *src ^ *iv;
                __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
                iv = dst;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

        *(u64 *)walk->iv = *iv;
        return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);

        while ((nbytes = walk.nbytes)) {
                nbytes = __cbc_encrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        return err;
}

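/*
 * CBC decryption runs back to front so that the ciphertext blocks still
 * needed as chaining values are intact when the 16-way batch overwrites
 * the buffer in place; the previous ciphertext block is XORed in after
 * each decryption.
 */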
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
                                  struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;
        u64 last_iv;

        /* Start of the last block. */
        src += nbytes / bsize - 1;
        dst += nbytes / bsize - 1;

        last_iv = *src;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
                        src -= CAST5_PARALLEL_BLOCKS - 1;
                        dst -= CAST5_PARALLEL_BLOCKS - 1;

                        cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

                        nbytes -= bsize;
                        if (nbytes < bsize)
                                goto done;

                        *dst ^= *(src - 1);
                        src -= 1;
                        dst -= 1;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
        }

        /* Handle leftovers */
        for (;;) {
                __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

                nbytes -= bsize;
                if (nbytes < bsize)
                        break;

                *dst ^= *(src - 1);
                src -= 1;
                dst -= 1;
        }

done:
        *dst ^= *(u64 *)walk->iv;
        *(u64 *)walk->iv = last_iv;

        return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt(desc, &walk);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes)) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __cbc_decrypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);
        return err;
}

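/*
 * Handle a trailing partial block: encrypt one counter value into a
 * keystream buffer and XOR just the remaining nbytes into dst.
 */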
static void ctr_crypt_final(struct blkcipher_desc *desc,
                            struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        u8 *ctrblk = walk->iv;
        u8 keystream[CAST5_BLOCK_SIZE];
        u8 *src = walk->src.virt.addr;
        u8 *dst = walk->dst.virt.addr;
        unsigned int nbytes = walk->nbytes;

        __cast5_encrypt(ctx, keystream, ctrblk);
        crypto_xor(keystream, src, nbytes);
        memcpy(dst, keystream, nbytes);

        crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

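/*
 * CTR mode: the 16-way routine consumes 16 counter values per call and
 * advances the big-endian counter in walk->iv itself (note the glue code
 * does not step it in the batch loop); the scalar tail encrypts one
 * counter value at a time, stepping the IV with be64_add_cpu().
 */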
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
                                struct blkcipher_walk *walk)
{
        struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
        const unsigned int bsize = CAST5_BLOCK_SIZE;
        unsigned int nbytes = walk->nbytes;
        u64 *src = (u64 *)walk->src.virt.addr;
        u64 *dst = (u64 *)walk->dst.virt.addr;

        /* Process multi-block batch */
        if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
                do {
                        cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
                                        (__be64 *)walk->iv);

                        src += CAST5_PARALLEL_BLOCKS;
                        dst += CAST5_PARALLEL_BLOCKS;
                        nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
                } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

                if (nbytes < bsize)
                        goto done;
        }

        /* Handle leftovers */
        do {
                u64 ctrblk;

                if (dst != src)
                        *dst = *src;

                ctrblk = *(u64 *)walk->iv;
                be64_add_cpu((__be64 *)walk->iv, 1);

                __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
                *dst ^= ctrblk;

                src += 1;
                dst += 1;
                nbytes -= bsize;
        } while (nbytes >= bsize);

done:
        return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                     struct scatterlist *src, unsigned int nbytes)
{
        bool fpu_enabled = false;
        struct blkcipher_walk walk;
        int err;

        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
        desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

        while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
                fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
                nbytes = __ctr_crypt(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, nbytes);
        }

        cast5_fpu_end(fpu_enabled);

        if (walk.nbytes) {
                ctr_crypt_final(desc, &walk);
                err = blkcipher_walk_done(desc, &walk, 0);
        }

        return err;
}

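/*
 * The first three algorithms are the internal, synchronous implementations
 * (CRYPTO_ALG_INTERNAL, "__" driver prefix); they may only be invoked from
 * contexts where FPU use is safe. The async ablkcipher wrappers below
 * expose them as ecb/cbc/ctr(cast5) and defer to cryptd when the FPU is
 * not usable in the calling context.
 */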
static struct crypto_alg cast5_algs[6] = { {
        .cra_name = "__ecb-cast5-avx",
        .cra_driver_name = "__driver-ecb-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ecb_encrypt,
                        .decrypt = ecb_decrypt,
                },
        },
}, {
        .cra_name = "__cbc-cast5-avx",
        .cra_driver_name = "__driver-cbc-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = cbc_encrypt,
                        .decrypt = cbc_decrypt,
                },
        },
}, {
        .cra_name = "__ctr-cast5-avx",
        .cra_driver_name = "__driver-ctr-cast5-avx",
        .cra_priority = 0,
        .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
                     CRYPTO_ALG_INTERNAL,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct cast5_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_blkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_u = {
                .blkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = cast5_setkey,
                        .encrypt = ctr_crypt,
                        .decrypt = ctr_crypt,
                },
        },
}, {
        .cra_name = "ecb(cast5)",
        .cra_driver_name = "ecb-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "cbc(cast5)",
        .cra_driver_name = "cbc-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = CAST5_BLOCK_SIZE,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = __ablk_encrypt,
                        .decrypt = ablk_decrypt,
                },
        },
}, {
        .cra_name = "ctr(cast5)",
        .cra_driver_name = "ctr-cast5-avx",
        .cra_priority = 200,
        .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
        .cra_blocksize = 1,
        .cra_ctxsize = sizeof(struct async_helper_ctx),
        .cra_alignmask = 0,
        .cra_type = &crypto_ablkcipher_type,
        .cra_module = THIS_MODULE,
        .cra_init = ablk_init,
        .cra_exit = ablk_exit,
        .cra_u = {
                .ablkcipher = {
                        .min_keysize = CAST5_MIN_KEY_SIZE,
                        .max_keysize = CAST5_MAX_KEY_SIZE,
                        .ivsize = CAST5_BLOCK_SIZE,
                        .setkey = ablk_set_key,
                        .encrypt = ablk_encrypt,
                        .decrypt = ablk_encrypt,
                        .geniv = "chainiv",
                },
        },
} };

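/*
 * Register only when the kernel reports SSE and YMM xsave support, since
 * the assembler routines require AVX and context-switchable YMM state.
 */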
static int __init cast5_init(void)
{
        const char *feature_name;

        if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
                pr_info("CPU feature '%s' is not supported.\n", feature_name);
                return -ENODEV;
        }

        return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
        crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");