2 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
4 * Copyright (C) 2012 Johannes Goetzfried
5 * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
24 #include <linux/module.h>
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/crypto.h>
28 #include <linux/err.h>
29 #include <crypto/algapi.h>
30 #include <crypto/cast5.h>
31 #include <crypto/cryptd.h>
32 #include <crypto/ctr.h>
34 #include <asm/xsave.h>
35 #include <asm/crypto/ablk_helper.h>
36 #include <asm/crypto/glue_helper.h>
38 #define CAST5_PARALLEL_BLOCKS 16
40 asmlinkage
void __cast5_enc_blk_16way(struct cast5_ctx
*ctx
, u8
*dst
,
41 const u8
*src
, bool xor);
42 asmlinkage
void cast5_dec_blk_16way(struct cast5_ctx
*ctx
, u8
*dst
,
45 static inline void cast5_enc_blk_xway(struct cast5_ctx
*ctx
, u8
*dst
,
48 __cast5_enc_blk_16way(ctx
, dst
, src
, false);
51 static inline void cast5_enc_blk_xway_xor(struct cast5_ctx
*ctx
, u8
*dst
,
54 __cast5_enc_blk_16way(ctx
, dst
, src
, true);
57 static inline void cast5_dec_blk_xway(struct cast5_ctx
*ctx
, u8
*dst
,
60 cast5_dec_blk_16way(ctx
, dst
, src
);
64 static inline bool cast5_fpu_begin(bool fpu_enabled
, unsigned int nbytes
)
66 return glue_fpu_begin(CAST5_BLOCK_SIZE
, CAST5_PARALLEL_BLOCKS
,
67 NULL
, fpu_enabled
, nbytes
);
70 static inline void cast5_fpu_end(bool fpu_enabled
)
72 return glue_fpu_end(fpu_enabled
);
75 static int ecb_crypt(struct blkcipher_desc
*desc
, struct blkcipher_walk
*walk
,
78 bool fpu_enabled
= false;
79 struct cast5_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
80 const unsigned int bsize
= CAST5_BLOCK_SIZE
;
84 err
= blkcipher_walk_virt(desc
, walk
);
85 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
87 while ((nbytes
= walk
->nbytes
)) {
88 u8
*wsrc
= walk
->src
.virt
.addr
;
89 u8
*wdst
= walk
->dst
.virt
.addr
;
91 fpu_enabled
= cast5_fpu_begin(fpu_enabled
, nbytes
);
93 /* Process multi-block batch */
94 if (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
) {
97 cast5_enc_blk_xway(ctx
, wdst
, wsrc
);
99 cast5_dec_blk_xway(ctx
, wdst
, wsrc
);
101 wsrc
+= bsize
* CAST5_PARALLEL_BLOCKS
;
102 wdst
+= bsize
* CAST5_PARALLEL_BLOCKS
;
103 nbytes
-= bsize
* CAST5_PARALLEL_BLOCKS
;
104 } while (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
);
110 /* Handle leftovers */
113 __cast5_encrypt(ctx
, wdst
, wsrc
);
115 __cast5_decrypt(ctx
, wdst
, wsrc
);
120 } while (nbytes
>= bsize
);
123 err
= blkcipher_walk_done(desc
, walk
, nbytes
);
126 cast5_fpu_end(fpu_enabled
);
130 static int ecb_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
131 struct scatterlist
*src
, unsigned int nbytes
)
133 struct blkcipher_walk walk
;
135 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
136 return ecb_crypt(desc
, &walk
, true);
139 static int ecb_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
140 struct scatterlist
*src
, unsigned int nbytes
)
142 struct blkcipher_walk walk
;
144 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
145 return ecb_crypt(desc
, &walk
, false);
148 static unsigned int __cbc_encrypt(struct blkcipher_desc
*desc
,
149 struct blkcipher_walk
*walk
)
151 struct cast5_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
152 const unsigned int bsize
= CAST5_BLOCK_SIZE
;
153 unsigned int nbytes
= walk
->nbytes
;
154 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
155 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
156 u64
*iv
= (u64
*)walk
->iv
;
160 __cast5_encrypt(ctx
, (u8
*)dst
, (u8
*)dst
);
166 } while (nbytes
>= bsize
);
168 *(u64
*)walk
->iv
^= *iv
;
172 static int cbc_encrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
173 struct scatterlist
*src
, unsigned int nbytes
)
175 struct blkcipher_walk walk
;
178 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
179 err
= blkcipher_walk_virt(desc
, &walk
);
181 while ((nbytes
= walk
.nbytes
)) {
182 nbytes
= __cbc_encrypt(desc
, &walk
);
183 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
189 static unsigned int __cbc_decrypt(struct blkcipher_desc
*desc
,
190 struct blkcipher_walk
*walk
)
192 struct cast5_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
193 const unsigned int bsize
= CAST5_BLOCK_SIZE
;
194 unsigned int nbytes
= walk
->nbytes
;
195 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
196 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
197 u64 ivs
[CAST5_PARALLEL_BLOCKS
- 1];
201 /* Start of the last block. */
202 src
+= nbytes
/ bsize
- 1;
203 dst
+= nbytes
/ bsize
- 1;
207 /* Process multi-block batch */
208 if (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
) {
210 nbytes
-= bsize
* (CAST5_PARALLEL_BLOCKS
- 1);
211 src
-= CAST5_PARALLEL_BLOCKS
- 1;
212 dst
-= CAST5_PARALLEL_BLOCKS
- 1;
214 for (i
= 0; i
< CAST5_PARALLEL_BLOCKS
- 1; i
++)
217 cast5_dec_blk_xway(ctx
, (u8
*)dst
, (u8
*)src
);
219 for (i
= 0; i
< CAST5_PARALLEL_BLOCKS
- 1; i
++)
220 *(dst
+ (i
+ 1)) ^= *(ivs
+ i
);
229 } while (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
);
235 /* Handle leftovers */
237 __cast5_decrypt(ctx
, (u8
*)dst
, (u8
*)src
);
249 *dst
^= *(u64
*)walk
->iv
;
250 *(u64
*)walk
->iv
= last_iv
;
255 static int cbc_decrypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
256 struct scatterlist
*src
, unsigned int nbytes
)
258 bool fpu_enabled
= false;
259 struct blkcipher_walk walk
;
262 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
263 err
= blkcipher_walk_virt(desc
, &walk
);
264 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
266 while ((nbytes
= walk
.nbytes
)) {
267 fpu_enabled
= cast5_fpu_begin(fpu_enabled
, nbytes
);
268 nbytes
= __cbc_decrypt(desc
, &walk
);
269 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
272 cast5_fpu_end(fpu_enabled
);
276 static void ctr_crypt_final(struct blkcipher_desc
*desc
,
277 struct blkcipher_walk
*walk
)
279 struct cast5_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
280 u8
*ctrblk
= walk
->iv
;
281 u8 keystream
[CAST5_BLOCK_SIZE
];
282 u8
*src
= walk
->src
.virt
.addr
;
283 u8
*dst
= walk
->dst
.virt
.addr
;
284 unsigned int nbytes
= walk
->nbytes
;
286 __cast5_encrypt(ctx
, keystream
, ctrblk
);
287 crypto_xor(keystream
, src
, nbytes
);
288 memcpy(dst
, keystream
, nbytes
);
290 crypto_inc(ctrblk
, CAST5_BLOCK_SIZE
);
293 static unsigned int __ctr_crypt(struct blkcipher_desc
*desc
,
294 struct blkcipher_walk
*walk
)
296 struct cast5_ctx
*ctx
= crypto_blkcipher_ctx(desc
->tfm
);
297 const unsigned int bsize
= CAST5_BLOCK_SIZE
;
298 unsigned int nbytes
= walk
->nbytes
;
299 u64
*src
= (u64
*)walk
->src
.virt
.addr
;
300 u64
*dst
= (u64
*)walk
->dst
.virt
.addr
;
301 u64 ctrblk
= be64_to_cpu(*(__be64
*)walk
->iv
);
302 __be64 ctrblocks
[CAST5_PARALLEL_BLOCKS
];
305 /* Process multi-block batch */
306 if (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
) {
308 /* create ctrblks for parallel encrypt */
309 for (i
= 0; i
< CAST5_PARALLEL_BLOCKS
; i
++) {
313 ctrblocks
[i
] = cpu_to_be64(ctrblk
++);
316 cast5_enc_blk_xway_xor(ctx
, (u8
*)dst
,
319 src
+= CAST5_PARALLEL_BLOCKS
;
320 dst
+= CAST5_PARALLEL_BLOCKS
;
321 nbytes
-= bsize
* CAST5_PARALLEL_BLOCKS
;
322 } while (nbytes
>= bsize
* CAST5_PARALLEL_BLOCKS
);
328 /* Handle leftovers */
333 ctrblocks
[0] = cpu_to_be64(ctrblk
++);
335 __cast5_encrypt(ctx
, (u8
*)ctrblocks
, (u8
*)ctrblocks
);
336 *dst
^= ctrblocks
[0];
341 } while (nbytes
>= bsize
);
344 *(__be64
*)walk
->iv
= cpu_to_be64(ctrblk
);
348 static int ctr_crypt(struct blkcipher_desc
*desc
, struct scatterlist
*dst
,
349 struct scatterlist
*src
, unsigned int nbytes
)
351 bool fpu_enabled
= false;
352 struct blkcipher_walk walk
;
355 blkcipher_walk_init(&walk
, dst
, src
, nbytes
);
356 err
= blkcipher_walk_virt_block(desc
, &walk
, CAST5_BLOCK_SIZE
);
357 desc
->flags
&= ~CRYPTO_TFM_REQ_MAY_SLEEP
;
359 while ((nbytes
= walk
.nbytes
) >= CAST5_BLOCK_SIZE
) {
360 fpu_enabled
= cast5_fpu_begin(fpu_enabled
, nbytes
);
361 nbytes
= __ctr_crypt(desc
, &walk
);
362 err
= blkcipher_walk_done(desc
, &walk
, nbytes
);
365 cast5_fpu_end(fpu_enabled
);
368 ctr_crypt_final(desc
, &walk
);
369 err
= blkcipher_walk_done(desc
, &walk
, 0);
376 static struct crypto_alg cast5_algs
[6] = { {
377 .cra_name
= "__ecb-cast5-avx",
378 .cra_driver_name
= "__driver-ecb-cast5-avx",
380 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
381 .cra_blocksize
= CAST5_BLOCK_SIZE
,
382 .cra_ctxsize
= sizeof(struct cast5_ctx
),
384 .cra_type
= &crypto_blkcipher_type
,
385 .cra_module
= THIS_MODULE
,
388 .min_keysize
= CAST5_MIN_KEY_SIZE
,
389 .max_keysize
= CAST5_MAX_KEY_SIZE
,
390 .setkey
= cast5_setkey
,
391 .encrypt
= ecb_encrypt
,
392 .decrypt
= ecb_decrypt
,
396 .cra_name
= "__cbc-cast5-avx",
397 .cra_driver_name
= "__driver-cbc-cast5-avx",
399 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
400 .cra_blocksize
= CAST5_BLOCK_SIZE
,
401 .cra_ctxsize
= sizeof(struct cast5_ctx
),
403 .cra_type
= &crypto_blkcipher_type
,
404 .cra_module
= THIS_MODULE
,
407 .min_keysize
= CAST5_MIN_KEY_SIZE
,
408 .max_keysize
= CAST5_MAX_KEY_SIZE
,
409 .setkey
= cast5_setkey
,
410 .encrypt
= cbc_encrypt
,
411 .decrypt
= cbc_decrypt
,
415 .cra_name
= "__ctr-cast5-avx",
416 .cra_driver_name
= "__driver-ctr-cast5-avx",
418 .cra_flags
= CRYPTO_ALG_TYPE_BLKCIPHER
,
420 .cra_ctxsize
= sizeof(struct cast5_ctx
),
422 .cra_type
= &crypto_blkcipher_type
,
423 .cra_module
= THIS_MODULE
,
426 .min_keysize
= CAST5_MIN_KEY_SIZE
,
427 .max_keysize
= CAST5_MAX_KEY_SIZE
,
428 .ivsize
= CAST5_BLOCK_SIZE
,
429 .setkey
= cast5_setkey
,
430 .encrypt
= ctr_crypt
,
431 .decrypt
= ctr_crypt
,
435 .cra_name
= "ecb(cast5)",
436 .cra_driver_name
= "ecb-cast5-avx",
438 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
439 .cra_blocksize
= CAST5_BLOCK_SIZE
,
440 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
442 .cra_type
= &crypto_ablkcipher_type
,
443 .cra_module
= THIS_MODULE
,
444 .cra_init
= ablk_init
,
445 .cra_exit
= ablk_exit
,
448 .min_keysize
= CAST5_MIN_KEY_SIZE
,
449 .max_keysize
= CAST5_MAX_KEY_SIZE
,
450 .setkey
= ablk_set_key
,
451 .encrypt
= ablk_encrypt
,
452 .decrypt
= ablk_decrypt
,
456 .cra_name
= "cbc(cast5)",
457 .cra_driver_name
= "cbc-cast5-avx",
459 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
460 .cra_blocksize
= CAST5_BLOCK_SIZE
,
461 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
463 .cra_type
= &crypto_ablkcipher_type
,
464 .cra_module
= THIS_MODULE
,
465 .cra_init
= ablk_init
,
466 .cra_exit
= ablk_exit
,
469 .min_keysize
= CAST5_MIN_KEY_SIZE
,
470 .max_keysize
= CAST5_MAX_KEY_SIZE
,
471 .ivsize
= CAST5_BLOCK_SIZE
,
472 .setkey
= ablk_set_key
,
473 .encrypt
= __ablk_encrypt
,
474 .decrypt
= ablk_decrypt
,
478 .cra_name
= "ctr(cast5)",
479 .cra_driver_name
= "ctr-cast5-avx",
481 .cra_flags
= CRYPTO_ALG_TYPE_ABLKCIPHER
| CRYPTO_ALG_ASYNC
,
483 .cra_ctxsize
= sizeof(struct async_helper_ctx
),
485 .cra_type
= &crypto_ablkcipher_type
,
486 .cra_module
= THIS_MODULE
,
487 .cra_init
= ablk_init
,
488 .cra_exit
= ablk_exit
,
491 .min_keysize
= CAST5_MIN_KEY_SIZE
,
492 .max_keysize
= CAST5_MAX_KEY_SIZE
,
493 .ivsize
= CAST5_BLOCK_SIZE
,
494 .setkey
= ablk_set_key
,
495 .encrypt
= ablk_encrypt
,
496 .decrypt
= ablk_encrypt
,
502 static int __init
cast5_init(void)
506 if (!cpu_has_avx
|| !cpu_has_osxsave
) {
507 pr_info("AVX instructions are not detected.\n");
511 xcr0
= xgetbv(XCR_XFEATURE_ENABLED_MASK
);
512 if ((xcr0
& (XSTATE_SSE
| XSTATE_YMM
)) != (XSTATE_SSE
| XSTATE_YMM
)) {
513 pr_info("AVX detected but unusable.\n");
517 return crypto_register_algs(cast5_algs
, ARRAY_SIZE(cast5_algs
));
520 static void __exit
cast5_exit(void)
522 crypto_unregister_algs(cast5_algs
, ARRAY_SIZE(cast5_algs
));
525 module_init(cast5_init
);
526 module_exit(cast5_exit
);
528 MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
529 MODULE_LICENSE("GPL");
530 MODULE_ALIAS("cast5");