Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
author     Herbert Xu <herbert@gondor.apana.org.au>
Tue, 4 Aug 2015 12:44:35 +0000 (20:44 +0800)
committer  Herbert Xu <herbert@gondor.apana.org.au>
Tue, 4 Aug 2015 12:44:35 +0000 (20:44 +0800)
Merge the crypto tree to pull in the qat registration bug fix.

96 files changed:
Documentation/DocBook/crypto-API.tmpl
Documentation/devicetree/bindings/crypto/sun4i-ss.txt [new file with mode: 0644]
MAINTAINERS
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/crypto/.gitignore
arch/arm64/crypto/aes-ce-ccm-glue.c
arch/powerpc/include/asm/switch_to.h
arch/powerpc/kernel/process.c
arch/x86/crypto/Makefile
arch/x86/crypto/aesni-intel_glue.c
arch/x86/crypto/chacha20-avx2-x86_64.S [new file with mode: 0644]
arch/x86/crypto/chacha20-ssse3-x86_64.S [new file with mode: 0644]
arch/x86/crypto/chacha20_glue.c [new file with mode: 0644]
arch/x86/crypto/poly1305-avx2-x86_64.S [new file with mode: 0644]
arch/x86/crypto/poly1305-sse2-x86_64.S [new file with mode: 0644]
arch/x86/crypto/poly1305_glue.c [new file with mode: 0644]
crypto/Kconfig
crypto/aead.c
crypto/algapi.c
crypto/ccm.c
crypto/chacha20_generic.c
crypto/chacha20poly1305.c
crypto/cryptd.c
crypto/echainiv.c
crypto/gcm.c
crypto/jitterentropy-kcapi.c
crypto/pcrypt.c
crypto/poly1305_generic.c
crypto/rsa.c
crypto/rsa_helper.c
crypto/seqiv.c
crypto/tcrypt.c
crypto/tcrypt.h
crypto/testmgr.h
drivers/crypto/Kconfig
drivers/crypto/Makefile
drivers/crypto/caam/caamalg.c
drivers/crypto/caam/ctrl.c
drivers/crypto/caam/desc_constr.h
drivers/crypto/caam/regs.h
drivers/crypto/ccp/ccp-platform.c
drivers/crypto/img-hash.c
drivers/crypto/marvell/cesa.c
drivers/crypto/nx/Kconfig
drivers/crypto/nx/Makefile
drivers/crypto/nx/nx-842-crypto.c [deleted file]
drivers/crypto/nx/nx-842-platform.c [deleted file]
drivers/crypto/nx/nx-842-powernv.c
drivers/crypto/nx/nx-842-pseries.c
drivers/crypto/nx/nx-842.c
drivers/crypto/nx/nx-842.h
drivers/crypto/nx/nx-aes-ccm.c
drivers/crypto/nx/nx-aes-gcm.c
drivers/crypto/nx/nx.c
drivers/crypto/nx/nx.h
drivers/crypto/omap-aes.c
drivers/crypto/qat/Kconfig
drivers/crypto/qat/qat_common/.gitignore [new file with mode: 0644]
drivers/crypto/qat/qat_common/Makefile
drivers/crypto/qat/qat_common/adf_accel_devices.h
drivers/crypto/qat/qat_common/adf_accel_engine.c
drivers/crypto/qat/qat_common/adf_aer.c
drivers/crypto/qat/qat_common/adf_cfg.c
drivers/crypto/qat/qat_common/adf_common_drv.h
drivers/crypto/qat/qat_common/adf_dev_mgr.c
drivers/crypto/qat/qat_common/adf_init.c
drivers/crypto/qat/qat_common/adf_transport.c
drivers/crypto/qat/qat_common/adf_transport_access_macros.h
drivers/crypto/qat/qat_common/icp_qat_fw.h
drivers/crypto/qat/qat_common/icp_qat_fw_pke.h [new file with mode: 0644]
drivers/crypto/qat/qat_common/qat_algs.c
drivers/crypto/qat/qat_common/qat_asym_algs.c [new file with mode: 0644]
drivers/crypto/qat/qat_common/qat_crypto.c
drivers/crypto/qat/qat_common/qat_crypto.h
drivers/crypto/qat/qat_common/qat_hal.c
drivers/crypto/qat/qat_common/qat_rsakey.asn1 [new file with mode: 0644]
drivers/crypto/qat/qat_common/qat_uclo.c
drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h
drivers/crypto/qat/qat_dh895xcc/adf_drv.c
drivers/crypto/sunxi-ss/Makefile [new file with mode: 0644]
drivers/crypto/sunxi-ss/sun4i-ss-cipher.c [new file with mode: 0644]
drivers/crypto/sunxi-ss/sun4i-ss-core.c [new file with mode: 0644]
drivers/crypto/sunxi-ss/sun4i-ss-hash.c [new file with mode: 0644]
drivers/crypto/sunxi-ss/sun4i-ss.h [new file with mode: 0644]
drivers/crypto/talitos.h
drivers/crypto/vmx/aes.c
drivers/crypto/vmx/aes_cbc.c
drivers/crypto/vmx/aes_ctr.c
drivers/crypto/vmx/ghash.c
include/crypto/aead.h
include/crypto/algapi.h
include/crypto/chacha20.h [new file with mode: 0644]
include/crypto/internal/aead.h
include/crypto/poly1305.h [new file with mode: 0644]

index 0992531ffefb761eb047d22003130cf45ede9584..8e17a41df4c3b80aa0fbe7a0c385a5646f4e1a11 100644 (file)
@@ -585,7 +585,7 @@ kernel crypto API                                |   IPSEC Layer
 +-----------+                                    |
 |           |            (1)
 |   aead    | <-----------------------------------  esp_output
-| (seqniv)  | ---+
+|  (seqiv)  | ---+
 +-----------+    |
                  | (2)
 +-----------+    |
@@ -1687,7 +1687,7 @@ read(opfd, out, outlen);
 !Pinclude/linux/crypto.h Block Cipher Algorithm Definitions
 !Finclude/linux/crypto.h crypto_alg
 !Finclude/linux/crypto.h ablkcipher_alg
-!Finclude/linux/crypto.h aead_alg
+!Finclude/crypto/aead.h aead_alg
 !Finclude/linux/crypto.h blkcipher_alg
 !Finclude/linux/crypto.h cipher_alg
 !Finclude/crypto/rng.h rng_alg
diff --git a/Documentation/devicetree/bindings/crypto/sun4i-ss.txt b/Documentation/devicetree/bindings/crypto/sun4i-ss.txt
new file mode 100644 (file)
index 0000000..1e02d17
--- /dev/null
@@ -0,0 +1,19 @@
+* Allwinner Security System found on A20 SoC
+
+Required properties:
+- compatible : Should be "allwinner,sun4i-a10-crypto".
+- reg: Should contain the Security System register location and length.
+- interrupts: Should contain the IRQ line for the Security System.
+- clocks : List of clock specifiers, corresponding to ahb and ss.
+- clock-names : Names of the functional clocks, should be
+       * "ahb" : AHB gating clock
+       * "mod" : SS controller clock
+
+Example:
+       crypto: crypto-engine@01c15000 {
+               compatible = "allwinner,sun4i-a10-crypto";
+               reg = <0x01c15000 0x1000>;
+               interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+               clocks = <&ahb_gates 5>, <&ss_clk>;
+               clock-names = "ahb", "mod";
+       };
index 8133cefb6b6e28715197a86aad555c29edbb7aa1..6295ec562d5fe39c13066bf1bc2417ada5a561dc 100644 (file)
@@ -556,6 +556,12 @@ S: Maintained
 F:     Documentation/i2c/busses/i2c-ali1563
 F:     drivers/i2c/busses/i2c-ali1563.c
 
+ALLWINNER SECURITY SYSTEM
+M:     Corentin Labbe <clabbe.montjoie@gmail.com>
+L:     linux-crypto@vger.kernel.org
+S:     Maintained
+F:     drivers/crypto/sunxi-ss/
+
 ALPHA PORT
 M:     Richard Henderson <rth@twiddle.net>
 M:     Ivan Kokshaysky <ink@jurassic.park.msu.ru>
@@ -5067,7 +5073,7 @@ F:        drivers/crypto/nx/nx_csbcpb.h
 F:     drivers/crypto/nx/nx_debugfs.h
 
 IBM Power 842 compression accelerator
-M:     Dan Streetman <ddstreet@us.ibm.com>
+M:     Dan Streetman <ddstreet@ieee.org>
 S:     Supported
 F:     drivers/crypto/nx/Makefile
 F:     drivers/crypto/nx/Kconfig
index 61c03d1fe5303301a7ee44f1069c3865da958313..551e3d1047778bcb5515f8580d647b6957129c66 100644 (file)
                        status = "disabled";
                };
 
+               crypto: crypto-engine@01c15000 {
+                       compatible = "allwinner,sun4i-a10-crypto";
+                       reg = <0x01c15000 0x1000>;
+                       interrupts = <86>;
+                       clocks = <&ahb_gates 5>, <&ss_clk>;
+                       clock-names = "ahb", "mod";
+               };
+
                spi2: spi@01c17000 {
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c17000 0x1000>;
index 6a63f30c9a699d0e4620aac31f64247ad9c1f95b..ba049d311200f1bd8481c60d98f26d0ff6499ac3 100644 (file)
                        status = "disabled";
                };
 
+               crypto: crypto-engine@01c15000 {
+                       compatible = "allwinner,sun4i-a10-crypto";
+                       reg = <0x01c15000 0x1000>;
+                       interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+                       clocks = <&ahb_gates 5>, <&ss_clk>;
+                       clock-names = "ahb", "mod";
+               };
+
                spi2: spi@01c17000 {
                        compatible = "allwinner,sun4i-a10-spi";
                        reg = <0x01c17000 0x1000>;
index 6231d36b3635260cf0f2a7185182099ce844ccb9..31e1f538df7dee529af3cf36a30fd941314de95c 100644 (file)
@@ -1 +1,3 @@
 aesbs-core.S
+sha256-core.S
+sha512-core.S
index 3303e8a7b837c9fc033da5a94206af4bb0594ba5..f3690fa76a5bac207f9e46b71b7f03e690f3e1ca 100644 (file)
@@ -124,7 +124,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 
        ce_aes_ccm_auth_data(mac, (u8 *)&ltag, ltag.len, &macp, ctx->key_enc,
                             num_rounds(ctx));
-       scatterwalk_start(&walk, req->assoc);
+       scatterwalk_start(&walk, req->src);
 
        do {
                u32 n = scatterwalk_clamp(&walk, len);
@@ -151,6 +151,10 @@ static int ccm_encrypt(struct aead_request *req)
        struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
        struct blkcipher_desc desc = { .info = req->iv };
        struct blkcipher_walk walk;
+       struct scatterlist srcbuf[2];
+       struct scatterlist dstbuf[2];
+       struct scatterlist *src;
+       struct scatterlist *dst;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen;
@@ -168,7 +172,12 @@ static int ccm_encrypt(struct aead_request *req)
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-       blkcipher_walk_init(&walk, req->dst, req->src, len);
+       src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
+       dst = src;
+       if (req->src != req->dst)
+               dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
+
+       blkcipher_walk_init(&walk, dst, src, len);
        err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
                                             AES_BLOCK_SIZE);
 
@@ -194,7 +203,7 @@ static int ccm_encrypt(struct aead_request *req)
                return err;
 
        /* copy authtag to end of dst */
-       scatterwalk_map_and_copy(mac, req->dst, req->cryptlen,
+       scatterwalk_map_and_copy(mac, dst, req->cryptlen,
                                 crypto_aead_authsize(aead), 1);
 
        return 0;
@@ -207,6 +216,10 @@ static int ccm_decrypt(struct aead_request *req)
        unsigned int authsize = crypto_aead_authsize(aead);
        struct blkcipher_desc desc = { .info = req->iv };
        struct blkcipher_walk walk;
+       struct scatterlist srcbuf[2];
+       struct scatterlist dstbuf[2];
+       struct scatterlist *src;
+       struct scatterlist *dst;
        u8 __aligned(8) mac[AES_BLOCK_SIZE];
        u8 buf[AES_BLOCK_SIZE];
        u32 len = req->cryptlen - authsize;
@@ -224,7 +237,12 @@ static int ccm_decrypt(struct aead_request *req)
        /* preserve the original iv for the final round */
        memcpy(buf, req->iv, AES_BLOCK_SIZE);
 
-       blkcipher_walk_init(&walk, req->dst, req->src, len);
+       src = scatterwalk_ffwd(srcbuf, req->src, req->assoclen);
+       dst = src;
+       if (req->src != req->dst)
+               dst = scatterwalk_ffwd(dstbuf, req->dst, req->assoclen);
+
+       blkcipher_walk_init(&walk, dst, src, len);
        err = blkcipher_aead_walk_virt_block(&desc, &walk, aead,
                                             AES_BLOCK_SIZE);
 
@@ -250,44 +268,43 @@ static int ccm_decrypt(struct aead_request *req)
                return err;
 
        /* compare calculated auth tag with the stored one */
-       scatterwalk_map_and_copy(buf, req->src, req->cryptlen - authsize,
+       scatterwalk_map_and_copy(buf, src, req->cryptlen - authsize,
                                 authsize, 0);
 
-       if (memcmp(mac, buf, authsize))
+       if (crypto_memneq(mac, buf, authsize))
                return -EBADMSG;
        return 0;
 }
 
-static struct crypto_alg ccm_aes_alg = {
-       .cra_name               = "ccm(aes)",
-       .cra_driver_name        = "ccm-aes-ce",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_AEAD,
-       .cra_blocksize          = 1,
-       .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
-       .cra_alignmask          = 7,
-       .cra_type               = &crypto_aead_type,
-       .cra_module             = THIS_MODULE,
-       .cra_aead = {
-               .ivsize         = AES_BLOCK_SIZE,
-               .maxauthsize    = AES_BLOCK_SIZE,
-               .setkey         = ccm_setkey,
-               .setauthsize    = ccm_setauthsize,
-               .encrypt        = ccm_encrypt,
-               .decrypt        = ccm_decrypt,
-       }
+static struct aead_alg ccm_aes_alg = {
+       .base = {
+               .cra_name               = "ccm(aes)",
+               .cra_driver_name        = "ccm-aes-ce",
+               .cra_flags              = CRYPTO_ALG_AEAD_NEW,
+               .cra_priority           = 300,
+               .cra_blocksize          = 1,
+               .cra_ctxsize            = sizeof(struct crypto_aes_ctx),
+               .cra_alignmask          = 7,
+               .cra_module             = THIS_MODULE,
+       },
+       .ivsize         = AES_BLOCK_SIZE,
+       .maxauthsize    = AES_BLOCK_SIZE,
+       .setkey         = ccm_setkey,
+       .setauthsize    = ccm_setauthsize,
+       .encrypt        = ccm_encrypt,
+       .decrypt        = ccm_decrypt,
 };
 
 static int __init aes_mod_init(void)
 {
        if (!(elf_hwcap & HWCAP_AES))
                return -ENODEV;
-       return crypto_register_alg(&ccm_aes_alg);
+       return crypto_register_aead(&ccm_aes_alg);
 }
 
 static void __exit aes_mod_exit(void)
 {
-       crypto_unregister_alg(&ccm_aes_alg);
+       crypto_unregister_aead(&ccm_aes_alg);
 }
 
 module_init(aes_mod_init);
index 58abeda64cb7afa271078497f25e3ae5a8a0e26c..15cca17cba4b9fe47c5598006f9e61214e20295c 100644 (file)
@@ -29,6 +29,7 @@ static inline void save_early_sprs(struct thread_struct *prev) {}
 
 extern void enable_kernel_fp(void);
 extern void enable_kernel_altivec(void);
+extern void enable_kernel_vsx(void);
 extern int emulate_altivec(struct pt_regs *);
 extern void __giveup_vsx(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
index 8005e18d1b40381f6b815890b0d58c99a382b4e8..64e6e9d9e656280d819da7f0c9725026b4bf83a9 100644 (file)
@@ -204,8 +204,6 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-#if 0
-/* not currently used, but some crazy RAID module might want to later */
 void enable_kernel_vsx(void)
 {
        WARN_ON(preemptible());
@@ -220,7 +218,6 @@ void enable_kernel_vsx(void)
 #endif /* CONFIG_SMP */
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
-#endif
 
 void giveup_vsx(struct task_struct *tsk)
 {
index 5a4a089e8b1fd7166e396b52917424e1d9a421b5..9a2838cf05916a132ee573ad4c0061138fda716a 100644 (file)
@@ -20,6 +20,7 @@ obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -30,6 +31,7 @@ obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
+obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -60,6 +62,7 @@ blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
 ifeq ($(avx_supported),yes)
@@ -75,6 +78,7 @@ endif
 
 ifeq ($(avx2_supported),yes)
        camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
+       chacha20-x86_64-y += chacha20-avx2-x86_64.o
        serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 endif
 
@@ -82,8 +86,10 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+poly1305-x86_64-y := poly1305-sse2-x86_64.o poly1305_glue.o
 ifeq ($(avx2_supported),yes)
 sha1-ssse3-y += sha1_avx2_x86_64_asm.o
+poly1305-x86_64-y += poly1305-avx2-x86_64.o
 endif
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
index dccad38b59a8d741fe5f442b558b63de2b4d931d..2347ef0a1a6de144a4b0367d751d7583ccba5098 100644 (file)
@@ -803,10 +803,7 @@ static int rfc4106_init(struct crypto_aead *aead)
                return PTR_ERR(cryptd_tfm);
 
        *ctx = cryptd_tfm;
-       crypto_aead_set_reqsize(
-               aead,
-               sizeof(struct aead_request) +
-               crypto_aead_reqsize(&cryptd_tfm->base));
+       crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
        return 0;
 }
 
@@ -955,8 +952,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
 
        /* Assuming we are supporting rfc4106 64-bit extended */
        /* sequence numbers We need to have the AAD length equal */
-       /* to 8 or 12 bytes */
-       if (unlikely(req->assoclen != 8 && req->assoclen != 12))
+       /* to 16 or 20 bytes */
+       if (unlikely(req->assoclen != 16 && req->assoclen != 20))
                return -EINVAL;
 
        /* IV below built */
@@ -992,9 +989,9 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
        }
 
        kernel_fpu_begin();
-       aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv,
-               ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst
-               + ((unsigned long)req->cryptlen), auth_tag_len);
+       aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
+                         ctx->hash_subkey, assoc, req->assoclen - 8,
+                         dst + req->cryptlen, auth_tag_len);
        kernel_fpu_end();
 
        /* The authTag (aka the Integrity Check Value) needs to be written
@@ -1033,12 +1030,12 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
        struct scatter_walk dst_sg_walk;
        unsigned int i;
 
-       if (unlikely(req->assoclen != 8 && req->assoclen != 12))
+       if (unlikely(req->assoclen != 16 && req->assoclen != 20))
                return -EINVAL;
 
        /* Assuming we are supporting rfc4106 64-bit extended */
        /* sequence numbers We need to have the AAD length */
-       /* equal to 8 or 12 bytes */
+       /* equal to 16 or 20 bytes */
 
        tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
        /* IV below built */
@@ -1075,8 +1072,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 
        kernel_fpu_begin();
        aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
-               ctx->hash_subkey, assoc, (unsigned long)req->assoclen,
-               authTag, auth_tag_len);
+                         ctx->hash_subkey, assoc, req->assoclen - 8,
+                         authTag, auth_tag_len);
        kernel_fpu_end();
 
        /* Compare generated tag with passed in tag. */
@@ -1105,19 +1102,12 @@ static int rfc4106_encrypt(struct aead_request *req)
        struct crypto_aead *tfm = crypto_aead_reqtfm(req);
        struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
        struct cryptd_aead *cryptd_tfm = *ctx;
-       struct aead_request *subreq = aead_request_ctx(req);
 
-       aead_request_set_tfm(subreq, irq_fpu_usable() ?
-                                    cryptd_aead_child(cryptd_tfm) :
-                                    &cryptd_tfm->base);
+       aead_request_set_tfm(req, irq_fpu_usable() ?
+                                 cryptd_aead_child(cryptd_tfm) :
+                                 &cryptd_tfm->base);
 
-       aead_request_set_callback(subreq, req->base.flags,
-                                 req->base.complete, req->base.data);
-       aead_request_set_crypt(subreq, req->src, req->dst,
-                              req->cryptlen, req->iv);
-       aead_request_set_ad(subreq, req->assoclen);
-
-       return crypto_aead_encrypt(subreq);
+       return crypto_aead_encrypt(req);
 }
 
 static int rfc4106_decrypt(struct aead_request *req)
@@ -1125,19 +1115,12 @@ static int rfc4106_decrypt(struct aead_request *req)
        struct crypto_aead *tfm = crypto_aead_reqtfm(req);
        struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
        struct cryptd_aead *cryptd_tfm = *ctx;
-       struct aead_request *subreq = aead_request_ctx(req);
-
-       aead_request_set_tfm(subreq, irq_fpu_usable() ?
-                                    cryptd_aead_child(cryptd_tfm) :
-                                    &cryptd_tfm->base);
 
-       aead_request_set_callback(subreq, req->base.flags,
-                                 req->base.complete, req->base.data);
-       aead_request_set_crypt(subreq, req->src, req->dst,
-                              req->cryptlen, req->iv);
-       aead_request_set_ad(subreq, req->assoclen);
+       aead_request_set_tfm(req, irq_fpu_usable() ?
+                                 cryptd_aead_child(cryptd_tfm) :
+                                 &cryptd_tfm->base);
 
-       return crypto_aead_decrypt(subreq);
+       return crypto_aead_decrypt(req);
 }
 #endif
 
@@ -1454,7 +1437,8 @@ static struct aead_alg aesni_aead_algs[] = { {
                .cra_name               = "rfc4106(gcm(aes))",
                .cra_driver_name        = "rfc4106-gcm-aesni",
                .cra_priority           = 400,
-               .cra_flags              = CRYPTO_ALG_ASYNC,
+               .cra_flags              = CRYPTO_ALG_ASYNC |
+                                         CRYPTO_ALG_AEAD_NEW,
                .cra_blocksize          = 1,
                .cra_ctxsize            = sizeof(struct cryptd_aead *),
                .cra_module             = THIS_MODULE,
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
new file mode 100644 (file)
index 0000000..16694e6
--- /dev/null
@@ -0,0 +1,443 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 32
+
+ROT8:  .octa 0x0e0d0c0f0a09080b0605040702010003
+       .octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+       .octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC:        .octa 0x00000003000000020000000100000000
+       .octa 0x00000007000000060000000500000004
+
+.text
+
+ENTRY(chacha20_8block_xor_avx2)
+       # %rdi: Input state matrix, s
+       # %rsi: 8 data blocks output, o
+       # %rdx: 8 data blocks input, i
+
+       # This function encrypts eight consecutive ChaCha20 blocks by loading
+       # the state matrix in AVX registers eight times. As we need some
+       # scratch registers, we save the first four registers on the stack. The
+       # algorithm performs each operation on the corresponding word of each
+       # state matrix, hence requires no word shuffling. For final XORing step
+       # we transpose the matrix by interleaving 32-, 64- and then 128-bit
+       # words, which allows us to do XOR in AVX registers. 8/16-bit word
+       # rotation is done with the slightly better performing byte shuffling,
+       # 7/12-bit word rotation uses traditional shift+OR.
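+       #
+       # For reference, each column/diagonal quarter-round applied below
+       # follows the usual ChaCha20 pattern from RFC7539, with a, b, c, d
+       # denoting the four state words involved:
+       #   a += b; d = rotl32(d ^ a, 16)
+       #   c += d; b = rotl32(b ^ c, 12)
+       #   a += b; d = rotl32(d ^ a, 8)
+       #   c += d; b = rotl32(b ^ c, 7)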
+
+       vzeroupper
+       # 4 * 32 byte stack, 32-byte aligned
+       mov             %rsp, %r8
+       and             $~31, %rsp
+       sub             $0x80, %rsp
+
+       # x0..15[0-7] = s[0..15]
+       vpbroadcastd    0x00(%rdi),%ymm0
+       vpbroadcastd    0x04(%rdi),%ymm1
+       vpbroadcastd    0x08(%rdi),%ymm2
+       vpbroadcastd    0x0c(%rdi),%ymm3
+       vpbroadcastd    0x10(%rdi),%ymm4
+       vpbroadcastd    0x14(%rdi),%ymm5
+       vpbroadcastd    0x18(%rdi),%ymm6
+       vpbroadcastd    0x1c(%rdi),%ymm7
+       vpbroadcastd    0x20(%rdi),%ymm8
+       vpbroadcastd    0x24(%rdi),%ymm9
+       vpbroadcastd    0x28(%rdi),%ymm10
+       vpbroadcastd    0x2c(%rdi),%ymm11
+       vpbroadcastd    0x30(%rdi),%ymm12
+       vpbroadcastd    0x34(%rdi),%ymm13
+       vpbroadcastd    0x38(%rdi),%ymm14
+       vpbroadcastd    0x3c(%rdi),%ymm15
+       # x0..3 on stack
+       vmovdqa         %ymm0,0x00(%rsp)
+       vmovdqa         %ymm1,0x20(%rsp)
+       vmovdqa         %ymm2,0x40(%rsp)
+       vmovdqa         %ymm3,0x60(%rsp)
+
+       vmovdqa         CTRINC(%rip),%ymm1
+       vmovdqa         ROT8(%rip),%ymm2
+       vmovdqa         ROT16(%rip),%ymm3
+
+       # x12 += counter values 0-7
+       vpaddd          %ymm1,%ymm12,%ymm12
+
+       mov             $10,%ecx
+
+.Ldoubleround8:
+       # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+       vpaddd          0x00(%rsp),%ymm4,%ymm0
+       vmovdqa         %ymm0,0x00(%rsp)
+       vpxor           %ymm0,%ymm12,%ymm12
+       vpshufb         %ymm3,%ymm12,%ymm12
+       # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+       vpaddd          0x20(%rsp),%ymm5,%ymm0
+       vmovdqa         %ymm0,0x20(%rsp)
+       vpxor           %ymm0,%ymm13,%ymm13
+       vpshufb         %ymm3,%ymm13,%ymm13
+       # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+       vpaddd          0x40(%rsp),%ymm6,%ymm0
+       vmovdqa         %ymm0,0x40(%rsp)
+       vpxor           %ymm0,%ymm14,%ymm14
+       vpshufb         %ymm3,%ymm14,%ymm14
+       # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+       vpaddd          0x60(%rsp),%ymm7,%ymm0
+       vmovdqa         %ymm0,0x60(%rsp)
+       vpxor           %ymm0,%ymm15,%ymm15
+       vpshufb         %ymm3,%ymm15,%ymm15
+
+       # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+       vpaddd          %ymm12,%ymm8,%ymm8
+       vpxor           %ymm8,%ymm4,%ymm4
+       vpslld          $12,%ymm4,%ymm0
+       vpsrld          $20,%ymm4,%ymm4
+       vpor            %ymm0,%ymm4,%ymm4
+       # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+       vpaddd          %ymm13,%ymm9,%ymm9
+       vpxor           %ymm9,%ymm5,%ymm5
+       vpslld          $12,%ymm5,%ymm0
+       vpsrld          $20,%ymm5,%ymm5
+       vpor            %ymm0,%ymm5,%ymm5
+       # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+       vpaddd          %ymm14,%ymm10,%ymm10
+       vpxor           %ymm10,%ymm6,%ymm6
+       vpslld          $12,%ymm6,%ymm0
+       vpsrld          $20,%ymm6,%ymm6
+       vpor            %ymm0,%ymm6,%ymm6
+       # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+       vpaddd          %ymm15,%ymm11,%ymm11
+       vpxor           %ymm11,%ymm7,%ymm7
+       vpslld          $12,%ymm7,%ymm0
+       vpsrld          $20,%ymm7,%ymm7
+       vpor            %ymm0,%ymm7,%ymm7
+
+       # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+       vpaddd          0x00(%rsp),%ymm4,%ymm0
+       vmovdqa         %ymm0,0x00(%rsp)
+       vpxor           %ymm0,%ymm12,%ymm12
+       vpshufb         %ymm2,%ymm12,%ymm12
+       # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+       vpaddd          0x20(%rsp),%ymm5,%ymm0
+       vmovdqa         %ymm0,0x20(%rsp)
+       vpxor           %ymm0,%ymm13,%ymm13
+       vpshufb         %ymm2,%ymm13,%ymm13
+       # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+       vpaddd          0x40(%rsp),%ymm6,%ymm0
+       vmovdqa         %ymm0,0x40(%rsp)
+       vpxor           %ymm0,%ymm14,%ymm14
+       vpshufb         %ymm2,%ymm14,%ymm14
+       # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+       vpaddd          0x60(%rsp),%ymm7,%ymm0
+       vmovdqa         %ymm0,0x60(%rsp)
+       vpxor           %ymm0,%ymm15,%ymm15
+       vpshufb         %ymm2,%ymm15,%ymm15
+
+       # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+       vpaddd          %ymm12,%ymm8,%ymm8
+       vpxor           %ymm8,%ymm4,%ymm4
+       vpslld          $7,%ymm4,%ymm0
+       vpsrld          $25,%ymm4,%ymm4
+       vpor            %ymm0,%ymm4,%ymm4
+       # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+       vpaddd          %ymm13,%ymm9,%ymm9
+       vpxor           %ymm9,%ymm5,%ymm5
+       vpslld          $7,%ymm5,%ymm0
+       vpsrld          $25,%ymm5,%ymm5
+       vpor            %ymm0,%ymm5,%ymm5
+       # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+       vpaddd          %ymm14,%ymm10,%ymm10
+       vpxor           %ymm10,%ymm6,%ymm6
+       vpslld          $7,%ymm6,%ymm0
+       vpsrld          $25,%ymm6,%ymm6
+       vpor            %ymm0,%ymm6,%ymm6
+       # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+       vpaddd          %ymm15,%ymm11,%ymm11
+       vpxor           %ymm11,%ymm7,%ymm7
+       vpslld          $7,%ymm7,%ymm0
+       vpsrld          $25,%ymm7,%ymm7
+       vpor            %ymm0,%ymm7,%ymm7
+
+       # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+       vpaddd          0x00(%rsp),%ymm5,%ymm0
+       vmovdqa         %ymm0,0x00(%rsp)
+       vpxor           %ymm0,%ymm15,%ymm15
+       vpshufb         %ymm3,%ymm15,%ymm15
+       # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+       vpaddd          0x20(%rsp),%ymm6,%ymm0
+       vmovdqa         %ymm0,0x20(%rsp)
+       vpxor           %ymm0,%ymm12,%ymm12
+       vpshufb         %ymm3,%ymm12,%ymm12
+       # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+       vpaddd          0x40(%rsp),%ymm7,%ymm0
+       vmovdqa         %ymm0,0x40(%rsp)
+       vpxor           %ymm0,%ymm13,%ymm13
+       vpshufb         %ymm3,%ymm13,%ymm13
+       # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+       vpaddd          0x60(%rsp),%ymm4,%ymm0
+       vmovdqa         %ymm0,0x60(%rsp)
+       vpxor           %ymm0,%ymm14,%ymm14
+       vpshufb         %ymm3,%ymm14,%ymm14
+
+       # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+       vpaddd          %ymm15,%ymm10,%ymm10
+       vpxor           %ymm10,%ymm5,%ymm5
+       vpslld          $12,%ymm5,%ymm0
+       vpsrld          $20,%ymm5,%ymm5
+       vpor            %ymm0,%ymm5,%ymm5
+       # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+       vpaddd          %ymm12,%ymm11,%ymm11
+       vpxor           %ymm11,%ymm6,%ymm6
+       vpslld          $12,%ymm6,%ymm0
+       vpsrld          $20,%ymm6,%ymm6
+       vpor            %ymm0,%ymm6,%ymm6
+       # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+       vpaddd          %ymm13,%ymm8,%ymm8
+       vpxor           %ymm8,%ymm7,%ymm7
+       vpslld          $12,%ymm7,%ymm0
+       vpsrld          $20,%ymm7,%ymm7
+       vpor            %ymm0,%ymm7,%ymm7
+       # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+       vpaddd          %ymm14,%ymm9,%ymm9
+       vpxor           %ymm9,%ymm4,%ymm4
+       vpslld          $12,%ymm4,%ymm0
+       vpsrld          $20,%ymm4,%ymm4
+       vpor            %ymm0,%ymm4,%ymm4
+
+       # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+       vpaddd          0x00(%rsp),%ymm5,%ymm0
+       vmovdqa         %ymm0,0x00(%rsp)
+       vpxor           %ymm0,%ymm15,%ymm15
+       vpshufb         %ymm2,%ymm15,%ymm15
+       # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+       vpaddd          0x20(%rsp),%ymm6,%ymm0
+       vmovdqa         %ymm0,0x20(%rsp)
+       vpxor           %ymm0,%ymm12,%ymm12
+       vpshufb         %ymm2,%ymm12,%ymm12
+       # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+       vpaddd          0x40(%rsp),%ymm7,%ymm0
+       vmovdqa         %ymm0,0x40(%rsp)
+       vpxor           %ymm0,%ymm13,%ymm13
+       vpshufb         %ymm2,%ymm13,%ymm13
+       # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+       vpaddd          0x60(%rsp),%ymm4,%ymm0
+       vmovdqa         %ymm0,0x60(%rsp)
+       vpxor           %ymm0,%ymm14,%ymm14
+       vpshufb         %ymm2,%ymm14,%ymm14
+
+       # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+       vpaddd          %ymm15,%ymm10,%ymm10
+       vpxor           %ymm10,%ymm5,%ymm5
+       vpslld          $7,%ymm5,%ymm0
+       vpsrld          $25,%ymm5,%ymm5
+       vpor            %ymm0,%ymm5,%ymm5
+       # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+       vpaddd          %ymm12,%ymm11,%ymm11
+       vpxor           %ymm11,%ymm6,%ymm6
+       vpslld          $7,%ymm6,%ymm0
+       vpsrld          $25,%ymm6,%ymm6
+       vpor            %ymm0,%ymm6,%ymm6
+       # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+       vpaddd          %ymm13,%ymm8,%ymm8
+       vpxor           %ymm8,%ymm7,%ymm7
+       vpslld          $7,%ymm7,%ymm0
+       vpsrld          $25,%ymm7,%ymm7
+       vpor            %ymm0,%ymm7,%ymm7
+       # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+       vpaddd          %ymm14,%ymm9,%ymm9
+       vpxor           %ymm9,%ymm4,%ymm4
+       vpslld          $7,%ymm4,%ymm0
+       vpsrld          $25,%ymm4,%ymm4
+       vpor            %ymm0,%ymm4,%ymm4
+
+       dec             %ecx
+       jnz             .Ldoubleround8
+
+       # x0..15[0-7] += s[0..15]
+       vpbroadcastd    0x00(%rdi),%ymm0
+       vpaddd          0x00(%rsp),%ymm0,%ymm0
+       vmovdqa         %ymm0,0x00(%rsp)
+       vpbroadcastd    0x04(%rdi),%ymm0
+       vpaddd          0x20(%rsp),%ymm0,%ymm0
+       vmovdqa         %ymm0,0x20(%rsp)
+       vpbroadcastd    0x08(%rdi),%ymm0
+       vpaddd          0x40(%rsp),%ymm0,%ymm0
+       vmovdqa         %ymm0,0x40(%rsp)
+       vpbroadcastd    0x0c(%rdi),%ymm0
+       vpaddd          0x60(%rsp),%ymm0,%ymm0
+       vmovdqa         %ymm0,0x60(%rsp)
+       vpbroadcastd    0x10(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm4,%ymm4
+       vpbroadcastd    0x14(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm5,%ymm5
+       vpbroadcastd    0x18(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm6,%ymm6
+       vpbroadcastd    0x1c(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm7,%ymm7
+       vpbroadcastd    0x20(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm8,%ymm8
+       vpbroadcastd    0x24(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm9,%ymm9
+       vpbroadcastd    0x28(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm10,%ymm10
+       vpbroadcastd    0x2c(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm11,%ymm11
+       vpbroadcastd    0x30(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm12,%ymm12
+       vpbroadcastd    0x34(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm13,%ymm13
+       vpbroadcastd    0x38(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm14,%ymm14
+       vpbroadcastd    0x3c(%rdi),%ymm0
+       vpaddd          %ymm0,%ymm15,%ymm15
+
+       # x12 += counter values 0-7
+       vpaddd          %ymm1,%ymm12,%ymm12
+
+       # interleave 32-bit words in state n, n+1
+       vmovdqa         0x00(%rsp),%ymm0
+       vmovdqa         0x20(%rsp),%ymm1
+       vpunpckldq      %ymm1,%ymm0,%ymm2
+       vpunpckhdq      %ymm1,%ymm0,%ymm1
+       vmovdqa         %ymm2,0x00(%rsp)
+       vmovdqa         %ymm1,0x20(%rsp)
+       vmovdqa         0x40(%rsp),%ymm0
+       vmovdqa         0x60(%rsp),%ymm1
+       vpunpckldq      %ymm1,%ymm0,%ymm2
+       vpunpckhdq      %ymm1,%ymm0,%ymm1
+       vmovdqa         %ymm2,0x40(%rsp)
+       vmovdqa         %ymm1,0x60(%rsp)
+       vmovdqa         %ymm4,%ymm0
+       vpunpckldq      %ymm5,%ymm0,%ymm4
+       vpunpckhdq      %ymm5,%ymm0,%ymm5
+       vmovdqa         %ymm6,%ymm0
+       vpunpckldq      %ymm7,%ymm0,%ymm6
+       vpunpckhdq      %ymm7,%ymm0,%ymm7
+       vmovdqa         %ymm8,%ymm0
+       vpunpckldq      %ymm9,%ymm0,%ymm8
+       vpunpckhdq      %ymm9,%ymm0,%ymm9
+       vmovdqa         %ymm10,%ymm0
+       vpunpckldq      %ymm11,%ymm0,%ymm10
+       vpunpckhdq      %ymm11,%ymm0,%ymm11
+       vmovdqa         %ymm12,%ymm0
+       vpunpckldq      %ymm13,%ymm0,%ymm12
+       vpunpckhdq      %ymm13,%ymm0,%ymm13
+       vmovdqa         %ymm14,%ymm0
+       vpunpckldq      %ymm15,%ymm0,%ymm14
+       vpunpckhdq      %ymm15,%ymm0,%ymm15
+
+       # interleave 64-bit words in state n, n+2
+       vmovdqa         0x00(%rsp),%ymm0
+       vmovdqa         0x40(%rsp),%ymm2
+       vpunpcklqdq     %ymm2,%ymm0,%ymm1
+       vpunpckhqdq     %ymm2,%ymm0,%ymm2
+       vmovdqa         %ymm1,0x00(%rsp)
+       vmovdqa         %ymm2,0x40(%rsp)
+       vmovdqa         0x20(%rsp),%ymm0
+       vmovdqa         0x60(%rsp),%ymm2
+       vpunpcklqdq     %ymm2,%ymm0,%ymm1
+       vpunpckhqdq     %ymm2,%ymm0,%ymm2
+       vmovdqa         %ymm1,0x20(%rsp)
+       vmovdqa         %ymm2,0x60(%rsp)
+       vmovdqa         %ymm4,%ymm0
+       vpunpcklqdq     %ymm6,%ymm0,%ymm4
+       vpunpckhqdq     %ymm6,%ymm0,%ymm6
+       vmovdqa         %ymm5,%ymm0
+       vpunpcklqdq     %ymm7,%ymm0,%ymm5
+       vpunpckhqdq     %ymm7,%ymm0,%ymm7
+       vmovdqa         %ymm8,%ymm0
+       vpunpcklqdq     %ymm10,%ymm0,%ymm8
+       vpunpckhqdq     %ymm10,%ymm0,%ymm10
+       vmovdqa         %ymm9,%ymm0
+       vpunpcklqdq     %ymm11,%ymm0,%ymm9
+       vpunpckhqdq     %ymm11,%ymm0,%ymm11
+       vmovdqa         %ymm12,%ymm0
+       vpunpcklqdq     %ymm14,%ymm0,%ymm12
+       vpunpckhqdq     %ymm14,%ymm0,%ymm14
+       vmovdqa         %ymm13,%ymm0
+       vpunpcklqdq     %ymm15,%ymm0,%ymm13
+       vpunpckhqdq     %ymm15,%ymm0,%ymm15
+
+       # interleave 128-bit words in state n, n+4
+       vmovdqa         0x00(%rsp),%ymm0
+       vperm2i128      $0x20,%ymm4,%ymm0,%ymm1
+       vperm2i128      $0x31,%ymm4,%ymm0,%ymm4
+       vmovdqa         %ymm1,0x00(%rsp)
+       vmovdqa         0x20(%rsp),%ymm0
+       vperm2i128      $0x20,%ymm5,%ymm0,%ymm1
+       vperm2i128      $0x31,%ymm5,%ymm0,%ymm5
+       vmovdqa         %ymm1,0x20(%rsp)
+       vmovdqa         0x40(%rsp),%ymm0
+       vperm2i128      $0x20,%ymm6,%ymm0,%ymm1
+       vperm2i128      $0x31,%ymm6,%ymm0,%ymm6
+       vmovdqa         %ymm1,0x40(%rsp)
+       vmovdqa         0x60(%rsp),%ymm0
+       vperm2i128      $0x20,%ymm7,%ymm0,%ymm1
+       vperm2i128      $0x31,%ymm7,%ymm0,%ymm7
+       vmovdqa         %ymm1,0x60(%rsp)
+       vperm2i128      $0x20,%ymm12,%ymm8,%ymm0
+       vperm2i128      $0x31,%ymm12,%ymm8,%ymm12
+       vmovdqa         %ymm0,%ymm8
+       vperm2i128      $0x20,%ymm13,%ymm9,%ymm0
+       vperm2i128      $0x31,%ymm13,%ymm9,%ymm13
+       vmovdqa         %ymm0,%ymm9
+       vperm2i128      $0x20,%ymm14,%ymm10,%ymm0
+       vperm2i128      $0x31,%ymm14,%ymm10,%ymm14
+       vmovdqa         %ymm0,%ymm10
+       vperm2i128      $0x20,%ymm15,%ymm11,%ymm0
+       vperm2i128      $0x31,%ymm15,%ymm11,%ymm15
+       vmovdqa         %ymm0,%ymm11
+
+       # xor with corresponding input, write to output
+       vmovdqa         0x00(%rsp),%ymm0
+       vpxor           0x0000(%rdx),%ymm0,%ymm0
+       vmovdqu         %ymm0,0x0000(%rsi)
+       vmovdqa         0x20(%rsp),%ymm0
+       vpxor           0x0080(%rdx),%ymm0,%ymm0
+       vmovdqu         %ymm0,0x0080(%rsi)
+       vmovdqa         0x40(%rsp),%ymm0
+       vpxor           0x0040(%rdx),%ymm0,%ymm0
+       vmovdqu         %ymm0,0x0040(%rsi)
+       vmovdqa         0x60(%rsp),%ymm0
+       vpxor           0x00c0(%rdx),%ymm0,%ymm0
+       vmovdqu         %ymm0,0x00c0(%rsi)
+       vpxor           0x0100(%rdx),%ymm4,%ymm4
+       vmovdqu         %ymm4,0x0100(%rsi)
+       vpxor           0x0180(%rdx),%ymm5,%ymm5
+       vmovdqu         %ymm5,0x00180(%rsi)
+       vpxor           0x0140(%rdx),%ymm6,%ymm6
+       vmovdqu         %ymm6,0x0140(%rsi)
+       vpxor           0x01c0(%rdx),%ymm7,%ymm7
+       vmovdqu         %ymm7,0x01c0(%rsi)
+       vpxor           0x0020(%rdx),%ymm8,%ymm8
+       vmovdqu         %ymm8,0x0020(%rsi)
+       vpxor           0x00a0(%rdx),%ymm9,%ymm9
+       vmovdqu         %ymm9,0x00a0(%rsi)
+       vpxor           0x0060(%rdx),%ymm10,%ymm10
+       vmovdqu         %ymm10,0x0060(%rsi)
+       vpxor           0x00e0(%rdx),%ymm11,%ymm11
+       vmovdqu         %ymm11,0x00e0(%rsi)
+       vpxor           0x0120(%rdx),%ymm12,%ymm12
+       vmovdqu         %ymm12,0x0120(%rsi)
+       vpxor           0x01a0(%rdx),%ymm13,%ymm13
+       vmovdqu         %ymm13,0x01a0(%rsi)
+       vpxor           0x0160(%rdx),%ymm14,%ymm14
+       vmovdqu         %ymm14,0x0160(%rsi)
+       vpxor           0x01e0(%rdx),%ymm15,%ymm15
+       vmovdqu         %ymm15,0x01e0(%rsi)
+
+       vzeroupper
+       mov             %r8,%rsp
+       ret
+ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S
new file mode 100644 (file)
index 0000000..712b130
--- /dev/null
@@ -0,0 +1,625 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 16
+
+ROT8:  .octa 0x0e0d0c0f0a09080b0605040702010003
+ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+CTRINC:        .octa 0x00000003000000020000000100000000
+
+.text
+
+ENTRY(chacha20_block_xor_ssse3)
+       # %rdi: Input state matrix, s
+       # %rsi: 1 data block output, o
+       # %rdx: 1 data block input, i
+
+       # This function encrypts one ChaCha20 block by loading the state matrix
+       # in four SSE registers. It performs matrix operation on four words in
+       # parallel, but requireds shuffling to rearrange the words after each
+       # round. 8/16-bit word rotation is done with the slightly better
+       # performing SSSE3 byte shuffling, 7/12-bit word rotation uses
+       # traditional shift+OR.
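+       #
+       # State layout: xmm0..xmm3 hold rows 0..3 of the 4x4 ChaCha20 state
+       # (one 32-bit word per lane). The first half of .Ldoubleround works on
+       # the columns; the pshufd rotations of x1..x3 then line the diagonals
+       # up in the same lanes so the identical quarter-round sequence can be
+       # reused, after which the inverse shuffles restore the row layout.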
+
+       # x0..3 = s0..3
+       movdqa          0x00(%rdi),%xmm0
+       movdqa          0x10(%rdi),%xmm1
+       movdqa          0x20(%rdi),%xmm2
+       movdqa          0x30(%rdi),%xmm3
+       movdqa          %xmm0,%xmm8
+       movdqa          %xmm1,%xmm9
+       movdqa          %xmm2,%xmm10
+       movdqa          %xmm3,%xmm11
+
+       movdqa          ROT8(%rip),%xmm4
+       movdqa          ROT16(%rip),%xmm5
+
+       mov     $10,%ecx
+
+.Ldoubleround:
+
+       # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+       paddd           %xmm1,%xmm0
+       pxor            %xmm0,%xmm3
+       pshufb          %xmm5,%xmm3
+
+       # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+       paddd           %xmm3,%xmm2
+       pxor            %xmm2,%xmm1
+       movdqa          %xmm1,%xmm6
+       pslld           $12,%xmm6
+       psrld           $20,%xmm1
+       por             %xmm6,%xmm1
+
+       # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+       paddd           %xmm1,%xmm0
+       pxor            %xmm0,%xmm3
+       pshufb          %xmm4,%xmm3
+
+       # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+       paddd           %xmm3,%xmm2
+       pxor            %xmm2,%xmm1
+       movdqa          %xmm1,%xmm7
+       pslld           $7,%xmm7
+       psrld           $25,%xmm1
+       por             %xmm7,%xmm1
+
+       # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+       pshufd          $0x39,%xmm1,%xmm1
+       # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+       pshufd          $0x4e,%xmm2,%xmm2
+       # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+       pshufd          $0x93,%xmm3,%xmm3
+
+       # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+       paddd           %xmm1,%xmm0
+       pxor            %xmm0,%xmm3
+       pshufb          %xmm5,%xmm3
+
+       # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+       paddd           %xmm3,%xmm2
+       pxor            %xmm2,%xmm1
+       movdqa          %xmm1,%xmm6
+       pslld           $12,%xmm6
+       psrld           $20,%xmm1
+       por             %xmm6,%xmm1
+
+       # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+       paddd           %xmm1,%xmm0
+       pxor            %xmm0,%xmm3
+       pshufb          %xmm4,%xmm3
+
+       # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+       paddd           %xmm3,%xmm2
+       pxor            %xmm2,%xmm1
+       movdqa          %xmm1,%xmm7
+       pslld           $7,%xmm7
+       psrld           $25,%xmm1
+       por             %xmm7,%xmm1
+
+       # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+       pshufd          $0x93,%xmm1,%xmm1
+       # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+       pshufd          $0x4e,%xmm2,%xmm2
+       # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+       pshufd          $0x39,%xmm3,%xmm3
+
+       dec             %ecx
+       jnz             .Ldoubleround
+
+       # o0 = i0 ^ (x0 + s0)
+       movdqu          0x00(%rdx),%xmm4
+       paddd           %xmm8,%xmm0
+       pxor            %xmm4,%xmm0
+       movdqu          %xmm0,0x00(%rsi)
+       # o1 = i1 ^ (x1 + s1)
+       movdqu          0x10(%rdx),%xmm5
+       paddd           %xmm9,%xmm1
+       pxor            %xmm5,%xmm1
+       movdqu          %xmm1,0x10(%rsi)
+       # o2 = i2 ^ (x2 + s2)
+       movdqu          0x20(%rdx),%xmm6
+       paddd           %xmm10,%xmm2
+       pxor            %xmm6,%xmm2
+       movdqu          %xmm2,0x20(%rsi)
+       # o3 = i3 ^ (x3 + s3)
+       movdqu          0x30(%rdx),%xmm7
+       paddd           %xmm11,%xmm3
+       pxor            %xmm7,%xmm3
+       movdqu          %xmm3,0x30(%rsi)
+
+       ret
+ENDPROC(chacha20_block_xor_ssse3)
+
+ENTRY(chacha20_4block_xor_ssse3)
+       # %rdi: Input state matrix, s
+       # %rsi: 4 data blocks output, o
+       # %rdx: 4 data blocks input, i
+
+       # This function encrypts four consecutive ChaCha20 blocks by loading the
+       # state matrix in SSE registers four times. As we need some scratch
+       # registers, we save the first four registers on the stack. The
+       # algorithm performs each operation on the corresponding word of each
+       # state matrix, hence requires no word shuffling. For final XORing step
+       # we transpose the matrix by interleaving 32- and then 64-bit words,
+       # which allows us to do XOR in SSE registers. 8/16-bit word rotation is
+       # done with the slightly better performing SSSE3 byte shuffling,
+       # 7/12-bit word rotation uses traditional shift+OR.
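+       #
+       # Transpose sketch: before the interleave steps, register/stack slot n
+       # holds state word n with the block index in the lane. Interleaving
+       # 32-bit words pairs registers (n, n+1) and interleaving 64-bit words
+       # then pairs (n, n+2), after which each register holds four
+       # consecutive words of a single block, ready to be XORed against the
+       # corresponding 16 bytes of input.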
+
+       sub             $0x40,%rsp
+
+       # x0..15[0-3] = s0..3[0..3]
+       movq            0x00(%rdi),%xmm1
+       pshufd          $0x00,%xmm1,%xmm0
+       pshufd          $0x55,%xmm1,%xmm1
+       movq            0x08(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       movq            0x10(%rdi),%xmm5
+       pshufd          $0x00,%xmm5,%xmm4
+       pshufd          $0x55,%xmm5,%xmm5
+       movq            0x18(%rdi),%xmm7
+       pshufd          $0x00,%xmm7,%xmm6
+       pshufd          $0x55,%xmm7,%xmm7
+       movq            0x20(%rdi),%xmm9
+       pshufd          $0x00,%xmm9,%xmm8
+       pshufd          $0x55,%xmm9,%xmm9
+       movq            0x28(%rdi),%xmm11
+       pshufd          $0x00,%xmm11,%xmm10
+       pshufd          $0x55,%xmm11,%xmm11
+       movq            0x30(%rdi),%xmm13
+       pshufd          $0x00,%xmm13,%xmm12
+       pshufd          $0x55,%xmm13,%xmm13
+       movq            0x38(%rdi),%xmm15
+       pshufd          $0x00,%xmm15,%xmm14
+       pshufd          $0x55,%xmm15,%xmm15
+       # x0..3 on stack
+       movdqa          %xmm0,0x00(%rsp)
+       movdqa          %xmm1,0x10(%rsp)
+       movdqa          %xmm2,0x20(%rsp)
+       movdqa          %xmm3,0x30(%rsp)
+
+       movdqa          CTRINC(%rip),%xmm1
+       movdqa          ROT8(%rip),%xmm2
+       movdqa          ROT16(%rip),%xmm3
+
+       # x12 += counter values 0-3
+       paddd           %xmm1,%xmm12
+
+       mov             $10,%ecx
+
+.Ldoubleround4:
+       # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+       movdqa          0x00(%rsp),%xmm0
+       paddd           %xmm4,%xmm0
+       movdqa          %xmm0,0x00(%rsp)
+       pxor            %xmm0,%xmm12
+       pshufb          %xmm3,%xmm12
+       # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+       movdqa          0x10(%rsp),%xmm0
+       paddd           %xmm5,%xmm0
+       movdqa          %xmm0,0x10(%rsp)
+       pxor            %xmm0,%xmm13
+       pshufb          %xmm3,%xmm13
+       # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+       movdqa          0x20(%rsp),%xmm0
+       paddd           %xmm6,%xmm0
+       movdqa          %xmm0,0x20(%rsp)
+       pxor            %xmm0,%xmm14
+       pshufb          %xmm3,%xmm14
+       # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+       movdqa          0x30(%rsp),%xmm0
+       paddd           %xmm7,%xmm0
+       movdqa          %xmm0,0x30(%rsp)
+       pxor            %xmm0,%xmm15
+       pshufb          %xmm3,%xmm15
+
+       # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+       paddd           %xmm12,%xmm8
+       pxor            %xmm8,%xmm4
+       movdqa          %xmm4,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm4
+       por             %xmm0,%xmm4
+       # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+       paddd           %xmm13,%xmm9
+       pxor            %xmm9,%xmm5
+       movdqa          %xmm5,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm5
+       por             %xmm0,%xmm5
+       # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+       paddd           %xmm14,%xmm10
+       pxor            %xmm10,%xmm6
+       movdqa          %xmm6,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm6
+       por             %xmm0,%xmm6
+       # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+       paddd           %xmm15,%xmm11
+       pxor            %xmm11,%xmm7
+       movdqa          %xmm7,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm7
+       por             %xmm0,%xmm7
+
+       # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+       movdqa          0x00(%rsp),%xmm0
+       paddd           %xmm4,%xmm0
+       movdqa          %xmm0,0x00(%rsp)
+       pxor            %xmm0,%xmm12
+       pshufb          %xmm2,%xmm12
+       # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+       movdqa          0x10(%rsp),%xmm0
+       paddd           %xmm5,%xmm0
+       movdqa          %xmm0,0x10(%rsp)
+       pxor            %xmm0,%xmm13
+       pshufb          %xmm2,%xmm13
+       # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+       movdqa          0x20(%rsp),%xmm0
+       paddd           %xmm6,%xmm0
+       movdqa          %xmm0,0x20(%rsp)
+       pxor            %xmm0,%xmm14
+       pshufb          %xmm2,%xmm14
+       # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+       movdqa          0x30(%rsp),%xmm0
+       paddd           %xmm7,%xmm0
+       movdqa          %xmm0,0x30(%rsp)
+       pxor            %xmm0,%xmm15
+       pshufb          %xmm2,%xmm15
+
+       # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+       paddd           %xmm12,%xmm8
+       pxor            %xmm8,%xmm4
+       movdqa          %xmm4,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm4
+       por             %xmm0,%xmm4
+       # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+       paddd           %xmm13,%xmm9
+       pxor            %xmm9,%xmm5
+       movdqa          %xmm5,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm5
+       por             %xmm0,%xmm5
+       # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+       paddd           %xmm14,%xmm10
+       pxor            %xmm10,%xmm6
+       movdqa          %xmm6,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm6
+       por             %xmm0,%xmm6
+       # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+       paddd           %xmm15,%xmm11
+       pxor            %xmm11,%xmm7
+       movdqa          %xmm7,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm7
+       por             %xmm0,%xmm7
+
+       # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+       movdqa          0x00(%rsp),%xmm0
+       paddd           %xmm5,%xmm0
+       movdqa          %xmm0,0x00(%rsp)
+       pxor            %xmm0,%xmm15
+       pshufb          %xmm3,%xmm15
+       # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+       movdqa          0x10(%rsp),%xmm0
+       paddd           %xmm6,%xmm0
+       movdqa          %xmm0,0x10(%rsp)
+       pxor            %xmm0,%xmm12
+       pshufb          %xmm3,%xmm12
+       # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+       movdqa          0x20(%rsp),%xmm0
+       paddd           %xmm7,%xmm0
+       movdqa          %xmm0,0x20(%rsp)
+       pxor            %xmm0,%xmm13
+       pshufb          %xmm3,%xmm13
+       # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+       movdqa          0x30(%rsp),%xmm0
+       paddd           %xmm4,%xmm0
+       movdqa          %xmm0,0x30(%rsp)
+       pxor            %xmm0,%xmm14
+       pshufb          %xmm3,%xmm14
+
+       # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+       paddd           %xmm15,%xmm10
+       pxor            %xmm10,%xmm5
+       movdqa          %xmm5,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm5
+       por             %xmm0,%xmm5
+       # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+       paddd           %xmm12,%xmm11
+       pxor            %xmm11,%xmm6
+       movdqa          %xmm6,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm6
+       por             %xmm0,%xmm6
+       # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+       paddd           %xmm13,%xmm8
+       pxor            %xmm8,%xmm7
+       movdqa          %xmm7,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm7
+       por             %xmm0,%xmm7
+       # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+       paddd           %xmm14,%xmm9
+       pxor            %xmm9,%xmm4
+       movdqa          %xmm4,%xmm0
+       pslld           $12,%xmm0
+       psrld           $20,%xmm4
+       por             %xmm0,%xmm4
+
+       # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+       movdqa          0x00(%rsp),%xmm0
+       paddd           %xmm5,%xmm0
+       movdqa          %xmm0,0x00(%rsp)
+       pxor            %xmm0,%xmm15
+       pshufb          %xmm2,%xmm15
+       # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+       movdqa          0x10(%rsp),%xmm0
+       paddd           %xmm6,%xmm0
+       movdqa          %xmm0,0x10(%rsp)
+       pxor            %xmm0,%xmm12
+       pshufb          %xmm2,%xmm12
+       # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+       movdqa          0x20(%rsp),%xmm0
+       paddd           %xmm7,%xmm0
+       movdqa          %xmm0,0x20(%rsp)
+       pxor            %xmm0,%xmm13
+       pshufb          %xmm2,%xmm13
+       # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+       movdqa          0x30(%rsp),%xmm0
+       paddd           %xmm4,%xmm0
+       movdqa          %xmm0,0x30(%rsp)
+       pxor            %xmm0,%xmm14
+       pshufb          %xmm2,%xmm14
+
+       # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+       paddd           %xmm15,%xmm10
+       pxor            %xmm10,%xmm5
+       movdqa          %xmm5,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm5
+       por             %xmm0,%xmm5
+       # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+       paddd           %xmm12,%xmm11
+       pxor            %xmm11,%xmm6
+       movdqa          %xmm6,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm6
+       por             %xmm0,%xmm6
+       # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+       paddd           %xmm13,%xmm8
+       pxor            %xmm8,%xmm7
+       movdqa          %xmm7,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm7
+       por             %xmm0,%xmm7
+       # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+       paddd           %xmm14,%xmm9
+       pxor            %xmm9,%xmm4
+       movdqa          %xmm4,%xmm0
+       pslld           $7,%xmm0
+       psrld           $25,%xmm4
+       por             %xmm0,%xmm4
+
+       dec             %ecx
+       jnz             .Ldoubleround4
+
+       # x0[0-3] += s0[0]
+       # x1[0-3] += s0[1]
+       movq            0x00(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           0x00(%rsp),%xmm2
+       movdqa          %xmm2,0x00(%rsp)
+       paddd           0x10(%rsp),%xmm3
+       movdqa          %xmm3,0x10(%rsp)
+       # x2[0-3] += s0[2]
+       # x3[0-3] += s0[3]
+       movq            0x08(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           0x20(%rsp),%xmm2
+       movdqa          %xmm2,0x20(%rsp)
+       paddd           0x30(%rsp),%xmm3
+       movdqa          %xmm3,0x30(%rsp)
+
+       # x4[0-3] += s1[0]
+       # x5[0-3] += s1[1]
+       movq            0x10(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm4
+       paddd           %xmm3,%xmm5
+       # x6[0-3] += s1[2]
+       # x7[0-3] += s1[3]
+       movq            0x18(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm6
+       paddd           %xmm3,%xmm7
+
+       # x8[0-3] += s2[0]
+       # x9[0-3] += s2[1]
+       movq            0x20(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm8
+       paddd           %xmm3,%xmm9
+       # x10[0-3] += s2[2]
+       # x11[0-3] += s2[3]
+       movq            0x28(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm10
+       paddd           %xmm3,%xmm11
+
+       # x12[0-3] += s3[0]
+       # x13[0-3] += s3[1]
+       movq            0x30(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm12
+       paddd           %xmm3,%xmm13
+       # x14[0-3] += s3[2]
+       # x15[0-3] += s3[3]
+       movq            0x38(%rdi),%xmm3
+       pshufd          $0x00,%xmm3,%xmm2
+       pshufd          $0x55,%xmm3,%xmm3
+       paddd           %xmm2,%xmm14
+       paddd           %xmm3,%xmm15
+
+       # x12 += counter values 0-3
+       paddd           %xmm1,%xmm12
+
+       # interleave 32-bit words in state n, n+1
+       movdqa          0x00(%rsp),%xmm0
+       movdqa          0x10(%rsp),%xmm1
+       movdqa          %xmm0,%xmm2
+       punpckldq       %xmm1,%xmm2
+       punpckhdq       %xmm1,%xmm0
+       movdqa          %xmm2,0x00(%rsp)
+       movdqa          %xmm0,0x10(%rsp)
+       movdqa          0x20(%rsp),%xmm0
+       movdqa          0x30(%rsp),%xmm1
+       movdqa          %xmm0,%xmm2
+       punpckldq       %xmm1,%xmm2
+       punpckhdq       %xmm1,%xmm0
+       movdqa          %xmm2,0x20(%rsp)
+       movdqa          %xmm0,0x30(%rsp)
+       movdqa          %xmm4,%xmm0
+       punpckldq       %xmm5,%xmm4
+       punpckhdq       %xmm5,%xmm0
+       movdqa          %xmm0,%xmm5
+       movdqa          %xmm6,%xmm0
+       punpckldq       %xmm7,%xmm6
+       punpckhdq       %xmm7,%xmm0
+       movdqa          %xmm0,%xmm7
+       movdqa          %xmm8,%xmm0
+       punpckldq       %xmm9,%xmm8
+       punpckhdq       %xmm9,%xmm0
+       movdqa          %xmm0,%xmm9
+       movdqa          %xmm10,%xmm0
+       punpckldq       %xmm11,%xmm10
+       punpckhdq       %xmm11,%xmm0
+       movdqa          %xmm0,%xmm11
+       movdqa          %xmm12,%xmm0
+       punpckldq       %xmm13,%xmm12
+       punpckhdq       %xmm13,%xmm0
+       movdqa          %xmm0,%xmm13
+       movdqa          %xmm14,%xmm0
+       punpckldq       %xmm15,%xmm14
+       punpckhdq       %xmm15,%xmm0
+       movdqa          %xmm0,%xmm15
+
+       # interleave 64-bit words in state n, n+2
+       movdqa          0x00(%rsp),%xmm0
+       movdqa          0x20(%rsp),%xmm1
+       movdqa          %xmm0,%xmm2
+       punpcklqdq      %xmm1,%xmm2
+       punpckhqdq      %xmm1,%xmm0
+       movdqa          %xmm2,0x00(%rsp)
+       movdqa          %xmm0,0x20(%rsp)
+       movdqa          0x10(%rsp),%xmm0
+       movdqa          0x30(%rsp),%xmm1
+       movdqa          %xmm0,%xmm2
+       punpcklqdq      %xmm1,%xmm2
+       punpckhqdq      %xmm1,%xmm0
+       movdqa          %xmm2,0x10(%rsp)
+       movdqa          %xmm0,0x30(%rsp)
+       movdqa          %xmm4,%xmm0
+       punpcklqdq      %xmm6,%xmm4
+       punpckhqdq      %xmm6,%xmm0
+       movdqa          %xmm0,%xmm6
+       movdqa          %xmm5,%xmm0
+       punpcklqdq      %xmm7,%xmm5
+       punpckhqdq      %xmm7,%xmm0
+       movdqa          %xmm0,%xmm7
+       movdqa          %xmm8,%xmm0
+       punpcklqdq      %xmm10,%xmm8
+       punpckhqdq      %xmm10,%xmm0
+       movdqa          %xmm0,%xmm10
+       movdqa          %xmm9,%xmm0
+       punpcklqdq      %xmm11,%xmm9
+       punpckhqdq      %xmm11,%xmm0
+       movdqa          %xmm0,%xmm11
+       movdqa          %xmm12,%xmm0
+       punpcklqdq      %xmm14,%xmm12
+       punpckhqdq      %xmm14,%xmm0
+       movdqa          %xmm0,%xmm14
+       movdqa          %xmm13,%xmm0
+       punpcklqdq      %xmm15,%xmm13
+       punpckhqdq      %xmm15,%xmm0
+       movdqa          %xmm0,%xmm15
+
+       # xor with corresponding input, write to output
+       movdqa          0x00(%rsp),%xmm0
+       movdqu          0x00(%rdx),%xmm1
+       pxor            %xmm1,%xmm0
+       movdqu          %xmm0,0x00(%rsi)
+       movdqa          0x10(%rsp),%xmm0
+       movdqu          0x80(%rdx),%xmm1
+       pxor            %xmm1,%xmm0
+       movdqu          %xmm0,0x80(%rsi)
+       movdqa          0x20(%rsp),%xmm0
+       movdqu          0x40(%rdx),%xmm1
+       pxor            %xmm1,%xmm0
+       movdqu          %xmm0,0x40(%rsi)
+       movdqa          0x30(%rsp),%xmm0
+       movdqu          0xc0(%rdx),%xmm1
+       pxor            %xmm1,%xmm0
+       movdqu          %xmm0,0xc0(%rsi)
+       movdqu          0x10(%rdx),%xmm1
+       pxor            %xmm1,%xmm4
+       movdqu          %xmm4,0x10(%rsi)
+       movdqu          0x90(%rdx),%xmm1
+       pxor            %xmm1,%xmm5
+       movdqu          %xmm5,0x90(%rsi)
+       movdqu          0x50(%rdx),%xmm1
+       pxor            %xmm1,%xmm6
+       movdqu          %xmm6,0x50(%rsi)
+       movdqu          0xd0(%rdx),%xmm1
+       pxor            %xmm1,%xmm7
+       movdqu          %xmm7,0xd0(%rsi)
+       movdqu          0x20(%rdx),%xmm1
+       pxor            %xmm1,%xmm8
+       movdqu          %xmm8,0x20(%rsi)
+       movdqu          0xa0(%rdx),%xmm1
+       pxor            %xmm1,%xmm9
+       movdqu          %xmm9,0xa0(%rsi)
+       movdqu          0x60(%rdx),%xmm1
+       pxor            %xmm1,%xmm10
+       movdqu          %xmm10,0x60(%rsi)
+       movdqu          0xe0(%rdx),%xmm1
+       pxor            %xmm1,%xmm11
+       movdqu          %xmm11,0xe0(%rsi)
+       movdqu          0x30(%rdx),%xmm1
+       pxor            %xmm1,%xmm12
+       movdqu          %xmm12,0x30(%rsi)
+       movdqu          0xb0(%rdx),%xmm1
+       pxor            %xmm1,%xmm13
+       movdqu          %xmm13,0xb0(%rsi)
+       movdqu          0x70(%rdx),%xmm1
+       pxor            %xmm1,%xmm14
+       movdqu          %xmm14,0x70(%rsi)
+       movdqu          0xf0(%rdx),%xmm1
+       pxor            %xmm1,%xmm15
+       movdqu          %xmm15,0xf0(%rsi)
+
+       add             $0x40,%rsp
+       ret
+ENDPROC(chacha20_4block_xor_ssse3)
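For reference, the routine above computes the same result as four independent single-block ChaCha20 invocations whose block counters are n, n+1, n+2 and n+3; the SIMD code simply keeps word k of all four states in one vector, which is why the 32-bit and 64-bit interleave passes are needed before the final XOR against the input. A minimal scalar sketch of the same computation (illustration only, not part of the patch; chacha20_block_keystream() is a hypothetical single-block helper that produces 64 bytes of keystream and does not advance the counter):

        static void chacha20_4block_xor_ref(const u32 state[16], u8 *dst, const u8 *src)
        {
                u32 s[16];
                u8 ks[CHACHA20_BLOCK_SIZE];     /* 64 bytes */
                int i, j;

                for (i = 0; i < 4; i++) {
                        memcpy(s, state, sizeof(s));
                        s[12] += i;             /* block counters n .. n+3 */
                        chacha20_block_keystream(s, ks);  /* 20 rounds + feed-forward */
                        for (j = 0; j < CHACHA20_BLOCK_SIZE; j++)
                                dst[i * CHACHA20_BLOCK_SIZE + j] =
                                        src[i * CHACHA20_BLOCK_SIZE + j] ^ ks[j];
                }
        }

Like the assembly, this sketch does not write the advanced counter back to state[12]; the caller (chacha20_dosimd() in the glue code below) adds 4 after each call.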
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
new file mode 100644 (file)
index 0000000..effe216
--- /dev/null
@@ -0,0 +1,150 @@
+/*
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha20.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+#define CHACHA20_STATE_ALIGN 16
+
+asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
+asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
+#ifdef CONFIG_AS_AVX2
+asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
+static bool chacha20_use_avx2;
+#endif
+
+static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
+                           unsigned int bytes)
+{
+       u8 buf[CHACHA20_BLOCK_SIZE];
+
+#ifdef CONFIG_AS_AVX2
+       if (chacha20_use_avx2) {
+               while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
+                       chacha20_8block_xor_avx2(state, dst, src);
+                       bytes -= CHACHA20_BLOCK_SIZE * 8;
+                       src += CHACHA20_BLOCK_SIZE * 8;
+                       dst += CHACHA20_BLOCK_SIZE * 8;
+                       state[12] += 8;
+               }
+       }
+#endif
+       while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
+               chacha20_4block_xor_ssse3(state, dst, src);
+               bytes -= CHACHA20_BLOCK_SIZE * 4;
+               src += CHACHA20_BLOCK_SIZE * 4;
+               dst += CHACHA20_BLOCK_SIZE * 4;
+               state[12] += 4;
+       }
+       while (bytes >= CHACHA20_BLOCK_SIZE) {
+               chacha20_block_xor_ssse3(state, dst, src);
+               bytes -= CHACHA20_BLOCK_SIZE;
+               src += CHACHA20_BLOCK_SIZE;
+               dst += CHACHA20_BLOCK_SIZE;
+               state[12]++;
+       }
+       if (bytes) {
+               memcpy(buf, src, bytes);
+               chacha20_block_xor_ssse3(state, buf, buf);
+               memcpy(dst, buf, bytes);
+       }
+}
+
+static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
+                        struct scatterlist *src, unsigned int nbytes)
+{
+       u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1];
+       struct blkcipher_walk walk;
+       int err;
+
+       if (!may_use_simd())
+               return crypto_chacha20_crypt(desc, dst, src, nbytes);
+
+       state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN);
+
+       blkcipher_walk_init(&walk, dst, src, nbytes);
+       err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
+
+       crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+
+       kernel_fpu_begin();
+
+       while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+               chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+                               rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+               err = blkcipher_walk_done(desc, &walk,
+                                         walk.nbytes % CHACHA20_BLOCK_SIZE);
+       }
+
+       if (walk.nbytes) {
+               chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
+                               walk.nbytes);
+               err = blkcipher_walk_done(desc, &walk, 0);
+       }
+
+       kernel_fpu_end();
+
+       return err;
+}
+
+static struct crypto_alg alg = {
+       .cra_name               = "chacha20",
+       .cra_driver_name        = "chacha20-simd",
+       .cra_priority           = 300,
+       .cra_flags              = CRYPTO_ALG_TYPE_BLKCIPHER,
+       .cra_blocksize          = 1,
+       .cra_type               = &crypto_blkcipher_type,
+       .cra_ctxsize            = sizeof(struct chacha20_ctx),
+       .cra_alignmask          = sizeof(u32) - 1,
+       .cra_module             = THIS_MODULE,
+       .cra_u                  = {
+               .blkcipher = {
+                       .min_keysize    = CHACHA20_KEY_SIZE,
+                       .max_keysize    = CHACHA20_KEY_SIZE,
+                       .ivsize         = CHACHA20_IV_SIZE,
+                       .geniv          = "seqiv",
+                       .setkey         = crypto_chacha20_setkey,
+                       .encrypt        = chacha20_simd,
+                       .decrypt        = chacha20_simd,
+               },
+       },
+};
+
+static int __init chacha20_simd_mod_init(void)
+{
+       if (!cpu_has_ssse3)
+               return -ENODEV;
+
+#ifdef CONFIG_AS_AVX2
+       chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+                           cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+#endif
+       return crypto_register_alg(&alg);
+}
+
+static void __exit chacha20_simd_mod_fini(void)
+{
+       crypto_unregister_alg(&alg);
+}
+
+module_init(chacha20_simd_mod_init);
+module_exit(chacha20_simd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-simd");
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
new file mode 100644 (file)
index 0000000..eff2f41
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 32
+
+ANMASK:        .octa 0x0000000003ffffff0000000003ffffff
+       .octa 0x0000000003ffffff0000000003ffffff
+ORMASK:        .octa 0x00000000010000000000000001000000
+       .octa 0x00000000010000000000000001000000
+
+.text
+
+#define h0 0x00(%rdi)
+#define h1 0x04(%rdi)
+#define h2 0x08(%rdi)
+#define h3 0x0c(%rdi)
+#define h4 0x10(%rdi)
+#define r0 0x00(%rdx)
+#define r1 0x04(%rdx)
+#define r2 0x08(%rdx)
+#define r3 0x0c(%rdx)
+#define r4 0x10(%rdx)
+#define u0 0x00(%r8)
+#define u1 0x04(%r8)
+#define u2 0x08(%r8)
+#define u3 0x0c(%r8)
+#define u4 0x10(%r8)
+#define w0 0x14(%r8)
+#define w1 0x18(%r8)
+#define w2 0x1c(%r8)
+#define w3 0x20(%r8)
+#define w4 0x24(%r8)
+#define y0 0x28(%r8)
+#define y1 0x2c(%r8)
+#define y2 0x30(%r8)
+#define y3 0x34(%r8)
+#define y4 0x38(%r8)
+#define m %rsi
+#define hc0 %ymm0
+#define hc1 %ymm1
+#define hc2 %ymm2
+#define hc3 %ymm3
+#define hc4 %ymm4
+#define hc0x %xmm0
+#define hc1x %xmm1
+#define hc2x %xmm2
+#define hc3x %xmm3
+#define hc4x %xmm4
+#define t1 %ymm5
+#define t2 %ymm6
+#define t1x %xmm5
+#define t2x %xmm6
+#define ruwy0 %ymm7
+#define ruwy1 %ymm8
+#define ruwy2 %ymm9
+#define ruwy3 %ymm10
+#define ruwy4 %ymm11
+#define ruwy0x %xmm7
+#define ruwy1x %xmm8
+#define ruwy2x %xmm9
+#define ruwy3x %xmm10
+#define ruwy4x %xmm11
+#define svxz1 %ymm12
+#define svxz2 %ymm13
+#define svxz3 %ymm14
+#define svxz4 %ymm15
+#define d0 %r9
+#define d1 %r10
+#define d2 %r11
+#define d3 %r12
+#define d4 %r13
+
+ENTRY(poly1305_4block_avx2)
+       # %rdi: Accumulator h[5]
+       # %rsi: 64 byte input block m
+       # %rdx: Poly1305 key r[5]
+       # %rcx: Quadblock count
+       # %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5]

+
+       # This four-block variant uses loop unrolled block processing. It
+       # requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
+       # h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
+
+       vzeroupper
+       push            %rbx
+       push            %r12
+       push            %r13
+
+       # combine r0,u0,w0,y0
+       vmovd           y0,ruwy0x
+       vmovd           w0,t1x
+       vpunpcklqdq     t1,ruwy0,ruwy0
+       vmovd           u0,t1x
+       vmovd           r0,t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,ruwy0,ruwy0
+
+       # combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
+       vmovd           y1,ruwy1x
+       vmovd           w1,t1x
+       vpunpcklqdq     t1,ruwy1,ruwy1
+       vmovd           u1,t1x
+       vmovd           r1,t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,ruwy1,ruwy1
+       vpslld          $2,ruwy1,svxz1
+       vpaddd          ruwy1,svxz1,svxz1
+
+       # combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
+       vmovd           y2,ruwy2x
+       vmovd           w2,t1x
+       vpunpcklqdq     t1,ruwy2,ruwy2
+       vmovd           u2,t1x
+       vmovd           r2,t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,ruwy2,ruwy2
+       vpslld          $2,ruwy2,svxz2
+       vpaddd          ruwy2,svxz2,svxz2
+
+       # combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
+       vmovd           y3,ruwy3x
+       vmovd           w3,t1x
+       vpunpcklqdq     t1,ruwy3,ruwy3
+       vmovd           u3,t1x
+       vmovd           r3,t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,ruwy3,ruwy3
+       vpslld          $2,ruwy3,svxz3
+       vpaddd          ruwy3,svxz3,svxz3
+
+       # combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
+       vmovd           y4,ruwy4x
+       vmovd           w4,t1x
+       vpunpcklqdq     t1,ruwy4,ruwy4
+       vmovd           u4,t1x
+       vmovd           r4,t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,ruwy4,ruwy4
+       vpslld          $2,ruwy4,svxz4
+       vpaddd          ruwy4,svxz4,svxz4
+
+.Ldoblock4:
+       # hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
+       #        m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
+       vmovd           0x00(m),hc0x
+       vmovd           0x10(m),t1x
+       vpunpcklqdq     t1,hc0,hc0
+       vmovd           0x20(m),t1x
+       vmovd           0x30(m),t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,hc0,hc0
+       vpand           ANMASK(%rip),hc0,hc0
+       vmovd           h0,t1x
+       vpaddd          t1,hc0,hc0
+       # hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
+       #        (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
+       vmovd           0x03(m),hc1x
+       vmovd           0x13(m),t1x
+       vpunpcklqdq     t1,hc1,hc1
+       vmovd           0x23(m),t1x
+       vmovd           0x33(m),t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,hc1,hc1
+       vpsrld          $2,hc1,hc1
+       vpand           ANMASK(%rip),hc1,hc1
+       vmovd           h1,t1x
+       vpaddd          t1,hc1,hc1
+       # hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
+       #        (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
+       vmovd           0x06(m),hc2x
+       vmovd           0x16(m),t1x
+       vpunpcklqdq     t1,hc2,hc2
+       vmovd           0x26(m),t1x
+       vmovd           0x36(m),t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,hc2,hc2
+       vpsrld          $4,hc2,hc2
+       vpand           ANMASK(%rip),hc2,hc2
+       vmovd           h2,t1x
+       vpaddd          t1,hc2,hc2
+       # hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
+       #        (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
+       vmovd           0x09(m),hc3x
+       vmovd           0x19(m),t1x
+       vpunpcklqdq     t1,hc3,hc3
+       vmovd           0x29(m),t1x
+       vmovd           0x39(m),t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,hc3,hc3
+       vpsrld          $6,hc3,hc3
+       vpand           ANMASK(%rip),hc3,hc3
+       vmovd           h3,t1x
+       vpaddd          t1,hc3,hc3
+       # hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
+       #        (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
+       vmovd           0x0c(m),hc4x
+       vmovd           0x1c(m),t1x
+       vpunpcklqdq     t1,hc4,hc4
+       vmovd           0x2c(m),t1x
+       vmovd           0x3c(m),t2x
+       vpunpcklqdq     t2,t1,t1
+       vperm2i128      $0x20,t1,hc4,hc4
+       vpsrld          $8,hc4,hc4
+       vpor            ORMASK(%rip),hc4,hc4
+       vmovd           h4,t1x
+       vpaddd          t1,hc4,hc4
+
+       # t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
+       vpmuludq        hc0,ruwy0,t1
+       # t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
+       vpmuludq        hc1,svxz4,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
+       vpmuludq        hc2,svxz3,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
+       vpmuludq        hc3,svxz2,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
+       vpmuludq        hc4,svxz1,t2
+       vpaddq          t2,t1,t1
+       # d0 = t1[0] + t1[1] + t1[2] + t1[3]
+       vpermq          $0xee,t1,t2
+       vpaddq          t2,t1,t1
+       vpsrldq         $8,t1,t2
+       vpaddq          t2,t1,t1
+       vmovq           t1x,d0
+
+       # t1 = [ hc0[3] * r1, hc0[2] * u1, hc0[1] * w1, hc0[0] * y1 ]
+       vpmuludq        hc0,ruwy1,t1
+       # t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
+       vpmuludq        hc1,ruwy0,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
+       vpmuludq        hc2,svxz4,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
+       vpmuludq        hc3,svxz3,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
+       vpmuludq        hc4,svxz2,t2
+       vpaddq          t2,t1,t1
+       # d1 = t1[0] + t1[1] + t1[2] + t1[3]
+       vpermq          $0xee,t1,t2
+       vpaddq          t2,t1,t1
+       vpsrldq         $8,t1,t2
+       vpaddq          t2,t1,t1
+       vmovq           t1x,d1
+
+       # t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
+       vpmuludq        hc0,ruwy2,t1
+       # t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
+       vpmuludq        hc1,ruwy1,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
+       vpmuludq        hc2,ruwy0,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
+       vpmuludq        hc3,svxz4,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
+       vpmuludq        hc4,svxz3,t2
+       vpaddq          t2,t1,t1
+       # d2 = t1[0] + t1[1] + t1[2] + t1[3]
+       vpermq          $0xee,t1,t2
+       vpaddq          t2,t1,t1
+       vpsrldq         $8,t1,t2
+       vpaddq          t2,t1,t1
+       vmovq           t1x,d2
+
+       # t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
+       vpmuludq        hc0,ruwy3,t1
+       # t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
+       vpmuludq        hc1,ruwy2,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
+       vpmuludq        hc2,ruwy1,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
+       vpmuludq        hc3,ruwy0,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
+       vpmuludq        hc4,svxz4,t2
+       vpaddq          t2,t1,t1
+       # d3 = t1[0] + t1[1] + t1[2] + t1[3]
+       vpermq          $0xee,t1,t2
+       vpaddq          t2,t1,t1
+       vpsrldq         $8,t1,t2
+       vpaddq          t2,t1,t1
+       vmovq           t1x,d3
+
+       # t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
+       vpmuludq        hc0,ruwy4,t1
+       # t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
+       vpmuludq        hc1,ruwy3,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
+       vpmuludq        hc2,ruwy2,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
+       vpmuludq        hc3,ruwy1,t2
+       vpaddq          t2,t1,t1
+       # t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
+       vpmuludq        hc4,ruwy0,t2
+       vpaddq          t2,t1,t1
+       # d4 = t1[0] + t1[1] + t1[2] + t1[3]
+       vpermq          $0xee,t1,t2
+       vpaddq          t2,t1,t1
+       vpsrldq         $8,t1,t2
+       vpaddq          t2,t1,t1
+       vmovq           t1x,d4
+
+       # d1 += d0 >> 26
+       mov             d0,%rax
+       shr             $26,%rax
+       add             %rax,d1
+       # h0 = d0 & 0x3ffffff
+       mov             d0,%rbx
+       and             $0x3ffffff,%ebx
+
+       # d2 += d1 >> 26
+       mov             d1,%rax
+       shr             $26,%rax
+       add             %rax,d2
+       # h1 = d1 & 0x3ffffff
+       mov             d1,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h1
+
+       # d3 += d2 >> 26
+       mov             d2,%rax
+       shr             $26,%rax
+       add             %rax,d3
+       # h2 = d2 & 0x3ffffff
+       mov             d2,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h2
+
+       # d4 += d3 >> 26
+       mov             d3,%rax
+       shr             $26,%rax
+       add             %rax,d4
+       # h3 = d3 & 0x3ffffff
+       mov             d3,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h3
+
+       # h0 += (d4 >> 26) * 5
+       mov             d4,%rax
+       shr             $26,%rax
+       lea             (%eax,%eax,4),%eax
+       add             %eax,%ebx
+       # h4 = d4 & 0x3ffffff
+       mov             d4,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h4
+
+       # h1 += h0 >> 26
+       mov             %ebx,%eax
+       shr             $26,%eax
+       add             %eax,h1
+       # h0 = h0 & 0x3ffffff
+       andl            $0x3ffffff,%ebx
+       mov             %ebx,h0
+
+       add             $0x40,m
+       dec             %rcx
+       jnz             .Ldoblock4
+
+       vzeroupper
+       pop             %r13
+       pop             %r12
+       pop             %rbx
+       ret
+ENDPROC(poly1305_4block_avx2)
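The identity behind this four-block unrolling, h = (h + m1)*r^4 + m2*r^3 + m3*r^2 + m4*r, is just four steps of Horner's rule folded together, and it holds for any modulus. It can therefore be sanity-checked with native 64-bit arithmetic and a toy prime instead of 2^130 - 5. A self-contained sketch (illustration only, userspace C, toy values):

        #include <assert.h>
        #include <stdint.h>

        static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p)
        {
                return (uint64_t)((unsigned __int128)a * b % p);
        }

        int main(void)
        {
                const uint64_t p = 2305843009213693951ULL;      /* toy prime 2^61 - 1 */
                uint64_t r = 123456789, h = 42;
                uint64_t m[4] = { 1111, 2222, 3333, 4444 };
                uint64_t serial = h, unrolled, r2, r3, r4;
                int i;

                /* serial form: h = (h + m) * r, applied four times */
                for (i = 0; i < 4; i++)
                        serial = mulmod((serial + m[i]) % p, r, p);

                /* unrolled form: h = (h + m1)*r^4 + m2*r^3 + m3*r^2 + m4*r */
                r2 = mulmod(r, r, p);
                r3 = mulmod(r2, r, p);
                r4 = mulmod(r3, r, p);
                unrolled = (mulmod((h + m[0]) % p, r4, p) +
                            mulmod(m[1], r3, p) +
                            mulmod(m[2], r2, p) +
                            mulmod(m[3], r, p)) % p;

                assert(serial == unrolled);
                return 0;
        }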
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
new file mode 100644 (file)
index 0000000..338c748
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.data
+.align 16
+
+ANMASK:        .octa 0x0000000003ffffff0000000003ffffff
+ORMASK:        .octa 0x00000000010000000000000001000000
+
+.text
+
+#define h0 0x00(%rdi)
+#define h1 0x04(%rdi)
+#define h2 0x08(%rdi)
+#define h3 0x0c(%rdi)
+#define h4 0x10(%rdi)
+#define r0 0x00(%rdx)
+#define r1 0x04(%rdx)
+#define r2 0x08(%rdx)
+#define r3 0x0c(%rdx)
+#define r4 0x10(%rdx)
+#define s1 0x00(%rsp)
+#define s2 0x04(%rsp)
+#define s3 0x08(%rsp)
+#define s4 0x0c(%rsp)
+#define m %rsi
+#define h01 %xmm0
+#define h23 %xmm1
+#define h44 %xmm2
+#define t1 %xmm3
+#define t2 %xmm4
+#define t3 %xmm5
+#define t4 %xmm6
+#define mask %xmm7
+#define d0 %r8
+#define d1 %r9
+#define d2 %r10
+#define d3 %r11
+#define d4 %r12
+
+ENTRY(poly1305_block_sse2)
+       # %rdi: Accumulator h[5]
+       # %rsi: 16 byte input block m
+       # %rdx: Poly1305 key r[5]
+       # %rcx: Block count
+
+       # This single block variant tries to improve performance by doing two
+       # multiplications in parallel using SSE instructions. There is quite
+       # some quadword packing involved, hence the speedup is marginal.
+
+       push            %rbx
+       push            %r12
+       sub             $0x10,%rsp
+
+       # s1..s4 = r1..r4 * 5
+       mov             r1,%eax
+       lea             (%eax,%eax,4),%eax
+       mov             %eax,s1
+       mov             r2,%eax
+       lea             (%eax,%eax,4),%eax
+       mov             %eax,s2
+       mov             r3,%eax
+       lea             (%eax,%eax,4),%eax
+       mov             %eax,s3
+       mov             r4,%eax
+       lea             (%eax,%eax,4),%eax
+       mov             %eax,s4
+
+       movdqa          ANMASK(%rip),mask
+
+.Ldoblock:
+       # h01 = [0, h1, 0, h0]
+       # h23 = [0, h3, 0, h2]
+       # h44 = [0, h4, 0, h4]
+       movd            h0,h01
+       movd            h1,t1
+       movd            h2,h23
+       movd            h3,t2
+       movd            h4,h44
+       punpcklqdq      t1,h01
+       punpcklqdq      t2,h23
+       punpcklqdq      h44,h44
+
+       # h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
+       movd            0x00(m),t1
+       movd            0x03(m),t2
+       psrld           $2,t2
+       punpcklqdq      t2,t1
+       pand            mask,t1
+       paddd           t1,h01
+       # h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
+       movd            0x06(m),t1
+       movd            0x09(m),t2
+       psrld           $4,t1
+       psrld           $6,t2
+       punpcklqdq      t2,t1
+       pand            mask,t1
+       paddd           t1,h23
+       # h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
+       mov             0x0c(m),%eax
+       shr             $8,%eax
+       or              $0x01000000,%eax
+       movd            %eax,t1
+       pshufd          $0xc4,t1,t1
+       paddd           t1,h44
+
+       # t1[0] = h0 * r0 + h2 * s3
+       # t1[1] = h1 * s4 + h3 * s2
+       movd            r0,t1
+       movd            s4,t2
+       punpcklqdq      t2,t1
+       pmuludq         h01,t1
+       movd            s3,t2
+       movd            s2,t3
+       punpcklqdq      t3,t2
+       pmuludq         h23,t2
+       paddq           t2,t1
+       # t2[0] = h0 * r1 + h2 * s4
+       # t2[1] = h1 * r0 + h3 * s3
+       movd            r1,t2
+       movd            r0,t3
+       punpcklqdq      t3,t2
+       pmuludq         h01,t2
+       movd            s4,t3
+       movd            s3,t4
+       punpcklqdq      t4,t3
+       pmuludq         h23,t3
+       paddq           t3,t2
+       # t3[0] = h4 * s1
+       # t3[1] = h4 * s2
+       movd            s1,t3
+       movd            s2,t4
+       punpcklqdq      t4,t3
+       pmuludq         h44,t3
+       # d0 = t1[0] + t1[1] + t3[0]
+       # d1 = t2[0] + t2[1] + t3[1]
+       movdqa          t1,t4
+       punpcklqdq      t2,t4
+       punpckhqdq      t2,t1
+       paddq           t4,t1
+       paddq           t3,t1
+       movq            t1,d0
+       psrldq          $8,t1
+       movq            t1,d1
+
+       # t1[0] = h0 * r2 + h2 * r0
+       # t1[1] = h1 * r1 + h3 * s4
+       movd            r2,t1
+       movd            r1,t2
+       punpcklqdq      t2,t1
+       pmuludq         h01,t1
+       movd            r0,t2
+       movd            s4,t3
+       punpcklqdq      t3,t2
+       pmuludq         h23,t2
+       paddq           t2,t1
+       # t2[0] = h0 * r3 + h2 * r1
+       # t2[1] = h1 * r2 + h3 * r0
+       movd            r3,t2
+       movd            r2,t3
+       punpcklqdq      t3,t2
+       pmuludq         h01,t2
+       movd            r1,t3
+       movd            r0,t4
+       punpcklqdq      t4,t3
+       pmuludq         h23,t3
+       paddq           t3,t2
+       # t3[0] = h4 * s3
+       # t3[1] = h4 * s4
+       movd            s3,t3
+       movd            s4,t4
+       punpcklqdq      t4,t3
+       pmuludq         h44,t3
+       # d2 = t1[0] + t1[1] + t3[0]
+       # d3 = t2[0] + t2[1] + t3[1]
+       movdqa          t1,t4
+       punpcklqdq      t2,t4
+       punpckhqdq      t2,t1
+       paddq           t4,t1
+       paddq           t3,t1
+       movq            t1,d2
+       psrldq          $8,t1
+       movq            t1,d3
+
+       # t1[0] = h0 * r4 + h2 * r2
+       # t1[1] = h1 * r3 + h3 * r1
+       movd            r4,t1
+       movd            r3,t2
+       punpcklqdq      t2,t1
+       pmuludq         h01,t1
+       movd            r2,t2
+       movd            r1,t3
+       punpcklqdq      t3,t2
+       pmuludq         h23,t2
+       paddq           t2,t1
+       # t3[0] = h4 * r0
+       movd            r0,t3
+       pmuludq         h44,t3
+       # d4 = t1[0] + t1[1] + t3[0]
+       movdqa          t1,t4
+       psrldq          $8,t4
+       paddq           t4,t1
+       paddq           t3,t1
+       movq            t1,d4
+
+       # d1 += d0 >> 26
+       mov             d0,%rax
+       shr             $26,%rax
+       add             %rax,d1
+       # h0 = d0 & 0x3ffffff
+       mov             d0,%rbx
+       and             $0x3ffffff,%ebx
+
+       # d2 += d1 >> 26
+       mov             d1,%rax
+       shr             $26,%rax
+       add             %rax,d2
+       # h1 = d1 & 0x3ffffff
+       mov             d1,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h1
+
+       # d3 += d2 >> 26
+       mov             d2,%rax
+       shr             $26,%rax
+       add             %rax,d3
+       # h2 = d2 & 0x3ffffff
+       mov             d2,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h2
+
+       # d4 += d3 >> 26
+       mov             d3,%rax
+       shr             $26,%rax
+       add             %rax,d4
+       # h3 = d3 & 0x3ffffff
+       mov             d3,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h3
+
+       # h0 += (d4 >> 26) * 5
+       mov             d4,%rax
+       shr             $26,%rax
+       lea             (%eax,%eax,4),%eax
+       add             %eax,%ebx
+       # h4 = d4 & 0x3ffffff
+       mov             d4,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h4
+
+       # h1 += h0 >> 26
+       mov             %ebx,%eax
+       shr             $26,%eax
+       add             %eax,h1
+       # h0 = h0 & 0x3ffffff
+       andl            $0x3ffffff,%ebx
+       mov             %ebx,h0
+
+       add             $0x10,m
+       dec             %rcx
+       jnz             .Ldoblock
+
+       add             $0x10,%rsp
+       pop             %r12
+       pop             %rbx
+       ret
+ENDPROC(poly1305_block_sse2)
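The unaligned loads at offsets 0, 3, 6, 9 and 12 combined with shifts of 0, 2, 4, 6 and 8 bits implement the usual radix-2^26 splitting of a 16-byte Poly1305 block, with the 2^128 padding bit folded into the top limb (the ANMASK/ORMASK handling above). A plain C sketch of the same extraction, for reference only (little-endian loads assumed, as on x86):

        #include <stdint.h>
        #include <string.h>

        /* Split a 16-byte Poly1305 block into five 26-bit limbs, with the
         * 2^128 padding bit OR-ed into the top limb.
         */
        static void poly1305_split_limbs(const uint8_t m[16], uint32_t h[5])
        {
                uint32_t w;

                memcpy(&w, m + 0, 4);  h[0] =  w       & 0x3ffffff;
                memcpy(&w, m + 3, 4);  h[1] = (w >> 2) & 0x3ffffff;
                memcpy(&w, m + 6, 4);  h[2] = (w >> 4) & 0x3ffffff;
                memcpy(&w, m + 9, 4);  h[3] = (w >> 6) & 0x3ffffff;
                memcpy(&w, m + 12, 4); h[4] = (w >> 8) | (1 << 24);
        }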
+
+
+#define u0 0x00(%r8)
+#define u1 0x04(%r8)
+#define u2 0x08(%r8)
+#define u3 0x0c(%r8)
+#define u4 0x10(%r8)
+#define hc0 %xmm0
+#define hc1 %xmm1
+#define hc2 %xmm2
+#define hc3 %xmm5
+#define hc4 %xmm6
+#define ru0 %xmm7
+#define ru1 %xmm8
+#define ru2 %xmm9
+#define ru3 %xmm10
+#define ru4 %xmm11
+#define sv1 %xmm12
+#define sv2 %xmm13
+#define sv3 %xmm14
+#define sv4 %xmm15
+#undef d0
+#define d0 %r13
+
+ENTRY(poly1305_2block_sse2)
+       # %rdi: Accumulator h[5]
+       # %rsi: 32 byte input block m
+       # %rdx: Poly1305 key r[5]
+       # %rcx: Doubleblock count
+       # %r8:  Poly1305 derived key r^2 u[5]
+
+       # This two-block variant further improves performance by using loop
+       # unrolled block processing. This is more straightforward and does
+       # less byte shuffling, but requires a second Poly1305 key r^2:
+       # h = (h + m) * r    =>    h = (h + m1) * r^2 + m2 * r
+
+       push            %rbx
+       push            %r12
+       push            %r13
+
+       # combine r0,u0
+       movd            u0,ru0
+       movd            r0,t1
+       punpcklqdq      t1,ru0
+
+       # combine r1,u1 and s1=r1*5,v1=u1*5
+       movd            u1,ru1
+       movd            r1,t1
+       punpcklqdq      t1,ru1
+       movdqa          ru1,sv1
+       pslld           $2,sv1
+       paddd           ru1,sv1
+
+       # combine r2,u2 and s2=r2*5,v2=u2*5
+       movd            u2,ru2
+       movd            r2,t1
+       punpcklqdq      t1,ru2
+       movdqa          ru2,sv2
+       pslld           $2,sv2
+       paddd           ru2,sv2
+
+       # combine r3,u3 and s3=r3*5,v3=u3*5
+       movd            u3,ru3
+       movd            r3,t1
+       punpcklqdq      t1,ru3
+       movdqa          ru3,sv3
+       pslld           $2,sv3
+       paddd           ru3,sv3
+
+       # combine r4,u4 and s4=r4*5,v4=u4*5
+       movd            u4,ru4
+       movd            r4,t1
+       punpcklqdq      t1,ru4
+       movdqa          ru4,sv4
+       pslld           $2,sv4
+       paddd           ru4,sv4
+
+.Ldoblock2:
+       # hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
+       movd            0x00(m),hc0
+       movd            0x10(m),t1
+       punpcklqdq      t1,hc0
+       pand            ANMASK(%rip),hc0
+       movd            h0,t1
+       paddd           t1,hc0
+       # hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
+       movd            0x03(m),hc1
+       movd            0x13(m),t1
+       punpcklqdq      t1,hc1
+       psrld           $2,hc1
+       pand            ANMASK(%rip),hc1
+       movd            h1,t1
+       paddd           t1,hc1
+       # hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
+       movd            0x06(m),hc2
+       movd            0x16(m),t1
+       punpcklqdq      t1,hc2
+       psrld           $4,hc2
+       pand            ANMASK(%rip),hc2
+       movd            h2,t1
+       paddd           t1,hc2
+       # hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
+       movd            0x09(m),hc3
+       movd            0x19(m),t1
+       punpcklqdq      t1,hc3
+       psrld           $6,hc3
+       pand            ANMASK(%rip),hc3
+       movd            h3,t1
+       paddd           t1,hc3
+       # hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
+       movd            0x0c(m),hc4
+       movd            0x1c(m),t1
+       punpcklqdq      t1,hc4
+       psrld           $8,hc4
+       por             ORMASK(%rip),hc4
+       movd            h4,t1
+       paddd           t1,hc4
+
+       # t1 = [ hc0[1] * r0, hc0[0] * u0 ]
+       movdqa          ru0,t1
+       pmuludq         hc0,t1
+       # t1 += [ hc1[1] * s4, hc1[0] * v4 ]
+       movdqa          sv4,t2
+       pmuludq         hc1,t2
+       paddq           t2,t1
+       # t1 += [ hc2[1] * s3, hc2[0] * v3 ]
+       movdqa          sv3,t2
+       pmuludq         hc2,t2
+       paddq           t2,t1
+       # t1 += [ hc3[1] * s2, hc3[0] * v2 ]
+       movdqa          sv2,t2
+       pmuludq         hc3,t2
+       paddq           t2,t1
+       # t1 += [ hc4[1] * s1, hc4[0] * v1 ]
+       movdqa          sv1,t2
+       pmuludq         hc4,t2
+       paddq           t2,t1
+       # d0 = t1[0] + t1[1]
+       movdqa          t1,t2
+       psrldq          $8,t2
+       paddq           t2,t1
+       movq            t1,d0
+
+       # t1 = [ hc0[1] * r1, hc0[0] * u1 ]
+       movdqa          ru1,t1
+       pmuludq         hc0,t1
+       # t1 += [ hc1[1] * r0, hc1[0] * u0 ]
+       movdqa          ru0,t2
+       pmuludq         hc1,t2
+       paddq           t2,t1
+       # t1 += [ hc2[1] * s4, hc2[0] * v4 ]
+       movdqa          sv4,t2
+       pmuludq         hc2,t2
+       paddq           t2,t1
+       # t1 += [ hc3[1] * s3, hc3[0] * v3 ]
+       movdqa          sv3,t2
+       pmuludq         hc3,t2
+       paddq           t2,t1
+       # t1 += [ hc4[1] * s2, hc4[0] * v2 ]
+       movdqa          sv2,t2
+       pmuludq         hc4,t2
+       paddq           t2,t1
+       # d1 = t1[0] + t1[1]
+       movdqa          t1,t2
+       psrldq          $8,t2
+       paddq           t2,t1
+       movq            t1,d1
+
+       # t1 = [ hc0[1] * r2, hc0[0] * u2 ]
+       movdqa          ru2,t1
+       pmuludq         hc0,t1
+       # t1 += [ hc1[1] * r1, hc1[0] * u1 ]
+       movdqa          ru1,t2
+       pmuludq         hc1,t2
+       paddq           t2,t1
+       # t1 += [ hc2[1] * r0, hc2[0] * u0 ]
+       movdqa          ru0,t2
+       pmuludq         hc2,t2
+       paddq           t2,t1
+       # t1 += [ hc3[1] * s4, hc3[0] * v4 ]
+       movdqa          sv4,t2
+       pmuludq         hc3,t2
+       paddq           t2,t1
+       # t1 += [ hc4[1] * s3, hc4[0] * v3 ]
+       movdqa          sv3,t2
+       pmuludq         hc4,t2
+       paddq           t2,t1
+       # d2 = t1[0] + t1[1]
+       movdqa          t1,t2
+       psrldq          $8,t2
+       paddq           t2,t1
+       movq            t1,d2
+
+       # t1 = [ hc0[1] * r3, hc0[0] * u3 ]
+       movdqa          ru3,t1
+       pmuludq         hc0,t1
+       # t1 += [ hc1[1] * r2, hc1[0] * u2 ]
+       movdqa          ru2,t2
+       pmuludq         hc1,t2
+       paddq           t2,t1
+       # t1 += [ hc2[1] * r1, hc2[0] * u1 ]
+       movdqa          ru1,t2
+       pmuludq         hc2,t2
+       paddq           t2,t1
+       # t1 += [ hc3[1] * r0, hc3[0] * u0 ]
+       movdqa          ru0,t2
+       pmuludq         hc3,t2
+       paddq           t2,t1
+       # t1 += [ hc4[1] * s4, hc4[0] * v4 ]
+       movdqa          sv4,t2
+       pmuludq         hc4,t2
+       paddq           t2,t1
+       # d3 = t1[0] + t1[1]
+       movdqa          t1,t2
+       psrldq          $8,t2
+       paddq           t2,t1
+       movq            t1,d3
+
+       # t1 = [ hc0[1] * r4, hc0[0] * u4 ]
+       movdqa          ru4,t1
+       pmuludq         hc0,t1
+       # t1 += [ hc1[1] * r3, hc1[0] * u3 ]
+       movdqa          ru3,t2
+       pmuludq         hc1,t2
+       paddq           t2,t1
+       # t1 += [ hc2[1] * r2, hc2[0] * u2 ]
+       movdqa          ru2,t2
+       pmuludq         hc2,t2
+       paddq           t2,t1
+       # t1 += [ hc3[1] * r1, hc3[0] * u1 ]
+       movdqa          ru1,t2
+       pmuludq         hc3,t2
+       paddq           t2,t1
+       # t1 += [ hc4[1] * r0, hc4[0] * u0 ]
+       movdqa          ru0,t2
+       pmuludq         hc4,t2
+       paddq           t2,t1
+       # d4 = t1[0] + t1[1]
+       movdqa          t1,t2
+       psrldq          $8,t2
+       paddq           t2,t1
+       movq            t1,d4
+
+       # d1 += d0 >> 26
+       mov             d0,%rax
+       shr             $26,%rax
+       add             %rax,d1
+       # h0 = d0 & 0x3ffffff
+       mov             d0,%rbx
+       and             $0x3ffffff,%ebx
+
+       # d2 += d1 >> 26
+       mov             d1,%rax
+       shr             $26,%rax
+       add             %rax,d2
+       # h1 = d1 & 0x3ffffff
+       mov             d1,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h1
+
+       # d3 += d2 >> 26
+       mov             d2,%rax
+       shr             $26,%rax
+       add             %rax,d3
+       # h2 = d2 & 0x3ffffff
+       mov             d2,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h2
+
+       # d4 += d3 >> 26
+       mov             d3,%rax
+       shr             $26,%rax
+       add             %rax,d4
+       # h3 = d3 & 0x3ffffff
+       mov             d3,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h3
+
+       # h0 += (d4 >> 26) * 5
+       mov             d4,%rax
+       shr             $26,%rax
+       lea             (%eax,%eax,4),%eax
+       add             %eax,%ebx
+       # h4 = d4 & 0x3ffffff
+       mov             d4,%rax
+       and             $0x3ffffff,%eax
+       mov             %eax,h4
+
+       # h1 += h0 >> 26
+       mov             %ebx,%eax
+       shr             $26,%eax
+       add             %eax,h1
+       # h0 = h0 & 0x3ffffff
+       andl            $0x3ffffff,%ebx
+       mov             %ebx,h0
+
+       add             $0x20,m
+       dec             %rcx
+       jnz             .Ldoblock2
+
+       pop             %r13
+       pop             %r12
+       pop             %rbx
+       ret
+ENDPROC(poly1305_2block_sse2)
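Both SSE2 routines rely on 2^130 ≡ 5 (mod 2^130 - 5): any limb product that would land at weight 2^130 or above is multiplied by 5 and folded back into the low limbs, which is why s1..s4 are precomputed as r1..r4 * 5 (and likewise v/x/z in the AVX2 code). A scalar sketch of one h = h * r reduction step in this representation, mirroring the d0..d4 accumulation and carry chain used above (illustration only; the bounds comments assume a clamped Poly1305 key as in the real algorithm):

        #include <stdint.h>

        /* One Poly1305 multiply-reduce step on 26-bit limbs.
         * h[] and r[] hold five 26-bit limbs; s[i] = r[i] * 5.
         */
        static void poly1305_mul_r(uint32_t h[5], const uint32_t r[5], const uint32_t s[5])
        {
                uint64_t d0, d1, d2, d3, d4, c;

                d0 = (uint64_t)h[0]*r[0] + (uint64_t)h[1]*s[4] + (uint64_t)h[2]*s[3] +
                     (uint64_t)h[3]*s[2] + (uint64_t)h[4]*s[1];
                d1 = (uint64_t)h[0]*r[1] + (uint64_t)h[1]*r[0] + (uint64_t)h[2]*s[4] +
                     (uint64_t)h[3]*s[3] + (uint64_t)h[4]*s[2];
                d2 = (uint64_t)h[0]*r[2] + (uint64_t)h[1]*r[1] + (uint64_t)h[2]*r[0] +
                     (uint64_t)h[3]*s[4] + (uint64_t)h[4]*s[3];
                d3 = (uint64_t)h[0]*r[3] + (uint64_t)h[1]*r[2] + (uint64_t)h[2]*r[1] +
                     (uint64_t)h[3]*r[0] + (uint64_t)h[4]*s[4];
                d4 = (uint64_t)h[0]*r[4] + (uint64_t)h[1]*r[3] + (uint64_t)h[2]*r[2] +
                     (uint64_t)h[3]*r[1] + (uint64_t)h[4]*r[0];

                /* carry chain: d(i+1) += d(i) >> 26, h(i) = d(i) & 0x3ffffff */
                c = d0 >> 26; h[0] = d0 & 0x3ffffff; d1 += c;
                c = d1 >> 26; h[1] = d1 & 0x3ffffff; d2 += c;
                c = d2 >> 26; h[2] = d2 & 0x3ffffff; d3 += c;
                c = d3 >> 26; h[3] = d3 & 0x3ffffff; d4 += c;
                c = d4 >> 26; h[4] = d4 & 0x3ffffff;
                /* fold the 2^130 overflow back as *5; fits in 32 bits for clamped r */
                h[0] += (uint32_t)c * 5;
                h[1] += h[0] >> 26; h[0] &= 0x3ffffff;
        }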
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
new file mode 100644 (file)
index 0000000..f7170d7
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * Poly1305 authenticator algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/poly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+struct poly1305_simd_desc_ctx {
+       struct poly1305_desc_ctx base;
+       /* derived key u set? */
+       bool uset;
+#ifdef CONFIG_AS_AVX2
+       /* derived keys r^3, r^4 set? */
+       bool wset;
+#endif
+       /* derived Poly1305 key r^2 */
+       u32 u[5];
+       /* ... silently appended r^3 and r^4 when using AVX2 */
+};
+
+asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
+                                   const u32 *r, unsigned int blocks);
+asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
+                                    unsigned int blocks, const u32 *u);
+#ifdef CONFIG_AS_AVX2
+asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
+                                    unsigned int blocks, const u32 *u);
+static bool poly1305_use_avx2;
+#endif
+
+static int poly1305_simd_init(struct shash_desc *desc)
+{
+       struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
+
+       sctx->uset = false;
+#ifdef CONFIG_AS_AVX2
+       sctx->wset = false;
+#endif
+
+       return crypto_poly1305_init(desc);
+}
+
+static void poly1305_simd_mult(u32 *a, const u32 *b)
+{
+       u8 m[POLY1305_BLOCK_SIZE];
+
+       memset(m, 0, sizeof(m));
+       /* The poly1305 block function adds a hi-bit to the accumulator which
+        * we don't need for key multiplication; compensate for it. */
+       a[4] -= 1 << 24;
+       poly1305_block_sse2(a, m, b, 1);
+}
+
+static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
+                                        const u8 *src, unsigned int srclen)
+{
+       struct poly1305_simd_desc_ctx *sctx;
+       unsigned int blocks, datalen;
+
+       BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
+       sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
+
+       if (unlikely(!dctx->sset)) {
+               datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+               src += srclen - datalen;
+               srclen = datalen;
+       }
+
+#ifdef CONFIG_AS_AVX2
+       if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
+               if (unlikely(!sctx->wset)) {
+                       if (!sctx->uset) {
+                               memcpy(sctx->u, dctx->r, sizeof(sctx->u));
+                               poly1305_simd_mult(sctx->u, dctx->r);
+                               sctx->uset = true;
+                       }
+                       memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
+                       poly1305_simd_mult(sctx->u + 5, dctx->r);
+                       memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
+                       poly1305_simd_mult(sctx->u + 10, dctx->r);
+                       sctx->wset = true;
+               }
+               blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
+               poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+               src += POLY1305_BLOCK_SIZE * 4 * blocks;
+               srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
+       }
+#endif
+       if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
+               if (unlikely(!sctx->uset)) {
+                       memcpy(sctx->u, dctx->r, sizeof(sctx->u));
+                       poly1305_simd_mult(sctx->u, dctx->r);
+                       sctx->uset = true;
+               }
+               blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
+               poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+               src += POLY1305_BLOCK_SIZE * 2 * blocks;
+               srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
+       }
+       if (srclen >= POLY1305_BLOCK_SIZE) {
+               poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+               srclen -= POLY1305_BLOCK_SIZE;
+       }
+       return srclen;
+}
+
+static int poly1305_simd_update(struct shash_desc *desc,
+                               const u8 *src, unsigned int srclen)
+{
+       struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+       unsigned int bytes;
+
+       /* kernel_fpu_begin/end is costly, use fallback for small updates */
+       if (srclen <= 288 || !may_use_simd())
+               return crypto_poly1305_update(desc, src, srclen);
+
+       kernel_fpu_begin();
+
+       if (unlikely(dctx->buflen)) {
+               bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
+               memcpy(dctx->buf + dctx->buflen, src, bytes);
+               src += bytes;
+               srclen -= bytes;
+               dctx->buflen += bytes;
+
+               if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+                       poly1305_simd_blocks(dctx, dctx->buf,
+                                            POLY1305_BLOCK_SIZE);
+                       dctx->buflen = 0;
+               }
+       }
+
+       if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+               bytes = poly1305_simd_blocks(dctx, src, srclen);
+               src += srclen - bytes;
+               srclen = bytes;
+       }
+
+       kernel_fpu_end();
+
+       if (unlikely(srclen)) {
+               dctx->buflen = srclen;
+               memcpy(dctx->buf, src, srclen);
+       }
+
+       return 0;
+}
+
+static struct shash_alg alg = {
+       .digestsize     = POLY1305_DIGEST_SIZE,
+       .init           = poly1305_simd_init,
+       .update         = poly1305_simd_update,
+       .final          = crypto_poly1305_final,
+       .setkey         = crypto_poly1305_setkey,
+       .descsize       = sizeof(struct poly1305_simd_desc_ctx),
+       .base           = {
+               .cra_name               = "poly1305",
+               .cra_driver_name        = "poly1305-simd",
+               .cra_priority           = 300,
+               .cra_flags              = CRYPTO_ALG_TYPE_SHASH,
+               .cra_alignmask          = sizeof(u32) - 1,
+               .cra_blocksize          = POLY1305_BLOCK_SIZE,
+               .cra_module             = THIS_MODULE,
+       },
+};
+
+static int __init poly1305_simd_mod_init(void)
+{
+       if (!cpu_has_xmm2)
+               return -ENODEV;
+
+#ifdef CONFIG_AS_AVX2
+       poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
+                           cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
+       alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
+       if (poly1305_use_avx2)
+               alg.descsize += 10 * sizeof(u32);
+#endif
+       return crypto_register_shash(&alg);
+}
+
+static void __exit poly1305_simd_mod_exit(void)
+{
+       crypto_unregister_shash(&alg);
+}
+
+module_init(poly1305_simd_mod_init);
+module_exit(poly1305_simd_mod_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("Poly1305 authenticator");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-simd");
index b4cfc5754033b9ddfa17d8ad395baf0e751891bd..354bb692c9645c36e1590d1a40f73cf537b154f4 100644 (file)
@@ -470,6 +470,18 @@ config CRYPTO_POLY1305
          It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
          in IETF protocols. This is the portable C implementation of Poly1305.
 
+config CRYPTO_POLY1305_X86_64
+       tristate "Poly1305 authenticator algorithm (x86_64/SSE2/AVX2)"
+       depends on X86 && 64BIT
+       select CRYPTO_POLY1305
+       help
+         Poly1305 authenticator algorithm, RFC7539.
+
+         Poly1305 is an authenticator algorithm designed by Daniel J. Bernstein.
+         It is used for the ChaCha20-Poly1305 AEAD, specified in RFC7539 for use
+         in IETF protocols. This is the x86_64 assembler implementation using SIMD
+         instructions.
+
 config CRYPTO_MD4
        tristate "MD4 digest algorithm"
        select CRYPTO_HASH
@@ -1213,6 +1225,21 @@ config CRYPTO_CHACHA20
          See also:
          <http://cr.yp.to/chacha/chacha-20080128.pdf>
 
+config CRYPTO_CHACHA20_X86_64
+       tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
+       depends on X86 && 64BIT
+       select CRYPTO_BLKCIPHER
+       select CRYPTO_CHACHA20
+       help
+         ChaCha20 cipher algorithm, RFC7539.
+
+         ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
+         Bernstein and further specified in RFC7539 for use in IETF protocols.
+         This is the x86_64 assembler implementation using SIMD instructions.
+
+         See also:
+         <http://cr.yp.to/chacha/chacha-20080128.pdf>
+
 config CRYPTO_SEED
        tristate "SEED cipher algorithm"
        select CRYPTO_ALGAPI
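With the two entries above in place, the SIMD drivers are enabled like any other optional algorithm; a hypothetical .config fragment (module builds assumed) might look like:

        CONFIG_CRYPTO_CHACHA20=y
        CONFIG_CRYPTO_CHACHA20_X86_64=m
        CONFIG_CRYPTO_POLY1305=y
        CONFIG_CRYPTO_POLY1305_X86_64=m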
index 07bf99773548bf9f088b6ff8380edb4caf7cda4a..1a5b118c301a546580425fc5eec5e5dfd15b76b7 100644 (file)
@@ -307,9 +307,22 @@ static void crypto_aead_show(struct seq_file *m, struct crypto_alg *alg)
        seq_printf(m, "geniv        : <none>\n");
 }
 
+static void crypto_aead_free_instance(struct crypto_instance *inst)
+{
+       struct aead_instance *aead = aead_instance(inst);
+
+       if (!aead->free) {
+               inst->tmpl->free(inst);
+               return;
+       }
+
+       aead->free(aead);
+}
+
 static const struct crypto_type crypto_new_aead_type = {
        .extsize = crypto_alg_extsize,
        .init_tfm = crypto_aead_init_tfm,
+       .free = crypto_aead_free_instance,
 #ifdef CONFIG_PROC_FS
        .show = crypto_aead_show,
 #endif
@@ -591,7 +604,7 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
                return ERR_CAST(algt);
 
        if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) &
-           algt->mask)
+           algt->mask & ~CRYPTO_ALG_AEAD_NEW)
                return ERR_PTR(-EINVAL);
 
        name = crypto_attr_alg_name(tb[1]);
@@ -670,7 +683,8 @@ struct aead_instance *aead_geniv_alloc(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto err_drop_alg;
 
-       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags = alg->base.cra_flags &
+                                  (CRYPTO_ALG_ASYNC | CRYPTO_ALG_AEAD_NEW);
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
index 3c079b7f23f6bada906f9d444a50cd15b0831d65..d130b41dbaea244000c35328d7eba7b463158bc8 100644 (file)
@@ -67,12 +67,22 @@ static int crypto_check_alg(struct crypto_alg *alg)
        return crypto_set_driver_name(alg);
 }
 
+static void crypto_free_instance(struct crypto_instance *inst)
+{
+       if (!inst->alg.cra_type->free) {
+               inst->tmpl->free(inst);
+               return;
+       }
+
+       inst->alg.cra_type->free(inst);
+}
+
 static void crypto_destroy_instance(struct crypto_alg *alg)
 {
        struct crypto_instance *inst = (void *)alg;
        struct crypto_template *tmpl = inst->tmpl;
 
-       tmpl->free(inst);
+       crypto_free_instance(inst);
        crypto_tmpl_put(tmpl);
 }
 
@@ -481,7 +491,7 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 
        hlist_for_each_entry_safe(inst, n, list, list) {
                BUG_ON(atomic_read(&inst->alg.cra_refcnt) != 1);
-               tmpl->free(inst);
+               crypto_free_instance(inst);
        }
        crypto_remove_final(&users);
 }
@@ -892,7 +902,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(crypto_enqueue_request);
 
-void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset)
+struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
 {
        struct list_head *request;
 
@@ -907,14 +917,7 @@ void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset)
        request = queue->list.next;
        list_del(request);
 
-       return (char *)list_entry(request, struct crypto_async_request, list) -
-              offset;
-}
-EXPORT_SYMBOL_GPL(__crypto_dequeue_request);
-
-struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue)
-{
-       return __crypto_dequeue_request(queue, 0);
+       return list_entry(request, struct crypto_async_request, list);
 }
 EXPORT_SYMBOL_GPL(crypto_dequeue_request);
 
index a4d1a5eda18b4e9758ec899fdf48c6f1359a8c14..b63f96a0b39ca96acc79c4601ee2959e54fefecd 100644 (file)
@@ -36,14 +36,20 @@ struct crypto_rfc4309_ctx {
        u8 nonce[3];
 };
 
+struct crypto_rfc4309_req_ctx {
+       struct scatterlist src[3];
+       struct scatterlist dst[3];
+       struct aead_request subreq;
+};
+
 struct crypto_ccm_req_priv_ctx {
        u8 odata[16];
        u8 idata[16];
        u8 auth_tag[16];
        u32 ilen;
        u32 flags;
-       struct scatterlist src[2];
-       struct scatterlist dst[2];
+       struct scatterlist src[3];
+       struct scatterlist dst[3];
        struct ablkcipher_request abreq;
 };
 
@@ -265,7 +271,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
        /* format associated data and compute into mac */
        if (assoclen) {
                pctx->ilen = format_adata(idata, assoclen);
-               get_data_to_compute(cipher, pctx, req->assoc, req->assoclen);
+               get_data_to_compute(cipher, pctx, req->src, req->assoclen);
        } else {
                pctx->ilen = 0;
        }
@@ -286,7 +292,8 @@ static void crypto_ccm_encrypt_done(struct crypto_async_request *areq, int err)
        u8 *odata = pctx->odata;
 
        if (!err)
-               scatterwalk_map_and_copy(odata, req->dst, req->cryptlen,
+               scatterwalk_map_and_copy(odata, req->dst,
+                                        req->assoclen + req->cryptlen,
                                         crypto_aead_authsize(aead), 1);
        aead_request_complete(req, err);
 }
@@ -300,6 +307,41 @@ static inline int crypto_ccm_check_iv(const u8 *iv)
        return 0;
 }
 
+static int crypto_ccm_init_crypt(struct aead_request *req, u8 *tag)
+{
+       struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
+       struct scatterlist *sg;
+       u8 *iv = req->iv;
+       int err;
+
+       err = crypto_ccm_check_iv(iv);
+       if (err)
+               return err;
+
+       pctx->flags = aead_request_flags(req);
+
+        /* Note: rfc 3610 and NIST 800-38C require counter of
+        * zero to encrypt auth tag.
+        */
+       memset(iv + 15 - iv[0], 0, iv[0] + 1);
+
+       sg_init_table(pctx->src, 3);
+       sg_set_buf(pctx->src, tag, 16);
+       sg = scatterwalk_ffwd(pctx->src + 1, req->src, req->assoclen);
+       if (sg != pctx->src + 1)
+               sg_chain(pctx->src, 2, sg);
+
+       if (req->src != req->dst) {
+               sg_init_table(pctx->dst, 3);
+               sg_set_buf(pctx->dst, tag, 16);
+               sg = scatterwalk_ffwd(pctx->dst + 1, req->dst, req->assoclen);
+               if (sg != pctx->dst + 1)
+                       sg_chain(pctx->dst, 2, sg);
+       }
+
+       return 0;
+}
+
 static int crypto_ccm_encrypt(struct aead_request *req)
 {
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
@@ -312,32 +354,17 @@ static int crypto_ccm_encrypt(struct aead_request *req)
        u8 *iv = req->iv;
        int err;
 
-       err = crypto_ccm_check_iv(iv);
+       err = crypto_ccm_init_crypt(req, odata);
        if (err)
                return err;
 
-       pctx->flags = aead_request_flags(req);
-
-       err = crypto_ccm_auth(req, req->src, cryptlen);
+       err = crypto_ccm_auth(req, sg_next(pctx->src), cryptlen);
        if (err)
                return err;
 
-        /* Note: rfc 3610 and NIST 800-38C require counter of
-        * zero to encrypt auth tag.
-        */
-       memset(iv + 15 - iv[0], 0, iv[0] + 1);
-
-       sg_init_table(pctx->src, 2);
-       sg_set_buf(pctx->src, odata, 16);
-       scatterwalk_sg_chain(pctx->src, 2, req->src);
-
        dst = pctx->src;
-       if (req->src != req->dst) {
-               sg_init_table(pctx->dst, 2);
-               sg_set_buf(pctx->dst, odata, 16);
-               scatterwalk_sg_chain(pctx->dst, 2, req->dst);
+       if (req->src != req->dst)
                dst = pctx->dst;
-       }
 
        ablkcipher_request_set_tfm(abreq, ctx->ctr);
        ablkcipher_request_set_callback(abreq, pctx->flags,
@@ -348,7 +375,7 @@ static int crypto_ccm_encrypt(struct aead_request *req)
                return err;
 
        /* copy authtag to end of dst */
-       scatterwalk_map_and_copy(odata, req->dst, cryptlen,
+       scatterwalk_map_and_copy(odata, sg_next(dst), cryptlen,
                                 crypto_aead_authsize(aead), 1);
        return err;
 }
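
The converted encrypt path follows the new AEAD convention: req->src carries AD || plaintext, the tag is written into req->dst behind the AD and ciphertext, and the AD length travels via aead_request_set_ad(). A minimal caller-side sketch of that buffer layout, with key handling, asynchronous completion and most error paths trimmed; this is illustrative usage, not code from the patch:

#include <crypto/aead.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int example_ccm_encrypt(const u8 *key, unsigned int keylen,
			       u8 *buf, unsigned int assoclen,
			       unsigned int ptlen, u8 *iv)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	struct scatterlist sg;
	int err;

	tfm = crypto_alloc_aead("ccm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setkey(tfm, key, keylen);
	if (!err)
		err = crypto_aead_setauthsize(tfm, 16);
	if (err)
		goto out_free_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	/* buf = AD || plaintext, with 16 spare bytes for the tag */
	sg_init_one(&sg, buf, assoclen + ptlen + 16);

	/* a real caller supplies a completion callback for async tfms */
	aead_request_set_callback(req, 0, NULL, NULL);
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, &sg, &sg, ptlen, iv);

	err = crypto_aead_encrypt(req);		/* -EINPROGRESS handling omitted */

	aead_request_free(req);
out_free_tfm:
	crypto_free_aead(tfm);
	return err;
}
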
@@ -361,9 +388,14 @@ static void crypto_ccm_decrypt_done(struct crypto_async_request *areq,
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        unsigned int authsize = crypto_aead_authsize(aead);
        unsigned int cryptlen = req->cryptlen - authsize;
+       struct scatterlist *dst;
+
+       pctx->flags = 0;
+
+       dst = sg_next(req->src == req->dst ? pctx->src : pctx->dst);
 
        if (!err) {
-               err = crypto_ccm_auth(req, req->dst, cryptlen);
+               err = crypto_ccm_auth(req, dst, cryptlen);
                if (!err && crypto_memneq(pctx->auth_tag, pctx->odata, authsize))
                        err = -EBADMSG;
        }
@@ -384,31 +416,18 @@ static int crypto_ccm_decrypt(struct aead_request *req)
        u8 *iv = req->iv;
        int err;
 
-       if (cryptlen < authsize)
-               return -EINVAL;
        cryptlen -= authsize;
 
-       err = crypto_ccm_check_iv(iv);
+       err = crypto_ccm_init_crypt(req, authtag);
        if (err)
                return err;
 
-       pctx->flags = aead_request_flags(req);
-
-       scatterwalk_map_and_copy(authtag, req->src, cryptlen, authsize, 0);
-
-       memset(iv + 15 - iv[0], 0, iv[0] + 1);
-
-       sg_init_table(pctx->src, 2);
-       sg_set_buf(pctx->src, authtag, 16);
-       scatterwalk_sg_chain(pctx->src, 2, req->src);
+       scatterwalk_map_and_copy(authtag, sg_next(pctx->src), cryptlen,
+                                authsize, 0);
 
        dst = pctx->src;
-       if (req->src != req->dst) {
-               sg_init_table(pctx->dst, 2);
-               sg_set_buf(pctx->dst, authtag, 16);
-               scatterwalk_sg_chain(pctx->dst, 2, req->dst);
+       if (req->src != req->dst)
                dst = pctx->dst;
-       }
 
        ablkcipher_request_set_tfm(abreq, ctx->ctr);
        ablkcipher_request_set_callback(abreq, pctx->flags,
@@ -418,7 +437,7 @@ static int crypto_ccm_decrypt(struct aead_request *req)
        if (err)
                return err;
 
-       err = crypto_ccm_auth(req, req->dst, cryptlen);
+       err = crypto_ccm_auth(req, sg_next(dst), cryptlen);
        if (err)
                return err;
 
@@ -429,11 +448,11 @@ static int crypto_ccm_decrypt(struct aead_request *req)
        return err;
 }
 
-static int crypto_ccm_init_tfm(struct crypto_tfm *tfm)
+static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
 {
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct ccm_instance_ctx *ictx = crypto_instance_ctx(inst);
-       struct crypto_ccm_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aead_instance *inst = aead_alg_instance(tfm);
+       struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
+       struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
        struct crypto_cipher *cipher;
        struct crypto_ablkcipher *ctr;
        unsigned long align;
@@ -451,9 +470,10 @@ static int crypto_ccm_init_tfm(struct crypto_tfm *tfm)
        ctx->cipher = cipher;
        ctx->ctr = ctr;
 
-       align = crypto_tfm_alg_alignmask(tfm);
+       align = crypto_aead_alignmask(tfm);
        align &= ~(crypto_tfm_ctx_alignment() - 1);
-       crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
+       crypto_aead_set_reqsize(
+               tfm,
                align + sizeof(struct crypto_ccm_req_priv_ctx) +
                crypto_ablkcipher_reqsize(ctr));
 
@@ -464,21 +484,31 @@ err_free_cipher:
        return err;
 }
 
-static void crypto_ccm_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
 {
-       struct crypto_ccm_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
 
        crypto_free_cipher(ctx->cipher);
        crypto_free_ablkcipher(ctx->ctr);
 }
 
-static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
-                                                      const char *full_name,
-                                                      const char *ctr_name,
-                                                      const char *cipher_name)
+static void crypto_ccm_free(struct aead_instance *inst)
+{
+       struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);
+
+       crypto_drop_spawn(&ctx->cipher);
+       crypto_drop_skcipher(&ctx->ctr);
+       kfree(inst);
+}
+
+static int crypto_ccm_create_common(struct crypto_template *tmpl,
+                                   struct rtattr **tb,
+                                   const char *full_name,
+                                   const char *ctr_name,
+                                   const char *cipher_name)
 {
        struct crypto_attr_type *algt;
-       struct crypto_instance *inst;
+       struct aead_instance *inst;
        struct crypto_alg *ctr;
        struct crypto_alg *cipher;
        struct ccm_instance_ctx *ictx;
@@ -486,15 +516,16 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
 
        algt = crypto_get_attr_type(tb);
        if (IS_ERR(algt))
-               return ERR_CAST(algt);
+               return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return ERR_PTR(-EINVAL);
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
+               return -EINVAL;
 
        cipher = crypto_alg_mod_lookup(cipher_name,  CRYPTO_ALG_TYPE_CIPHER,
                                       CRYPTO_ALG_TYPE_MASK);
        if (IS_ERR(cipher))
-               return ERR_CAST(cipher);
+               return PTR_ERR(cipher);
 
        err = -EINVAL;
        if (cipher->cra_blocksize != 16)
@@ -505,14 +536,15 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
        if (!inst)
                goto out_put_cipher;
 
-       ictx = crypto_instance_ctx(inst);
+       ictx = aead_instance_ctx(inst);
 
-       err = crypto_init_spawn(&ictx->cipher, cipher, inst,
+       err = crypto_init_spawn(&ictx->cipher, cipher,
+                               aead_crypto_instance(inst),
                                CRYPTO_ALG_TYPE_MASK);
        if (err)
                goto err_free_inst;
 
-       crypto_set_skcipher_spawn(&ictx->ctr, inst);
+       crypto_set_skcipher_spawn(&ictx->ctr, aead_crypto_instance(inst));
        err = crypto_grab_skcipher(&ictx->ctr, ctr_name, 0,
                                   crypto_requires_sync(algt->type,
                                                        algt->mask));
@@ -531,33 +563,40 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
                goto err_drop_ctr;
 
        err = -ENAMETOOLONG;
-       if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+       if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
                     "ccm_base(%s,%s)", ctr->cra_driver_name,
                     cipher->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto err_drop_ctr;
 
-       memcpy(inst->alg.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
-
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-       inst->alg.cra_flags |= ctr->cra_flags & CRYPTO_ALG_ASYNC;
-       inst->alg.cra_priority = cipher->cra_priority + ctr->cra_priority;
-       inst->alg.cra_blocksize = 1;
-       inst->alg.cra_alignmask = cipher->cra_alignmask | ctr->cra_alignmask |
-                                 (__alignof__(u32) - 1);
-       inst->alg.cra_type = &crypto_aead_type;
-       inst->alg.cra_aead.ivsize = 16;
-       inst->alg.cra_aead.maxauthsize = 16;
-       inst->alg.cra_ctxsize = sizeof(struct crypto_ccm_ctx);
-       inst->alg.cra_init = crypto_ccm_init_tfm;
-       inst->alg.cra_exit = crypto_ccm_exit_tfm;
-       inst->alg.cra_aead.setkey = crypto_ccm_setkey;
-       inst->alg.cra_aead.setauthsize = crypto_ccm_setauthsize;
-       inst->alg.cra_aead.encrypt = crypto_ccm_encrypt;
-       inst->alg.cra_aead.decrypt = crypto_ccm_decrypt;
+       memcpy(inst->alg.base.cra_name, full_name, CRYPTO_MAX_ALG_NAME);
+
+       inst->alg.base.cra_flags = ctr->cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
+       inst->alg.base.cra_priority = (cipher->cra_priority +
+                                      ctr->cra_priority) / 2;
+       inst->alg.base.cra_blocksize = 1;
+       inst->alg.base.cra_alignmask = cipher->cra_alignmask |
+                                      ctr->cra_alignmask |
+                                      (__alignof__(u32) - 1);
+       inst->alg.ivsize = 16;
+       inst->alg.maxauthsize = 16;
+       inst->alg.base.cra_ctxsize = sizeof(struct crypto_ccm_ctx);
+       inst->alg.init = crypto_ccm_init_tfm;
+       inst->alg.exit = crypto_ccm_exit_tfm;
+       inst->alg.setkey = crypto_ccm_setkey;
+       inst->alg.setauthsize = crypto_ccm_setauthsize;
+       inst->alg.encrypt = crypto_ccm_encrypt;
+       inst->alg.decrypt = crypto_ccm_decrypt;
+
+       inst->free = crypto_ccm_free;
+
+       err = aead_register_instance(tmpl, inst);
+       if (err)
+               goto err_drop_ctr;
 
-out:
+out_put_cipher:
        crypto_mod_put(cipher);
-       return inst;
+       return err;
 
 err_drop_ctr:
        crypto_drop_skcipher(&ictx->ctr);
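
Every template conversion in this series converges on the same shape: allocate an aead_instance, fill inst->alg, set inst->free, then finish with aead_register_instance(). A stripped-down skeleton of that flow; the context struct and free function are placeholders, and the elided spawn grabbing and algorithm fields are of course mandatory in real code:

#include <crypto/internal/aead.h>
#include <linux/slab.h>

struct example_inst_ctx { int unused; };	/* placeholder per-instance context */

static void example_inst_free(struct aead_instance *inst)
{
	kfree(inst);
}

static int example_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct aead_instance *inst;
	int err;

	inst = kzalloc(sizeof(*inst) + sizeof(struct example_inst_ctx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	/* ... grab spawns, set cra_name/cra_driver_name, ivsize, callbacks ... */

	inst->free = example_inst_free;		/* replaces the template-level ->free */

	err = aead_register_instance(tmpl, inst);
	if (err)
		kfree(inst);
	return err;
}
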
@@ -565,12 +604,10 @@ err_drop_cipher:
        crypto_drop_spawn(&ictx->cipher);
 err_free_inst:
        kfree(inst);
-out_put_cipher:
-       inst = ERR_PTR(err);
-       goto out;
+       goto out_put_cipher;
 }
 
-static struct crypto_instance *crypto_ccm_alloc(struct rtattr **tb)
+static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
        const char *cipher_name;
        char ctr_name[CRYPTO_MAX_ALG_NAME];
@@ -578,36 +615,28 @@ static struct crypto_instance *crypto_ccm_alloc(struct rtattr **tb)
 
        cipher_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(cipher_name))
-               return ERR_CAST(cipher_name);
+               return PTR_ERR(cipher_name);
 
        if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)",
                     cipher_name) >= CRYPTO_MAX_ALG_NAME)
-               return ERR_PTR(-ENAMETOOLONG);
+               return -ENAMETOOLONG;
 
        if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm(%s)", cipher_name) >=
            CRYPTO_MAX_ALG_NAME)
-               return ERR_PTR(-ENAMETOOLONG);
+               return -ENAMETOOLONG;
 
-       return crypto_ccm_alloc_common(tb, full_name, ctr_name, cipher_name);
-}
-
-static void crypto_ccm_free(struct crypto_instance *inst)
-{
-       struct ccm_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-       crypto_drop_spawn(&ctx->cipher);
-       crypto_drop_skcipher(&ctx->ctr);
-       kfree(inst);
+       return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
+                                       cipher_name);
 }
 
 static struct crypto_template crypto_ccm_tmpl = {
        .name = "ccm",
-       .alloc = crypto_ccm_alloc,
-       .free = crypto_ccm_free,
+       .create = crypto_ccm_create,
        .module = THIS_MODULE,
 };
 
-static struct crypto_instance *crypto_ccm_base_alloc(struct rtattr **tb)
+static int crypto_ccm_base_create(struct crypto_template *tmpl,
+                                 struct rtattr **tb)
 {
        const char *ctr_name;
        const char *cipher_name;
@@ -615,23 +644,23 @@ static struct crypto_instance *crypto_ccm_base_alloc(struct rtattr **tb)
 
        ctr_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(ctr_name))
-               return ERR_CAST(ctr_name);
+               return PTR_ERR(ctr_name);
 
        cipher_name = crypto_attr_alg_name(tb[2]);
        if (IS_ERR(cipher_name))
-               return ERR_CAST(cipher_name);
+               return PTR_ERR(cipher_name);
 
        if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)",
                     ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME)
-               return ERR_PTR(-ENAMETOOLONG);
+               return -ENAMETOOLONG;
 
-       return crypto_ccm_alloc_common(tb, full_name, ctr_name, cipher_name);
+       return crypto_ccm_create_common(tmpl, tb, full_name, ctr_name,
+                                       cipher_name);
 }
 
 static struct crypto_template crypto_ccm_base_tmpl = {
        .name = "ccm_base",
-       .alloc = crypto_ccm_base_alloc,
-       .free = crypto_ccm_free,
+       .create = crypto_ccm_base_create,
        .module = THIS_MODULE,
 };
 
@@ -677,10 +706,12 @@ static int crypto_rfc4309_setauthsize(struct crypto_aead *parent,
 
 static struct aead_request *crypto_rfc4309_crypt(struct aead_request *req)
 {
-       struct aead_request *subreq = aead_request_ctx(req);
+       struct crypto_rfc4309_req_ctx *rctx = aead_request_ctx(req);
+       struct aead_request *subreq = &rctx->subreq;
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(aead);
        struct crypto_aead *child = ctx->child;
+       struct scatterlist *sg;
        u8 *iv = PTR_ALIGN((u8 *)(subreq + 1) + crypto_aead_reqsize(child),
                           crypto_aead_alignmask(child) + 1);
 
@@ -690,17 +721,38 @@ static struct aead_request *crypto_rfc4309_crypt(struct aead_request *req)
        memcpy(iv + 1, ctx->nonce, 3);
        memcpy(iv + 4, req->iv, 8);
 
+       scatterwalk_map_and_copy(iv + 16, req->src, 0, req->assoclen - 8, 0);
+
+       sg_init_table(rctx->src, 3);
+       sg_set_buf(rctx->src, iv + 16, req->assoclen - 8);
+       sg = scatterwalk_ffwd(rctx->src + 1, req->src, req->assoclen);
+       if (sg != rctx->src + 1)
+               sg_chain(rctx->src, 2, sg);
+
+       if (req->src != req->dst) {
+               sg_init_table(rctx->dst, 3);
+               sg_set_buf(rctx->dst, iv + 16, req->assoclen - 8);
+               sg = scatterwalk_ffwd(rctx->dst + 1, req->dst, req->assoclen);
+               if (sg != rctx->dst + 1)
+                       sg_chain(rctx->dst, 2, sg);
+       }
+
        aead_request_set_tfm(subreq, child);
        aead_request_set_callback(subreq, req->base.flags, req->base.complete,
                                  req->base.data);
-       aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, iv);
-       aead_request_set_assoc(subreq, req->assoc, req->assoclen);
+       aead_request_set_crypt(subreq, rctx->src,
+                              req->src == req->dst ? rctx->src : rctx->dst,
+                              req->cryptlen, iv);
+       aead_request_set_ad(subreq, req->assoclen - 8);
 
        return subreq;
 }
 
 static int crypto_rfc4309_encrypt(struct aead_request *req)
 {
+       if (req->assoclen != 16 && req->assoclen != 20)
+               return -EINVAL;
+
        req = crypto_rfc4309_crypt(req);
 
        return crypto_aead_encrypt(req);
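
The new assoclen check encodes the ESP usage from RFC 4309: the associated data handed to this template is the ESP header followed by the 8-byte IV, so only two lengths are valid. Summarised below; this layout is implied by the code rather than spelled out in the patch:

/*
 *   assoclen == 16:  SPI (4) + sequence number (4)          + IV (8)
 *   assoclen == 20:  SPI (4) + extended sequence number (8) + IV (8)
 *
 * crypto_rfc4309_crypt() strips the trailing 8-byte IV from the AD
 * (aead_request_set_ad(subreq, req->assoclen - 8)) and folds it into the
 * CCM nonce instead of authenticating it as associated data.
 */
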
@@ -708,16 +760,19 @@ static int crypto_rfc4309_encrypt(struct aead_request *req)
 
 static int crypto_rfc4309_decrypt(struct aead_request *req)
 {
+       if (req->assoclen != 16 && req->assoclen != 20)
+               return -EINVAL;
+
        req = crypto_rfc4309_crypt(req);
 
        return crypto_aead_decrypt(req);
 }
 
-static int crypto_rfc4309_init_tfm(struct crypto_tfm *tfm)
+static int crypto_rfc4309_init_tfm(struct crypto_aead *tfm)
 {
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct crypto_aead_spawn *spawn = crypto_instance_ctx(inst);
-       struct crypto_rfc4309_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aead_instance *inst = aead_alg_instance(tfm);
+       struct crypto_aead_spawn *spawn = aead_instance_ctx(inst);
+       struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(tfm);
        struct crypto_aead *aead;
        unsigned long align;
 
@@ -729,115 +784,120 @@ static int crypto_rfc4309_init_tfm(struct crypto_tfm *tfm)
 
        align = crypto_aead_alignmask(aead);
        align &= ~(crypto_tfm_ctx_alignment() - 1);
-       crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
-               sizeof(struct aead_request) +
+       crypto_aead_set_reqsize(
+               tfm,
+               sizeof(struct crypto_rfc4309_req_ctx) +
                ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
-               align + 16);
+               align + 32);
 
        return 0;
 }
 
-static void crypto_rfc4309_exit_tfm(struct crypto_tfm *tfm)
+static void crypto_rfc4309_exit_tfm(struct crypto_aead *tfm)
 {
-       struct crypto_rfc4309_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(tfm);
 
        crypto_free_aead(ctx->child);
 }
 
-static struct crypto_instance *crypto_rfc4309_alloc(struct rtattr **tb)
+static void crypto_rfc4309_free(struct aead_instance *inst)
+{
+       crypto_drop_aead(aead_instance_ctx(inst));
+       kfree(inst);
+}
+
+static int crypto_rfc4309_create(struct crypto_template *tmpl,
+                                struct rtattr **tb)
 {
        struct crypto_attr_type *algt;
-       struct crypto_instance *inst;
+       struct aead_instance *inst;
        struct crypto_aead_spawn *spawn;
-       struct crypto_alg *alg;
+       struct aead_alg *alg;
        const char *ccm_name;
        int err;
 
        algt = crypto_get_attr_type(tb);
        if (IS_ERR(algt))
-               return ERR_CAST(algt);
+               return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return ERR_PTR(-EINVAL);
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
+               return -EINVAL;
 
        ccm_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(ccm_name))
-               return ERR_CAST(ccm_name);
+               return PTR_ERR(ccm_name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
 
-       spawn = crypto_instance_ctx(inst);
-       crypto_set_aead_spawn(spawn, inst);
+       spawn = aead_instance_ctx(inst);
+       crypto_set_aead_spawn(spawn, aead_crypto_instance(inst));
        err = crypto_grab_aead(spawn, ccm_name, 0,
                               crypto_requires_sync(algt->type, algt->mask));
        if (err)
                goto out_free_inst;
 
-       alg = crypto_aead_spawn_alg(spawn);
+       alg = crypto_spawn_aead_alg(spawn);
 
        err = -EINVAL;
 
        /* We only support 16-byte blocks. */
-       if (alg->cra_aead.ivsize != 16)
+       if (crypto_aead_alg_ivsize(alg) != 16)
                goto out_drop_alg;
 
        /* Not a stream cipher? */
-       if (alg->cra_blocksize != 1)
+       if (alg->base.cra_blocksize != 1)
                goto out_drop_alg;
 
        err = -ENAMETOOLONG;
-       if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
-                    "rfc4309(%s)", alg->cra_name) >= CRYPTO_MAX_ALG_NAME ||
-           snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-                    "rfc4309(%s)", alg->cra_driver_name) >=
+       if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+                    "rfc4309(%s)", alg->base.cra_name) >=
+           CRYPTO_MAX_ALG_NAME ||
+           snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+                    "rfc4309(%s)", alg->base.cra_driver_name) >=
            CRYPTO_MAX_ALG_NAME)
                goto out_drop_alg;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-       inst->alg.cra_flags |= alg->cra_flags & CRYPTO_ALG_ASYNC;
-       inst->alg.cra_priority = alg->cra_priority;
-       inst->alg.cra_blocksize = 1;
-       inst->alg.cra_alignmask = alg->cra_alignmask;
-       inst->alg.cra_type = &crypto_nivaead_type;
+       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
+       inst->alg.base.cra_priority = alg->base.cra_priority;
+       inst->alg.base.cra_blocksize = 1;
+       inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
 
-       inst->alg.cra_aead.ivsize = 8;
-       inst->alg.cra_aead.maxauthsize = 16;
+       inst->alg.ivsize = 8;
+       inst->alg.maxauthsize = 16;
 
-       inst->alg.cra_ctxsize = sizeof(struct crypto_rfc4309_ctx);
+       inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4309_ctx);
 
-       inst->alg.cra_init = crypto_rfc4309_init_tfm;
-       inst->alg.cra_exit = crypto_rfc4309_exit_tfm;
+       inst->alg.init = crypto_rfc4309_init_tfm;
+       inst->alg.exit = crypto_rfc4309_exit_tfm;
 
-       inst->alg.cra_aead.setkey = crypto_rfc4309_setkey;
-       inst->alg.cra_aead.setauthsize = crypto_rfc4309_setauthsize;
-       inst->alg.cra_aead.encrypt = crypto_rfc4309_encrypt;
-       inst->alg.cra_aead.decrypt = crypto_rfc4309_decrypt;
+       inst->alg.setkey = crypto_rfc4309_setkey;
+       inst->alg.setauthsize = crypto_rfc4309_setauthsize;
+       inst->alg.encrypt = crypto_rfc4309_encrypt;
+       inst->alg.decrypt = crypto_rfc4309_decrypt;
 
-       inst->alg.cra_aead.geniv = "seqiv";
+       inst->free = crypto_rfc4309_free;
+
+       err = aead_register_instance(tmpl, inst);
+       if (err)
+               goto out_drop_alg;
 
 out:
-       return inst;
+       return err;
 
 out_drop_alg:
        crypto_drop_aead(spawn);
 out_free_inst:
        kfree(inst);
-       inst = ERR_PTR(err);
        goto out;
 }
 
-static void crypto_rfc4309_free(struct crypto_instance *inst)
-{
-       crypto_drop_spawn(crypto_instance_ctx(inst));
-       kfree(inst);
-}
-
 static struct crypto_template crypto_rfc4309_tmpl = {
        .name = "rfc4309",
-       .alloc = crypto_rfc4309_alloc,
-       .free = crypto_rfc4309_free,
+       .create = crypto_rfc4309_create,
        .module = THIS_MODULE,
 };
 
index fa42e708aa9616d82ec18191a505fcc7e24abab4..da9c89968223673e08160fd6656cd0907437e070 100644 (file)
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-
-#define CHACHA20_NONCE_SIZE 16
-#define CHACHA20_KEY_SIZE   32
-#define CHACHA20_BLOCK_SIZE 64
-
-struct chacha20_ctx {
-       u32 key[8];
-};
+#include <crypto/chacha20.h>
 
 static inline u32 rotl32(u32 v, u8 n)
 {
@@ -108,7 +101,7 @@ static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
        }
 }
 
-static void chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
+void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
 {
        static const char constant[16] = "expand 32-byte k";
 
@@ -129,8 +122,9 @@ static void chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
        state[14] = le32_to_cpuvp(iv +  8);
        state[15] = le32_to_cpuvp(iv + 12);
 }
+EXPORT_SYMBOL_GPL(crypto_chacha20_init);
 
-static int chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
                           unsigned int keysize)
 {
        struct chacha20_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -144,8 +138,9 @@ static int chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
 
-static int chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                          struct scatterlist *src, unsigned int nbytes)
 {
        struct blkcipher_walk walk;
@@ -155,7 +150,7 @@ static int chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
        blkcipher_walk_init(&walk, dst, src, nbytes);
        err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE);
 
-       chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
+       crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
 
        while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
                chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
@@ -172,6 +167,7 @@ static int chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 
        return err;
 }
+EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
 
 static struct crypto_alg alg = {
        .cra_name               = "chacha20",
@@ -187,11 +183,11 @@ static struct crypto_alg alg = {
                .blkcipher = {
                        .min_keysize    = CHACHA20_KEY_SIZE,
                        .max_keysize    = CHACHA20_KEY_SIZE,
-                       .ivsize         = CHACHA20_NONCE_SIZE,
+                       .ivsize         = CHACHA20_IV_SIZE,
                        .geniv          = "seqiv",
-                       .setkey         = chacha20_setkey,
-                       .encrypt        = chacha20_crypt,
-                       .decrypt        = chacha20_crypt,
+                       .setkey         = crypto_chacha20_setkey,
+                       .encrypt        = crypto_chacha20_crypt,
+                       .decrypt        = crypto_chacha20_crypt,
                },
        },
 };
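
Exporting the setkey/init/crypt helpers and moving the constants to <crypto/chacha20.h> lets the new x86 glue reuse them, while ordinary users keep going through the (legacy) blkcipher interface this file registers. A minimal sketch of such a user; buffer setup and the synchronous error handling are assumptions of the example, not part of the patch:

#include <crypto/chacha20.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_chacha20(const u8 key[CHACHA20_KEY_SIZE],
			    const u8 iv[CHACHA20_IV_SIZE],
			    struct scatterlist *sg, unsigned int nbytes)
{
	struct crypto_blkcipher *tfm;
	struct blkcipher_desc desc;
	int err;

	tfm = crypto_alloc_blkcipher("chacha20", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_blkcipher_setkey(tfm, key, CHACHA20_KEY_SIZE);
	if (!err) {
		desc.tfm = tfm;
		desc.flags = 0;
		desc.info = (void *)iv;	/* 16 bytes loaded into state words 12..15 (counter + nonce) */
		err = crypto_blkcipher_encrypt_iv(&desc, sg, sg, nbytes);
	}

	crypto_free_blkcipher(tfm);
	return err;
}
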
index 7b46ed799a64cdc6710408a16a847147ccd58290..b71445f282ad2ac62d23a0dee095d67572e12de3 100644 (file)
@@ -13,6 +13,8 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/chacha20.h>
+#include <crypto/poly1305.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 
 #include "internal.h"
 
-#define POLY1305_BLOCK_SIZE    16
-#define POLY1305_DIGEST_SIZE   16
-#define POLY1305_KEY_SIZE      32
-#define CHACHA20_KEY_SIZE      32
-#define CHACHA20_IV_SIZE       16
 #define CHACHAPOLY_IV_SIZE     12
 
 struct chachapoly_instance_ctx {
@@ -60,12 +57,16 @@ struct chacha_req {
 };
 
 struct chachapoly_req_ctx {
+       struct scatterlist src[2];
+       struct scatterlist dst[2];
        /* the key we generate for Poly1305 using Chacha20 */
        u8 key[POLY1305_KEY_SIZE];
        /* calculated Poly1305 tag */
        u8 tag[POLY1305_DIGEST_SIZE];
        /* length of data to en/decrypt, without ICV */
        unsigned int cryptlen;
+       /* Actual AD, excluding IV */
+       unsigned int assoclen;
        union {
                struct poly_req poly;
                struct chacha_req chacha;
@@ -98,7 +99,9 @@ static int poly_verify_tag(struct aead_request *req)
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
        u8 tag[sizeof(rctx->tag)];
 
-       scatterwalk_map_and_copy(tag, req->src, rctx->cryptlen, sizeof(tag), 0);
+       scatterwalk_map_and_copy(tag, req->src,
+                                req->assoclen + rctx->cryptlen,
+                                sizeof(tag), 0);
        if (crypto_memneq(tag, rctx->tag, sizeof(tag)))
                return -EBADMSG;
        return 0;
@@ -108,7 +111,8 @@ static int poly_copy_tag(struct aead_request *req)
 {
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
 
-       scatterwalk_map_and_copy(rctx->tag, req->dst, rctx->cryptlen,
+       scatterwalk_map_and_copy(rctx->tag, req->dst,
+                                req->assoclen + rctx->cryptlen,
                                 sizeof(rctx->tag), 1);
        return 0;
 }
@@ -123,14 +127,24 @@ static int chacha_decrypt(struct aead_request *req)
        struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
        struct chacha_req *creq = &rctx->u.chacha;
+       struct scatterlist *src, *dst;
        int err;
 
        chacha_iv(creq->iv, req, 1);
 
+       sg_init_table(rctx->src, 2);
+       src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen);
+       dst = src;
+
+       if (req->src != req->dst) {
+               sg_init_table(rctx->dst, 2);
+               dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen);
+       }
+
        ablkcipher_request_set_callback(&creq->req, aead_request_flags(req),
                                        chacha_decrypt_done, req);
        ablkcipher_request_set_tfm(&creq->req, ctx->chacha);
-       ablkcipher_request_set_crypt(&creq->req, req->src, req->dst,
+       ablkcipher_request_set_crypt(&creq->req, src, dst,
                                     rctx->cryptlen, creq->iv);
        err = crypto_ablkcipher_decrypt(&creq->req);
        if (err)
@@ -156,14 +170,15 @@ static void poly_tail_done(struct crypto_async_request *areq, int err)
 
 static int poly_tail(struct aead_request *req)
 {
-       struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
        struct poly_req *preq = &rctx->u.poly;
        __le64 len;
        int err;
 
        sg_init_table(preq->src, 1);
-       len = cpu_to_le64(req->assoclen);
+       len = cpu_to_le64(rctx->assoclen);
        memcpy(&preq->tail.assoclen, &len, sizeof(len));
        len = cpu_to_le64(rctx->cryptlen);
        memcpy(&preq->tail.cryptlen, &len, sizeof(len));
@@ -228,6 +243,9 @@ static int poly_cipher(struct aead_request *req)
        if (rctx->cryptlen == req->cryptlen) /* encrypting */
                crypt = req->dst;
 
+       sg_init_table(rctx->src, 2);
+       crypt = scatterwalk_ffwd(rctx->src, crypt, req->assoclen);
+
        ahash_request_set_callback(&preq->req, aead_request_flags(req),
                                   poly_cipher_done, req);
        ahash_request_set_tfm(&preq->req, ctx->poly);
@@ -253,7 +271,7 @@ static int poly_adpad(struct aead_request *req)
        unsigned int padlen, bs = POLY1305_BLOCK_SIZE;
        int err;
 
-       padlen = (bs - (req->assoclen % bs)) % bs;
+       padlen = (bs - (rctx->assoclen % bs)) % bs;
        memset(preq->pad, 0, sizeof(preq->pad));
        sg_init_table(preq->src, 1);
        sg_set_buf(preq->src, preq->pad, padlen);
@@ -285,7 +303,7 @@ static int poly_ad(struct aead_request *req)
        ahash_request_set_callback(&preq->req, aead_request_flags(req),
                                   poly_ad_done, req);
        ahash_request_set_tfm(&preq->req, ctx->poly);
-       ahash_request_set_crypt(&preq->req, req->assoc, NULL, req->assoclen);
+       ahash_request_set_crypt(&preq->req, req->src, NULL, rctx->assoclen);
 
        err = crypto_ahash_update(&preq->req);
        if (err)
@@ -351,11 +369,20 @@ static void poly_genkey_done(struct crypto_async_request *areq, int err)
 
 static int poly_genkey(struct aead_request *req)
 {
-       struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+       struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
        struct chacha_req *creq = &rctx->u.chacha;
        int err;
 
+       rctx->assoclen = req->assoclen;
+
+       if (crypto_aead_ivsize(tfm) == 8) {
+               if (rctx->assoclen < 8)
+                       return -EINVAL;
+               rctx->assoclen -= 8;
+       }
+
        sg_init_table(creq->src, 1);
        memset(rctx->key, 0, sizeof(rctx->key));
        sg_set_buf(creq->src, rctx->key, sizeof(rctx->key));
@@ -385,14 +412,24 @@ static int chacha_encrypt(struct aead_request *req)
        struct chachapoly_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
        struct chacha_req *creq = &rctx->u.chacha;
+       struct scatterlist *src, *dst;
        int err;
 
        chacha_iv(creq->iv, req, 1);
 
+       sg_init_table(rctx->src, 2);
+       src = scatterwalk_ffwd(rctx->src, req->src, req->assoclen);
+       dst = src;
+
+       if (req->src != req->dst) {
+               sg_init_table(rctx->dst, 2);
+               dst = scatterwalk_ffwd(rctx->dst, req->dst, req->assoclen);
+       }
+
        ablkcipher_request_set_callback(&creq->req, aead_request_flags(req),
                                        chacha_encrypt_done, req);
        ablkcipher_request_set_tfm(&creq->req, ctx->chacha);
-       ablkcipher_request_set_crypt(&creq->req, req->src, req->dst,
+       ablkcipher_request_set_crypt(&creq->req, src, dst,
                                     req->cryptlen, creq->iv);
        err = crypto_ablkcipher_encrypt(&creq->req);
        if (err)
@@ -426,8 +463,6 @@ static int chachapoly_decrypt(struct aead_request *req)
 {
        struct chachapoly_req_ctx *rctx = aead_request_ctx(req);
 
-       if (req->cryptlen < POLY1305_DIGEST_SIZE)
-               return -EINVAL;
        rctx->cryptlen = req->cryptlen - POLY1305_DIGEST_SIZE;
 
        /* decrypt call chain:
@@ -476,11 +511,11 @@ static int chachapoly_setauthsize(struct crypto_aead *tfm,
        return 0;
 }
 
-static int chachapoly_init(struct crypto_tfm *tfm)
+static int chachapoly_init(struct crypto_aead *tfm)
 {
-       struct crypto_instance *inst = (void *)tfm->__crt_alg;
-       struct chachapoly_instance_ctx *ictx = crypto_instance_ctx(inst);
-       struct chachapoly_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct aead_instance *inst = aead_alg_instance(tfm);
+       struct chachapoly_instance_ctx *ictx = aead_instance_ctx(inst);
+       struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
        struct crypto_ablkcipher *chacha;
        struct crypto_ahash *poly;
        unsigned long align;
@@ -499,77 +534,88 @@ static int chachapoly_init(struct crypto_tfm *tfm)
        ctx->poly = poly;
        ctx->saltlen = ictx->saltlen;
 
-       align = crypto_tfm_alg_alignmask(tfm);
+       align = crypto_aead_alignmask(tfm);
        align &= ~(crypto_tfm_ctx_alignment() - 1);
-       crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
-                               align + offsetof(struct chachapoly_req_ctx, u) +
-                               max(offsetof(struct chacha_req, req) +
-                                   sizeof(struct ablkcipher_request) +
-                                   crypto_ablkcipher_reqsize(chacha),
-                                   offsetof(struct poly_req, req) +
-                                   sizeof(struct ahash_request) +
-                                   crypto_ahash_reqsize(poly)));
+       crypto_aead_set_reqsize(
+               tfm,
+               align + offsetof(struct chachapoly_req_ctx, u) +
+               max(offsetof(struct chacha_req, req) +
+                   sizeof(struct ablkcipher_request) +
+                   crypto_ablkcipher_reqsize(chacha),
+                   offsetof(struct poly_req, req) +
+                   sizeof(struct ahash_request) +
+                   crypto_ahash_reqsize(poly)));
 
        return 0;
 }
 
-static void chachapoly_exit(struct crypto_tfm *tfm)
+static void chachapoly_exit(struct crypto_aead *tfm)
 {
-       struct chachapoly_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct chachapoly_ctx *ctx = crypto_aead_ctx(tfm);
 
        crypto_free_ahash(ctx->poly);
        crypto_free_ablkcipher(ctx->chacha);
 }
 
-static struct crypto_instance *chachapoly_alloc(struct rtattr **tb,
-                                               const char *name,
-                                               unsigned int ivsize)
+static void chachapoly_free(struct aead_instance *inst)
+{
+       struct chachapoly_instance_ctx *ctx = aead_instance_ctx(inst);
+
+       crypto_drop_skcipher(&ctx->chacha);
+       crypto_drop_ahash(&ctx->poly);
+       kfree(inst);
+}
+
+static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
+                            const char *name, unsigned int ivsize)
 {
        struct crypto_attr_type *algt;
-       struct crypto_instance *inst;
+       struct aead_instance *inst;
        struct crypto_alg *chacha;
        struct crypto_alg *poly;
-       struct ahash_alg *poly_ahash;
+       struct hash_alg_common *poly_hash;
        struct chachapoly_instance_ctx *ctx;
        const char *chacha_name, *poly_name;
        int err;
 
        if (ivsize > CHACHAPOLY_IV_SIZE)
-               return ERR_PTR(-EINVAL);
+               return -EINVAL;
 
        algt = crypto_get_attr_type(tb);
        if (IS_ERR(algt))
-               return ERR_CAST(algt);
+               return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
-               return ERR_PTR(-EINVAL);
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
+               return -EINVAL;
 
        chacha_name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(chacha_name))
-               return ERR_CAST(chacha_name);
+               return PTR_ERR(chacha_name);
        poly_name = crypto_attr_alg_name(tb[2]);
        if (IS_ERR(poly_name))
-               return ERR_CAST(poly_name);
+               return PTR_ERR(poly_name);
 
        poly = crypto_find_alg(poly_name, &crypto_ahash_type,
                               CRYPTO_ALG_TYPE_HASH,
                               CRYPTO_ALG_TYPE_AHASH_MASK);
        if (IS_ERR(poly))
-               return ERR_CAST(poly);
+               return PTR_ERR(poly);
 
        err = -ENOMEM;
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
        if (!inst)
                goto out_put_poly;
 
-       ctx = crypto_instance_ctx(inst);
+       ctx = aead_instance_ctx(inst);
        ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-       poly_ahash = container_of(poly, struct ahash_alg, halg.base);
-       err = crypto_init_ahash_spawn(&ctx->poly, &poly_ahash->halg, inst);
+       poly_hash = __crypto_hash_alg_common(poly);
+       err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
+                                     aead_crypto_instance(inst));
        if (err)
                goto err_free_inst;
 
-       crypto_set_skcipher_spawn(&ctx->chacha, inst);
+       crypto_set_skcipher_spawn(&ctx->chacha, aead_crypto_instance(inst));
        err = crypto_grab_skcipher(&ctx->chacha, chacha_name, 0,
                                   crypto_requires_sync(algt->type,
                                                        algt->mask));
@@ -587,37 +633,43 @@ static struct crypto_instance *chachapoly_alloc(struct rtattr **tb,
                goto out_drop_chacha;
 
        err = -ENAMETOOLONG;
-       if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME,
+       if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
                     "%s(%s,%s)", name, chacha_name,
                     poly_name) >= CRYPTO_MAX_ALG_NAME)
                goto out_drop_chacha;
-       if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+       if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
                     "%s(%s,%s)", name, chacha->cra_driver_name,
                     poly->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
                goto out_drop_chacha;
 
-       inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD;
-       inst->alg.cra_flags |= (chacha->cra_flags |
-                               poly->cra_flags) & CRYPTO_ALG_ASYNC;
-       inst->alg.cra_priority = (chacha->cra_priority +
-                                 poly->cra_priority) / 2;
-       inst->alg.cra_blocksize = 1;
-       inst->alg.cra_alignmask = chacha->cra_alignmask | poly->cra_alignmask;
-       inst->alg.cra_type = &crypto_nivaead_type;
-       inst->alg.cra_aead.ivsize = ivsize;
-       inst->alg.cra_aead.maxauthsize = POLY1305_DIGEST_SIZE;
-       inst->alg.cra_ctxsize = sizeof(struct chachapoly_ctx) + ctx->saltlen;
-       inst->alg.cra_init = chachapoly_init;
-       inst->alg.cra_exit = chachapoly_exit;
-       inst->alg.cra_aead.encrypt = chachapoly_encrypt;
-       inst->alg.cra_aead.decrypt = chachapoly_decrypt;
-       inst->alg.cra_aead.setkey = chachapoly_setkey;
-       inst->alg.cra_aead.setauthsize = chachapoly_setauthsize;
-       inst->alg.cra_aead.geniv = "seqiv";
-
-out:
+       inst->alg.base.cra_flags = (chacha->cra_flags | poly->cra_flags) &
+                                  CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
+       inst->alg.base.cra_priority = (chacha->cra_priority +
+                                      poly->cra_priority) / 2;
+       inst->alg.base.cra_blocksize = 1;
+       inst->alg.base.cra_alignmask = chacha->cra_alignmask |
+                                      poly->cra_alignmask;
+       inst->alg.base.cra_ctxsize = sizeof(struct chachapoly_ctx) +
+                                    ctx->saltlen;
+       inst->alg.ivsize = ivsize;
+       inst->alg.maxauthsize = POLY1305_DIGEST_SIZE;
+       inst->alg.init = chachapoly_init;
+       inst->alg.exit = chachapoly_exit;
+       inst->alg.encrypt = chachapoly_encrypt;
+       inst->alg.decrypt = chachapoly_decrypt;
+       inst->alg.setkey = chachapoly_setkey;
+       inst->alg.setauthsize = chachapoly_setauthsize;
+
+       inst->free = chachapoly_free;
+
+       err = aead_register_instance(tmpl, inst);
+       if (err)
+               goto out_drop_chacha;
+
+out_put_poly:
        crypto_mod_put(poly);
-       return inst;
+       return err;
 
 out_drop_chacha:
        crypto_drop_skcipher(&ctx->chacha);
@@ -625,41 +677,28 @@ err_drop_poly:
        crypto_drop_ahash(&ctx->poly);
 err_free_inst:
        kfree(inst);
-out_put_poly:
-       inst = ERR_PTR(err);
-       goto out;
-}
-
-static struct crypto_instance *rfc7539_alloc(struct rtattr **tb)
-{
-       return chachapoly_alloc(tb, "rfc7539", 12);
+       goto out_put_poly;
 }
 
-static struct crypto_instance *rfc7539esp_alloc(struct rtattr **tb)
+static int rfc7539_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       return chachapoly_alloc(tb, "rfc7539esp", 8);
+       return chachapoly_create(tmpl, tb, "rfc7539", 12);
 }
 
-static void chachapoly_free(struct crypto_instance *inst)
+static int rfc7539esp_create(struct crypto_template *tmpl, struct rtattr **tb)
 {
-       struct chachapoly_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-       crypto_drop_skcipher(&ctx->chacha);
-       crypto_drop_ahash(&ctx->poly);
-       kfree(inst);
+       return chachapoly_create(tmpl, tb, "rfc7539esp", 8);
 }
 
 static struct crypto_template rfc7539_tmpl = {
        .name = "rfc7539",
-       .alloc = rfc7539_alloc,
-       .free = chachapoly_free,
+       .create = rfc7539_create,
        .module = THIS_MODULE,
 };
 
 static struct crypto_template rfc7539esp_tmpl = {
        .name = "rfc7539esp",
-       .alloc = rfc7539esp_alloc,
-       .free = chachapoly_free,
+       .create = rfc7539esp_create,
        .module = THIS_MODULE,
 };
 
@@ -690,6 +729,5 @@ module_exit(chacha20poly1305_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
 MODULE_DESCRIPTION("ChaCha20-Poly1305 AEAD");
-MODULE_ALIAS_CRYPTO("chacha20poly1305");
 MODULE_ALIAS_CRYPTO("rfc7539");
 MODULE_ALIAS_CRYPTO("rfc7539esp");
index 22ba81f76764aff2edd614ad8fad2914df33a8a9..360ee85543fdb3ea28fd5e8380c63bcdb7350418 100644 (file)
@@ -176,10 +176,9 @@ static inline void cryptd_check_internal(struct rtattr **tb, u32 *type,
        algt = crypto_get_attr_type(tb);
        if (IS_ERR(algt))
                return;
-       if ((algt->type & CRYPTO_ALG_INTERNAL))
-               *type |= CRYPTO_ALG_INTERNAL;
-       if ((algt->mask & CRYPTO_ALG_INTERNAL))
-               *mask |= CRYPTO_ALG_INTERNAL;
+
+       *type |= algt->type & (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_AEAD_NEW);
+       *mask |= algt->mask & (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_AEAD_NEW);
 }
 
 static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
@@ -688,16 +687,18 @@ static void cryptd_aead_crypt(struct aead_request *req,
                        int (*crypt)(struct aead_request *req))
 {
        struct cryptd_aead_request_ctx *rctx;
+       crypto_completion_t compl;
+
        rctx = aead_request_ctx(req);
+       compl = rctx->complete;
 
        if (unlikely(err == -EINPROGRESS))
                goto out;
        aead_request_set_tfm(req, child);
        err = crypt( req );
-       req->base.complete = rctx->complete;
 out:
        local_bh_disable();
-       rctx->complete(&req->base, err);
+       compl(&req->base, err);
        local_bh_enable();
 }
 
@@ -756,7 +757,9 @@ static int cryptd_aead_init_tfm(struct crypto_aead *tfm)
                return PTR_ERR(cipher);
 
        ctx->child = cipher;
-       crypto_aead_set_reqsize(tfm, sizeof(struct cryptd_aead_request_ctx));
+       crypto_aead_set_reqsize(
+               tfm, max((unsigned)sizeof(struct cryptd_aead_request_ctx),
+                        crypto_aead_reqsize(cipher)));
        return 0;
 }
 
@@ -775,7 +778,7 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
        struct aead_alg *alg;
        const char *name;
        u32 type = 0;
-       u32 mask = 0;
+       u32 mask = CRYPTO_ALG_ASYNC;
        int err;
 
        cryptd_check_internal(tb, &type, &mask);
@@ -802,7 +805,9 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
                goto out_drop_aead;
 
        inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC |
-                                  (alg->base.cra_flags & CRYPTO_ALG_INTERNAL);
+                                  (alg->base.cra_flags &
+                                   (CRYPTO_ALG_INTERNAL |
+                                    CRYPTO_ALG_AEAD_NEW));
        inst->alg.base.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
 
        inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
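
The request size of a cryptd AEAD now also covers the wrapped algorithm's, matching the way cryptd_aead_crypt() re-targets the very same request at the child; the completion callback is stashed first because the child may overwrite the shared request context. For reference, the usual way a SIMD-style driver obtains the cryptd wrapper; the algorithm name below is a made-up placeholder:

#include <crypto/cryptd.h>

static struct cryptd_aead *example_get_cryptd_aead(void)
{
	return cryptd_alloc_aead("__driver-gcm-aes-example",
				 CRYPTO_ALG_INTERNAL, CRYPTO_ALG_INTERNAL);
}
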
index b6e43dc6135653eddb3e31f9659d54bc774bc6d1..d3896c7e634be28f27864593b2f0eeff9ba34211 100644 (file)
@@ -145,8 +145,8 @@ static int echainiv_encrypt(struct aead_request *req)
 
        aead_request_set_callback(subreq, req->base.flags, compl, data);
        aead_request_set_crypt(subreq, req->dst, req->dst,
-                              req->cryptlen - ivsize, info);
-       aead_request_set_ad(subreq, req->assoclen + ivsize);
+                              req->cryptlen, info);
+       aead_request_set_ad(subreq, req->assoclen);
 
        crypto_xor(info, ctx->salt, ivsize);
        scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
@@ -166,7 +166,7 @@ static int echainiv_decrypt(struct aead_request *req)
        void *data;
        unsigned int ivsize = crypto_aead_ivsize(geniv);
 
-       if (req->cryptlen < ivsize + crypto_aead_authsize(geniv))
+       if (req->cryptlen < ivsize)
                return -EINVAL;
 
        aead_request_set_tfm(subreq, ctx->geniv.child);
@@ -180,16 +180,12 @@ static int echainiv_decrypt(struct aead_request *req)
        aead_request_set_ad(subreq, req->assoclen + ivsize);
 
        scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
-       if (req->src != req->dst)
-               scatterwalk_map_and_copy(req->iv, req->dst,
-                                        req->assoclen, ivsize, 1);
 
        return crypto_aead_decrypt(subreq);
 }
 
-static int echainiv_init(struct crypto_tfm *tfm)
+static int echainiv_init(struct crypto_aead *geniv)
 {
-       struct crypto_aead *geniv = __crypto_aead_cast(tfm);
        struct echainiv_ctx *ctx = crypto_aead_ctx(geniv);
        int err;
 
@@ -212,7 +208,7 @@ static int echainiv_init(struct crypto_tfm *tfm)
        if (IS_ERR(ctx->null))
                goto out;
 
-       err = aead_geniv_init(tfm);
+       err = aead_geniv_init(crypto_aead_tfm(geniv));
        if (err)
                goto drop_null;
 
@@ -227,9 +223,9 @@ drop_null:
        goto out;
 }
 
-static void echainiv_exit(struct crypto_tfm *tfm)
+static void echainiv_exit(struct crypto_aead *tfm)
 {
-       struct echainiv_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct echainiv_ctx *ctx = crypto_aead_ctx(tfm);
 
        crypto_free_aead(ctx->geniv.child);
        crypto_put_default_null_skcipher();
@@ -262,13 +258,15 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
        inst->alg.encrypt = echainiv_encrypt;
        inst->alg.decrypt = echainiv_decrypt;
 
-       inst->alg.base.cra_init = echainiv_init;
-       inst->alg.base.cra_exit = echainiv_exit;
+       inst->alg.init = echainiv_init;
+       inst->alg.exit = echainiv_exit;
 
        inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
        inst->alg.base.cra_ctxsize = sizeof(struct echainiv_ctx);
        inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
+       inst->free = aead_geniv_free;
+
 done:
        err = aead_register_instance(tmpl, inst);
        if (err)
index 7d32d4720564315f57a2766f5e1813d536d9ee69..0c9e33bdce1a76ea62b40956cd48619cedcd5e8e 100644 (file)
@@ -38,6 +38,12 @@ struct crypto_rfc4106_ctx {
        u8 nonce[4];
 };
 
+struct crypto_rfc4106_req_ctx {
+       struct scatterlist src[3];
+       struct scatterlist dst[3];
+       struct aead_request subreq;
+};
+
 struct crypto_rfc4543_instance_ctx {
        struct crypto_aead_spawn aead;
 };
@@ -601,6 +607,15 @@ static void crypto_gcm_exit_tfm(struct crypto_aead *tfm)
        crypto_free_ablkcipher(ctx->ctr);
 }
 
+static void crypto_gcm_free(struct aead_instance *inst)
+{
+       struct gcm_instance_ctx *ctx = aead_instance_ctx(inst);
+
+       crypto_drop_skcipher(&ctx->ctr);
+       crypto_drop_ahash(&ctx->ghash);
+       kfree(inst);
+}
+
 static int crypto_gcm_create_common(struct crypto_template *tmpl,
                                    struct rtattr **tb,
                                    const char *full_name,
@@ -619,7 +634,8 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
        if (IS_ERR(algt))
                return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
                return -EINVAL;
 
        ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
@@ -674,6 +690,7 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
 
        inst->alg.base.cra_flags = (ghash->base.cra_flags | ctr->cra_flags) &
                                   CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
        inst->alg.base.cra_priority = (ghash->base.cra_priority +
                                       ctr->cra_priority) / 2;
        inst->alg.base.cra_blocksize = 1;
@@ -689,6 +706,8 @@ static int crypto_gcm_create_common(struct crypto_template *tmpl,
        inst->alg.encrypt = crypto_gcm_encrypt;
        inst->alg.decrypt = crypto_gcm_decrypt;
 
+       inst->free = crypto_gcm_free;
+
        err = aead_register_instance(tmpl, inst);
        if (err)
                goto out_put_ctr;
@@ -728,19 +747,9 @@ static int crypto_gcm_create(struct crypto_template *tmpl, struct rtattr **tb)
                                        ctr_name, "ghash");
 }
 
-static void crypto_gcm_free(struct crypto_instance *inst)
-{
-       struct gcm_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-       crypto_drop_skcipher(&ctx->ctr);
-       crypto_drop_ahash(&ctx->ghash);
-       kfree(aead_instance(inst));
-}
-
 static struct crypto_template crypto_gcm_tmpl = {
        .name = "gcm",
        .create = crypto_gcm_create,
-       .free = crypto_gcm_free,
        .module = THIS_MODULE,
 };
 
@@ -770,7 +779,6 @@ static int crypto_gcm_base_create(struct crypto_template *tmpl,
 static struct crypto_template crypto_gcm_base_tmpl = {
        .name = "gcm_base",
        .create = crypto_gcm_base_create,
-       .free = crypto_gcm_free,
        .module = THIS_MODULE,
 };
 
@@ -816,27 +824,50 @@ static int crypto_rfc4106_setauthsize(struct crypto_aead *parent,
 
 static struct aead_request *crypto_rfc4106_crypt(struct aead_request *req)
 {
-       struct aead_request *subreq = aead_request_ctx(req);
+       struct crypto_rfc4106_req_ctx *rctx = aead_request_ctx(req);
        struct crypto_aead *aead = crypto_aead_reqtfm(req);
        struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(aead);
+       struct aead_request *subreq = &rctx->subreq;
        struct crypto_aead *child = ctx->child;
+       struct scatterlist *sg;
        u8 *iv = PTR_ALIGN((u8 *)(subreq + 1) + crypto_aead_reqsize(child),
                           crypto_aead_alignmask(child) + 1);
 
+       scatterwalk_map_and_copy(iv + 12, req->src, 0, req->assoclen - 8, 0);
+
        memcpy(iv, ctx->nonce, 4);
        memcpy(iv + 4, req->iv, 8);
 
+       sg_init_table(rctx->src, 3);
+       sg_set_buf(rctx->src, iv + 12, req->assoclen - 8);
+       sg = scatterwalk_ffwd(rctx->src + 1, req->src, req->assoclen);
+       if (sg != rctx->src + 1)
+               sg_chain(rctx->src, 2, sg);
+
+       if (req->src != req->dst) {
+               sg_init_table(rctx->dst, 3);
+               sg_set_buf(rctx->dst, iv + 12, req->assoclen - 8);
+               sg = scatterwalk_ffwd(rctx->dst + 1, req->dst, req->assoclen);
+               if (sg != rctx->dst + 1)
+                       sg_chain(rctx->dst, 2, sg);
+       }
+
        aead_request_set_tfm(subreq, child);
        aead_request_set_callback(subreq, req->base.flags, req->base.complete,
                                  req->base.data);
-       aead_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, iv);
-       aead_request_set_ad(subreq, req->assoclen);
+       aead_request_set_crypt(subreq, rctx->src,
+                              req->src == req->dst ? rctx->src : rctx->dst,
+                              req->cryptlen, iv);
+       aead_request_set_ad(subreq, req->assoclen - 8);
 
        return subreq;
 }
 
 static int crypto_rfc4106_encrypt(struct aead_request *req)
 {
+       if (req->assoclen != 16 && req->assoclen != 20)
+               return -EINVAL;
+
        req = crypto_rfc4106_crypt(req);
 
        return crypto_aead_encrypt(req);
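
As in rfc4309, the rfc4106 wrapper now copies the AD prefix into its own request context and re-chains it ahead of the payload; the extra 24 bytes reserved in crypto_rfc4106_init_tfm() below account for that. Inferred layout of that scratch area (not spelled out in the patch):

/*
 *   iv[ 0.. 3]  salt taken from the key (ctx->nonce)
 *   iv[ 4..11]  explicit IV from req->iv
 *   iv[12..23]  copy of the AD prefix (req->assoclen - 8 = 8 or 12 bytes,
 *               i.e. SPI + sequence number), re-linked via rctx->src/dst
 */
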
@@ -844,6 +875,9 @@ static int crypto_rfc4106_encrypt(struct aead_request *req)
 
 static int crypto_rfc4106_decrypt(struct aead_request *req)
 {
+       if (req->assoclen != 16 && req->assoclen != 20)
+               return -EINVAL;
+
        req = crypto_rfc4106_crypt(req);
 
        return crypto_aead_decrypt(req);
@@ -867,9 +901,9 @@ static int crypto_rfc4106_init_tfm(struct crypto_aead *tfm)
        align &= ~(crypto_tfm_ctx_alignment() - 1);
        crypto_aead_set_reqsize(
                tfm,
-               sizeof(struct aead_request) +
+               sizeof(struct crypto_rfc4106_req_ctx) +
                ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) +
-               align + 12);
+               align + 24);
 
        return 0;
 }
@@ -881,6 +915,12 @@ static void crypto_rfc4106_exit_tfm(struct crypto_aead *tfm)
        crypto_free_aead(ctx->child);
 }
 
+static void crypto_rfc4106_free(struct aead_instance *inst)
+{
+       crypto_drop_aead(aead_instance_ctx(inst));
+       kfree(inst);
+}
+
 static int crypto_rfc4106_create(struct crypto_template *tmpl,
                                 struct rtattr **tb)
 {
@@ -895,7 +935,8 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
        if (IS_ERR(algt))
                return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
                return -EINVAL;
 
        ccm_name = crypto_attr_alg_name(tb[1]);
@@ -934,7 +975,8 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
            CRYPTO_MAX_ALG_NAME)
                goto out_drop_alg;
 
-       inst->alg.base.cra_flags |= alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
@@ -952,6 +994,8 @@ static int crypto_rfc4106_create(struct crypto_template *tmpl,
        inst->alg.encrypt = crypto_rfc4106_encrypt;
        inst->alg.decrypt = crypto_rfc4106_decrypt;
 
+       inst->free = crypto_rfc4106_free;
+
        err = aead_register_instance(tmpl, inst);
        if (err)
                goto out_drop_alg;
@@ -966,16 +1010,9 @@ out_free_inst:
        goto out;
 }
 
-static void crypto_rfc4106_free(struct crypto_instance *inst)
-{
-       crypto_drop_aead(crypto_instance_ctx(inst));
-       kfree(aead_instance(inst));
-}
-
 static struct crypto_template crypto_rfc4106_tmpl = {
        .name = "rfc4106",
        .create = crypto_rfc4106_create,
-       .free = crypto_rfc4106_free,
        .module = THIS_MODULE,
 };
 
@@ -1114,6 +1151,15 @@ static void crypto_rfc4543_exit_tfm(struct crypto_aead *tfm)
        crypto_put_default_null_skcipher();
 }
 
+static void crypto_rfc4543_free(struct aead_instance *inst)
+{
+       struct crypto_rfc4543_instance_ctx *ctx = aead_instance_ctx(inst);
+
+       crypto_drop_aead(&ctx->aead);
+
+       kfree(inst);
+}
+
 static int crypto_rfc4543_create(struct crypto_template *tmpl,
                                struct rtattr **tb)
 {
@@ -1129,7 +1175,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
        if (IS_ERR(algt))
                return PTR_ERR(algt);
 
-       if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
+       if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_AEAD_NEW)) &
+           algt->mask)
                return -EINVAL;
 
        ccm_name = crypto_attr_alg_name(tb[1]);
@@ -1170,6 +1217,7 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
                goto out_drop_alg;
 
        inst->alg.base.cra_flags = alg->base.cra_flags & CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= CRYPTO_ALG_AEAD_NEW;
        inst->alg.base.cra_priority = alg->base.cra_priority;
        inst->alg.base.cra_blocksize = 1;
        inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
@@ -1187,6 +1235,8 @@ static int crypto_rfc4543_create(struct crypto_template *tmpl,
        inst->alg.encrypt = crypto_rfc4543_encrypt;
        inst->alg.decrypt = crypto_rfc4543_decrypt;
 
+       inst->free = crypto_rfc4543_free;
+
        err = aead_register_instance(tmpl, inst);
        if (err)
                goto out_drop_alg;
@@ -1201,19 +1251,9 @@ out_free_inst:
        goto out;
 }
 
-static void crypto_rfc4543_free(struct crypto_instance *inst)
-{
-       struct crypto_rfc4543_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-       crypto_drop_aead(&ctx->aead);
-
-       kfree(aead_instance(inst));
-}
-
 static struct crypto_template crypto_rfc4543_tmpl = {
        .name = "rfc4543",
        .create = crypto_rfc4543_create,
-       .free = crypto_rfc4543_free,
        .module = THIS_MODULE,
 };
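
A hedged sketch (not part of the patch) of the calling convention the new
assoclen check above enforces: under the converted AEAD interface the 8-byte
per-packet IV is carried at the tail of the associated data, so rfc4106 only
accepts req->assoclen of 16 (8 bytes of AAD + IV) or 20 (12 bytes of AAD + IV)
and strips the trailing 8 bytes before handing the request to the inner gcm.
The function, buffer, and parameter names below are illustrative only.

#include <crypto/aead.h>
#include <linux/scatterlist.h>
#include <linux/string.h>

/*
 * Illustrative only: lay out an rfc4106(gcm(aes)) encryption request
 * with 12 bytes of AAD.  aad_iv must be a kmalloc'd 20-byte buffer and
 * sg must have room for two entries.
 */
static int rfc4106_encrypt_example(struct aead_request *req,
                                   struct scatterlist sg[2],
                                   u8 *aad_iv, const u8 *aad, u8 *iv,
                                   u8 *msg, unsigned int msglen)
{
        memcpy(aad_iv, aad, 12);        /* real associated data          */
        memcpy(aad_iv + 12, iv, 8);     /* IV duplicated after the AAD   */

        sg_init_table(sg, 2);
        sg_set_buf(&sg[0], aad_iv, 20);
        sg_set_buf(&sg[1], msg, msglen + 16);   /* plaintext + tag room  */

        aead_request_set_ad(req, 20);           /* 16 or 20, else -EINVAL */
        aead_request_set_crypt(req, sg, sg, msglen, iv);

        return crypto_aead_encrypt(req);
}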
 
index b32d834144cdce724613dda889cec09484af7384..ceea83d13168f648e2ca22f635504992c24ba7f2 100644 (file)
@@ -79,7 +79,7 @@ int jent_fips_enabled(void)
 
 void jent_panic(char *s)
 {
-       panic(s);
+       panic("%s", s);
 }
 
 void jent_memcpy(void *dest, const void *src, unsigned int n)
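
The one-character fix above matters because panic() takes a printf-style
format string.  A hedged sketch of the hazard follows; the message text is
invented for this example.

#include <linux/kernel.h>

/* Illustrative only: why the caller-supplied string must not be the format. */
static void jent_report_failure_example(const char *msg)
{
        /*
         * panic(msg) would interpret e.g. "below 90% threshold" as a
         * format string and try to parse the '%' as a conversion.
         */
        panic("%s", msg);       /* safe: msg is passed purely as data */
}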
index 45e7d515567294506142904fd771005c8e1ca70c..001a3a3e75dfd152c4687bc5b70eb0e60ce2faf8 100644 (file)
@@ -274,11 +274,16 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
                              u32 type, u32 mask)
 {
        struct pcrypt_instance_ctx *ctx;
+       struct crypto_attr_type *algt;
        struct aead_instance *inst;
        struct aead_alg *alg;
        const char *name;
        int err;
 
+       algt = crypto_get_attr_type(tb);
+       if (IS_ERR(algt))
+               return PTR_ERR(algt);
+
        name = crypto_attr_alg_name(tb[1]);
        if (IS_ERR(name))
                return PTR_ERR(name);
@@ -290,7 +295,9 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
        ctx = aead_instance_ctx(inst);
        crypto_set_aead_spawn(&ctx->spawn, aead_crypto_instance(inst));
 
-       err = crypto_grab_aead(&ctx->spawn, name, 0, 0);
+       err = crypto_grab_aead(&ctx->spawn, name,
+                              algt->type & CRYPTO_ALG_AEAD_NEW,
+                              algt->mask & CRYPTO_ALG_AEAD_NEW);
        if (err)
                goto out_free_inst;
 
@@ -299,6 +306,9 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
        if (err)
                goto out_drop_aead;
 
+       inst->alg.base.cra_flags = CRYPTO_ALG_ASYNC;
+       inst->alg.base.cra_flags |= alg->base.cra_flags & CRYPTO_ALG_AEAD_NEW;
+
        inst->alg.ivsize = crypto_aead_alg_ivsize(alg);
        inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(alg);
 
index 387b5c887a8035cfff51cc0c57eb8a3668235eee..2df9835dfbc0c6e039c522460926cee88991809a 100644 (file)
 
 #include <crypto/algapi.h>
 #include <crypto/internal/hash.h>
+#include <crypto/poly1305.h>
 #include <linux/crypto.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#define POLY1305_BLOCK_SIZE    16
-#define POLY1305_KEY_SIZE      32
-#define POLY1305_DIGEST_SIZE   16
-
-struct poly1305_desc_ctx {
-       /* key */
-       u32 r[5];
-       /* finalize key */
-       u32 s[4];
-       /* accumulator */
-       u32 h[5];
-       /* partial buffer */
-       u8 buf[POLY1305_BLOCK_SIZE];
-       /* bytes used in partial buffer */
-       unsigned int buflen;
-       /* r key has been set */
-       bool rset;
-       /* s key has been set */
-       bool sset;
-};
-
 static inline u64 mlt(u64 a, u64 b)
 {
        return a * b;
@@ -58,7 +38,7 @@ static inline u32 le32_to_cpuvp(const void *p)
        return le32_to_cpup(p);
 }
 
-static int poly1305_init(struct shash_desc *desc)
+int crypto_poly1305_init(struct shash_desc *desc)
 {
        struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
 
@@ -69,8 +49,9 @@ static int poly1305_init(struct shash_desc *desc)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_poly1305_init);
 
-static int poly1305_setkey(struct crypto_shash *tfm,
+int crypto_poly1305_setkey(struct crypto_shash *tfm,
                           const u8 *key, unsigned int keylen)
 {
        /* Poly1305 requires a unique key for each tag, which implies that
@@ -79,6 +60,7 @@ static int poly1305_setkey(struct crypto_shash *tfm,
         * the update() call. */
        return -ENOTSUPP;
 }
+EXPORT_SYMBOL_GPL(crypto_poly1305_setkey);
 
 static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
 {
@@ -98,16 +80,10 @@ static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
        dctx->s[3] = le32_to_cpuvp(key + 12);
 }
 
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
-                                   const u8 *src, unsigned int srclen,
-                                   u32 hibit)
+unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+                                       const u8 *src, unsigned int srclen)
 {
-       u32 r0, r1, r2, r3, r4;
-       u32 s1, s2, s3, s4;
-       u32 h0, h1, h2, h3, h4;
-       u64 d0, d1, d2, d3, d4;
-
-       if (unlikely(!dctx->sset)) {
+       if (!dctx->sset) {
                if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
                        poly1305_setrkey(dctx, src);
                        src += POLY1305_BLOCK_SIZE;
@@ -121,6 +97,25 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
                        dctx->sset = true;
                }
        }
+       return srclen;
+}
+EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
+
+static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
+                                   const u8 *src, unsigned int srclen,
+                                   u32 hibit)
+{
+       u32 r0, r1, r2, r3, r4;
+       u32 s1, s2, s3, s4;
+       u32 h0, h1, h2, h3, h4;
+       u64 d0, d1, d2, d3, d4;
+       unsigned int datalen;
+
+       if (unlikely(!dctx->sset)) {
+               datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+               src += srclen - datalen;
+               srclen = datalen;
+       }
 
        r0 = dctx->r[0];
        r1 = dctx->r[1];
@@ -181,7 +176,7 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
        return srclen;
 }
 
-static int poly1305_update(struct shash_desc *desc,
+int crypto_poly1305_update(struct shash_desc *desc,
                           const u8 *src, unsigned int srclen)
 {
        struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
@@ -214,8 +209,9 @@ static int poly1305_update(struct shash_desc *desc,
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_poly1305_update);
 
-static int poly1305_final(struct shash_desc *desc, u8 *dst)
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
 {
        struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
        __le32 *mac = (__le32 *)dst;
@@ -282,13 +278,14 @@ static int poly1305_final(struct shash_desc *desc, u8 *dst)
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(crypto_poly1305_final);
 
 static struct shash_alg poly1305_alg = {
        .digestsize     = POLY1305_DIGEST_SIZE,
-       .init           = poly1305_init,
-       .update         = poly1305_update,
-       .final          = poly1305_final,
-       .setkey         = poly1305_setkey,
+       .init           = crypto_poly1305_init,
+       .update         = crypto_poly1305_update,
+       .final          = crypto_poly1305_final,
+       .setkey         = crypto_poly1305_setkey,
        .descsize       = sizeof(struct poly1305_desc_ctx),
        .base           = {
                .cra_name               = "poly1305",
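
Exporting the init/update/final helpers lets arch-specific drivers reuse the
generic key handling, and since setkey() deliberately returns -ENOTSUPP the
32-byte one-time key has to travel through the data stream: the first 16
bytes program r, the next 16 program s, and only the remainder is hashed.
A hedged synchronous sketch of that calling convention follows; the function
and variable names are illustrative, the shash calls are the standard ones.

#include <crypto/hash.h>
#include <crypto/poly1305.h>
#include <linux/slab.h>

/* Illustrative only: compute one Poly1305 tag over (key || data). */
static int poly1305_digest_example(struct crypto_shash *tfm,
                                   const u8 key[POLY1305_KEY_SIZE],
                                   const u8 *data, unsigned int len,
                                   u8 mac[POLY1305_DIGEST_SIZE])
{
        struct shash_desc *desc;
        int err;

        desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(tfm),
                       GFP_KERNEL);
        if (!desc)
                return -ENOMEM;
        desc->tfm = tfm;

        err = crypto_shash_init(desc);
        if (!err)       /* first 32 bytes of the stream set r and s */
                err = crypto_shash_update(desc, key, POLY1305_KEY_SIZE);
        if (!err)
                err = crypto_shash_update(desc, data, len);
        if (!err)
                err = crypto_shash_final(desc, mac);

        kfree(desc);
        return err;
}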
index 752af0656f2e60e9e4c56ea8bcb065bca63ebde4..466003e1a8cf20b501425ef8049cdc71b3f262bb 100644 (file)
@@ -267,12 +267,36 @@ err_free_m:
        return ret;
 }
 
+static int rsa_check_key_length(unsigned int len)
+{
+       switch (len) {
+       case 512:
+       case 1024:
+       case 1536:
+       case 2048:
+       case 3072:
+       case 4096:
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
 static int rsa_setkey(struct crypto_akcipher *tfm, const void *key,
                      unsigned int keylen)
 {
        struct rsa_key *pkey = akcipher_tfm_ctx(tfm);
+       int ret;
 
-       return rsa_parse_key(pkey, key, keylen);
+       ret = rsa_parse_key(pkey, key, keylen);
+       if (ret)
+               return ret;
+
+       if (rsa_check_key_length(mpi_get_size(pkey->n) << 3)) {
+               rsa_free_key(pkey);
+               ret = -EINVAL;
+       }
+       return ret;
 }
 
 static void rsa_exit_tfm(struct crypto_akcipher *tfm)
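
mpi_get_size() reports the modulus length in bytes, so the shift by 3 in the
setkey path above converts it to bits before the whitelist check; a 256-byte
n is accepted as a 2048-bit key, anything off the list is refused.  A brief
hedged sketch (the helper name is invented; rsa_check_key_length() is the
function added above):

/* Illustrative only: bytes -> bits conversion feeding the whitelist. */
static int rsa_modulus_ok_example(unsigned int nbytes)
{
        unsigned int nbits = nbytes << 3;       /* 256 bytes -> 2048 bits */

        return rsa_check_key_length(nbits);     /* 0 for 512..4096 bits   */
}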
index 3e8e0a9e5a8e5e7a6ea89e5364db73d2a754b0a5..8d96ce969b4480601b125a14db850f5edda38ddc 100644 (file)
@@ -28,7 +28,7 @@ int rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
                return -ENOMEM;
 
        /* In FIPS mode only allow key size 2K & 3K */
-       if (fips_enabled && (mpi_get_size(key->n) != 256 ||
+       if (fips_enabled && (mpi_get_size(key->n) != 256 &&
                             mpi_get_size(key->n) != 384)) {
                pr_err("RSA: key size not allowed in FIPS mode\n");
                mpi_free(key->n);
@@ -62,7 +62,7 @@ int rsa_get_d(void *context, size_t hdrlen, unsigned char tag,
                return -ENOMEM;
 
        /* In FIPS mode only allow key size 2K & 3K */
-       if (fips_enabled && (mpi_get_size(key->d) != 256 ||
+       if (fips_enabled && (mpi_get_size(key->d) != 256 &&
                             mpi_get_size(key->d) != 384)) {
                pr_err("RSA: key size not allowed in FIPS mode\n");
                mpi_free(key->d);
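
The operator change above is the whole fix: with '||' the test
"size != 256 || size != 384" is true for every size, since no value equals
both 256 and 384, so FIPS mode rejected even the 2048- and 3072-bit keys it
was meant to allow.  A hedged sketch of the corrected predicate (the helper
name is invented; sizes are in bytes, as returned by mpi_get_size()):

/* Illustrative only: reject sizes other than 256 bytes (2K) or 384 (3K). */
static bool fips_rsa_size_rejected_example(unsigned int nbytes)
{
        return nbytes != 256 && nbytes != 384;
}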
index 122c56e3491b9819b20cc34871a8cb4f6d6fdaac..45d0563f681ab4958b7853a9d7403981372c3332 100644 (file)
@@ -467,9 +467,6 @@ static int seqiv_aead_decrypt(struct aead_request *req)
        aead_request_set_ad(subreq, req->assoclen + ivsize);
 
        scatterwalk_map_and_copy(req->iv, req->src, req->assoclen, ivsize, 0);
-       if (req->src != req->dst)
-               scatterwalk_map_and_copy(req->iv, req->dst,
-                                        req->assoclen, ivsize, 1);
 
        return crypto_aead_decrypt(subreq);
 }
@@ -516,9 +513,9 @@ static int seqiv_old_aead_init(struct crypto_tfm *tfm)
        return err ?: aead_geniv_init(tfm);
 }
 
-static int seqiv_aead_init_common(struct crypto_tfm *tfm, unsigned int reqsize)
+static int seqiv_aead_init_common(struct crypto_aead *geniv,
+                                 unsigned int reqsize)
 {
-       struct crypto_aead *geniv = __crypto_aead_cast(tfm);
        struct seqiv_aead_ctx *ctx = crypto_aead_ctx(geniv);
        int err;
 
@@ -541,7 +538,7 @@ static int seqiv_aead_init_common(struct crypto_tfm *tfm, unsigned int reqsize)
        if (IS_ERR(ctx->null))
                goto out;
 
-       err = aead_geniv_init(tfm);
+       err = aead_geniv_init(crypto_aead_tfm(geniv));
        if (err)
                goto drop_null;
 
@@ -556,19 +553,19 @@ drop_null:
        goto out;
 }
 
-static int seqiv_aead_init(struct crypto_tfm *tfm)
+static int seqiv_aead_init(struct crypto_aead *tfm)
 {
        return seqiv_aead_init_common(tfm, sizeof(struct aead_request));
 }
 
-static int seqniv_aead_init(struct crypto_tfm *tfm)
+static int seqniv_aead_init(struct crypto_aead *tfm)
 {
        return seqiv_aead_init_common(tfm, sizeof(struct seqniv_request_ctx));
 }
 
-static void seqiv_aead_exit(struct crypto_tfm *tfm)
+static void seqiv_aead_exit(struct crypto_aead *tfm)
 {
-       struct seqiv_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct seqiv_aead_ctx *ctx = crypto_aead_ctx(tfm);
 
        crypto_free_aead(ctx->geniv.child);
        crypto_put_default_null_skcipher();
@@ -666,11 +663,11 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = seqiv_aead_encrypt;
        inst->alg.decrypt = seqiv_aead_decrypt;
 
-       inst->alg.base.cra_init = seqiv_aead_init;
-       inst->alg.base.cra_exit = seqiv_aead_exit;
+       inst->alg.init = seqiv_aead_init;
+       inst->alg.exit = seqiv_aead_exit;
 
        inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
-       inst->alg.base.cra_ctxsize += inst->alg.base.cra_aead.ivsize;
+       inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
 done:
        err = aead_register_instance(tmpl, inst);
@@ -727,8 +724,15 @@ static int seqniv_create(struct crypto_template *tmpl, struct rtattr **tb)
        inst->alg.encrypt = seqniv_aead_encrypt;
        inst->alg.decrypt = seqniv_aead_decrypt;
 
-       inst->alg.base.cra_init = seqniv_aead_init;
-       inst->alg.base.cra_exit = seqiv_aead_exit;
+       inst->alg.init = seqniv_aead_init;
+       inst->alg.exit = seqiv_aead_exit;
+
+       if ((alg->base.cra_flags & CRYPTO_ALG_AEAD_NEW)) {
+               inst->alg.encrypt = seqiv_aead_encrypt;
+               inst->alg.decrypt = seqiv_aead_decrypt;
+
+               inst->alg.init = seqiv_aead_init;
+       }
 
        inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
        inst->alg.base.cra_ctxsize = sizeof(struct seqiv_aead_ctx);
index 9f6f10b498ba4aab02849c760b1da61bd5a7fd99..e9a05ba2bfb4468d9fc6550bdf27fdb0a0adad9c 100644 (file)
@@ -73,6 +73,22 @@ static char *check[] = {
        "lzo", "cts", "zlib", NULL
 };
 
+struct tcrypt_result {
+       struct completion completion;
+       int err;
+};
+
+static void tcrypt_complete(struct crypto_async_request *req, int err)
+{
+       struct tcrypt_result *res = req->data;
+
+       if (err == -EINPROGRESS)
+               return;
+
+       res->err = err;
+       complete(&res->completion);
+}
+
 static int test_cipher_jiffies(struct blkcipher_desc *desc, int enc,
                               struct scatterlist *sg, int blen, int secs)
 {
@@ -143,6 +159,20 @@ out:
        return ret;
 }
 
+static inline int do_one_aead_op(struct aead_request *req, int ret)
+{
+       if (ret == -EINPROGRESS || ret == -EBUSY) {
+               struct tcrypt_result *tr = req->base.data;
+
+               ret = wait_for_completion_interruptible(&tr->completion);
+               if (!ret)
+                       ret = tr->err;
+               reinit_completion(&tr->completion);
+       }
+
+       return ret;
+}
+
 static int test_aead_jiffies(struct aead_request *req, int enc,
                                int blen, int secs)
 {
@@ -153,9 +183,9 @@ static int test_aead_jiffies(struct aead_request *req, int enc,
        for (start = jiffies, end = start + secs * HZ, bcount = 0;
             time_before(jiffies, end); bcount++) {
                if (enc)
-                       ret = crypto_aead_encrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_encrypt(req));
                else
-                       ret = crypto_aead_decrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_decrypt(req));
 
                if (ret)
                        return ret;
@@ -177,9 +207,9 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
        /* Warm-up run. */
        for (i = 0; i < 4; i++) {
                if (enc)
-                       ret = crypto_aead_encrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_encrypt(req));
                else
-                       ret = crypto_aead_decrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_decrypt(req));
 
                if (ret)
                        goto out;
@@ -191,9 +221,9 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 
                start = get_cycles();
                if (enc)
-                       ret = crypto_aead_encrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_encrypt(req));
                else
-                       ret = crypto_aead_decrypt(req);
+                       ret = do_one_aead_op(req, crypto_aead_decrypt(req));
                end = get_cycles();
 
                if (ret)
@@ -286,6 +316,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
        char *axbuf[XBUFSIZE];
        unsigned int *b_size;
        unsigned int iv_len;
+       struct tcrypt_result result;
 
        iv = kzalloc(MAX_IVLEN, GFP_KERNEL);
        if (!iv)
@@ -313,7 +344,12 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
                goto out_nosg;
        sgout = &sg[9];
 
-       tfm = crypto_alloc_aead(algo, 0, 0);
+       tfm = crypto_alloc_aead(algo, CRYPTO_ALG_AEAD_NEW,
+                               CRYPTO_ALG_AEAD_NEW);
+       if (PTR_ERR(tfm) == -ENOENT) {
+               aad_size -= 8;
+               tfm = crypto_alloc_aead(algo, 0, CRYPTO_ALG_AEAD_NEW);
+       }
 
        if (IS_ERR(tfm)) {
                pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo,
@@ -321,6 +357,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
                goto out_notfm;
        }
 
+       init_completion(&result.completion);
        printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
                        get_driver_name(crypto_aead, tfm), e);
 
@@ -331,6 +368,9 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
                goto out_noreq;
        }
 
+       aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+                                 tcrypt_complete, &result);
+
        i = 0;
        do {
                b_size = aead_sizes;
@@ -749,22 +789,6 @@ out:
        crypto_free_hash(tfm);
 }
 
-struct tcrypt_result {
-       struct completion completion;
-       int err;
-};
-
-static void tcrypt_complete(struct crypto_async_request *req, int err)
-{
-       struct tcrypt_result *res = req->data;
-
-       if (err == -EINPROGRESS)
-               return;
-
-       res->err = err;
-       complete(&res->completion);
-}
-
 static inline int do_one_ahash_op(struct ahash_request *req, int ret)
 {
        if (ret == -EINPROGRESS || ret == -EBUSY) {
@@ -1759,14 +1783,27 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
 
        case 211:
                test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec,
+                               NULL, 0, 16, 16, aead_speed_template_20);
+               test_aead_speed("gcm(aes)", ENCRYPT, sec,
                                NULL, 0, 16, 8, aead_speed_template_20);
                break;
 
        case 212:
                test_aead_speed("rfc4309(ccm(aes))", ENCRYPT, sec,
-                               NULL, 0, 16, 8, aead_speed_template_19);
+                               NULL, 0, 16, 16, aead_speed_template_19);
+               break;
+
+       case 213:
+               test_aead_speed("rfc7539esp(chacha20,poly1305)", ENCRYPT, sec,
+                               NULL, 0, 16, 8, aead_speed_template_36);
+               break;
+
+       case 214:
+               test_cipher_speed("chacha20", ENCRYPT, sec, NULL, 0,
+                                 speed_template_32);
                break;
 
+
        case 300:
                if (alg) {
                        test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -1855,6 +1892,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m)
                test_hash_speed("crct10dif", sec, generic_hash_speed_template);
                if (mode > 300 && mode < 400) break;
 
+       case 321:
+               test_hash_speed("poly1305", sec, poly1305_speed_template);
+               if (mode > 300 && mode < 400) break;
+
        case 399:
                break;
 
index 6cc1b856871b96d5b0d9064fc6ae028649fe89b6..f0bfee1bb293809355d1c7455042fc5e7753b826 100644 (file)
@@ -61,12 +61,14 @@ static u8 speed_template_32_40_48[] = {32, 40, 48, 0};
 static u8 speed_template_32_48[] = {32, 48, 0};
 static u8 speed_template_32_48_64[] = {32, 48, 64, 0};
 static u8 speed_template_32_64[] = {32, 64, 0};
+static u8 speed_template_32[] = {32, 0};
 
 /*
  * AEAD speed tests
  */
 static u8 aead_speed_template_19[] = {19, 0};
 static u8 aead_speed_template_20[] = {20, 0};
+static u8 aead_speed_template_36[] = {36, 0};
 
 /*
  * Digest speed tests
@@ -127,4 +129,22 @@ static struct hash_speed hash_speed_template_16[] = {
        {  .blen = 0,   .plen = 0,      .klen = 0, }
 };
 
+static struct hash_speed poly1305_speed_template[] = {
+       { .blen = 96,   .plen = 16, },
+       { .blen = 96,   .plen = 32, },
+       { .blen = 96,   .plen = 96, },
+       { .blen = 288,  .plen = 16, },
+       { .blen = 288,  .plen = 32, },
+       { .blen = 288,  .plen = 288, },
+       { .blen = 1056, .plen = 32, },
+       { .blen = 1056, .plen = 1056, },
+       { .blen = 2080, .plen = 32, },
+       { .blen = 2080, .plen = 2080, },
+       { .blen = 4128, .plen = 4128, },
+       { .blen = 8224, .plen = 8224, },
+
+       /* End marker */
+       {  .blen = 0,   .plen = 0, }
+};
+
 #endif /* _CRYPTO_TCRYPT_H */
index 868edf11704142deec8e05c44a2edfb37661ad27..dc467ab300c7af49f757f91bd30fb6a0e60acc9c 100644 (file)
@@ -14504,6 +14504,9 @@ static struct cipher_testvec aes_cbc_enc_tv_template[] = {
                .result = "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
                          "\x27\x08\x94\x2d\xbe\x77\x18\x1a",
                .rlen   = 16,
+               .also_non_np = 1,
+               .np     = 8,
+               .tap    = { 3, 2, 3, 2, 3, 1, 1, 1 },
        }, {
                .key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
                          "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
@@ -14723,6 +14726,9 @@ static struct cipher_testvec aes_cbc_dec_tv_template[] = {
                .ilen   = 16,
                .result = "Single block msg",
                .rlen   = 16,
+               .also_non_np = 1,
+               .np     = 8,
+               .tap    = { 3, 2, 3, 2, 3, 1, 1, 1 },
        }, {
                .key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
                          "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
@@ -20129,149 +20135,150 @@ static struct aead_testvec aes_gcm_dec_tv_template[] = {
 };
 
 static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
-        { /* Generated using Crypto++ */
+       { /* Generated using Crypto++ */
                .key    = zeroed_string,
                .klen   = 20,
-                .iv     = zeroed_string,
-                .input  = zeroed_string,
-                .ilen   = 16,
-                .assoc  = zeroed_string,
-                .alen   = 8,
+               .iv     = zeroed_string,
+               .input  = zeroed_string,
+               .ilen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
                .result = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
-                          "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
-                          "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
-                          "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
+                         "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
+                         "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
+                         "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
                .rlen   = 32,
-        },{
+       },{
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
-                .input  = zeroed_string,
-                .ilen   = 16,
-                .assoc  = zeroed_string,
-                .alen   = 8,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = zeroed_string,
+               .ilen   = 16,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
                .result = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
-                          "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
-                          "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
-                          "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
+                         "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
+                         "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
+                         "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
                .rlen   = 32,
 
-        }, {
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = zeroed_string,
-                .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .ilen   = 16,
-                .assoc  = zeroed_string,
-                .alen   = 8,
+               .iv     = zeroed_string,
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
                .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
-                          "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
-                          "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
-                          "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
+                         "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+                         "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
+                         "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
                .rlen   = 32,
-        }, {
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = zeroed_string,
-                .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .ilen   = 16,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
+               .iv     = zeroed_string,
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
                .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
-                          "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
-                          "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
-                          "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
+                         "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+                         "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
+                         "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
                .rlen   = 32,
-        }, {
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
-                .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .ilen   = 16,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
                .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
-                          "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
-                          "\x64\x50\xF9\x32\x13\xFB\x74\x61"
-                          "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
+                         "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+                         "\x64\x50\xF9\x32\x13\xFB\x74\x61"
+                         "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
                .rlen   = 32,
-        }, {
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
-                .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .ilen   = 64,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 64,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
                .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
-                          "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
-                          "\x98\x14\xA1\x42\x37\x80\xFD\x90"
-                          "\x68\x12\x01\xA8\x91\x89\xB9\x83"
-                          "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
-                          "\x94\x5F\x18\x12\xBA\x27\x09\x39"
-                          "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
-                          "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
-                          "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
-                          "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
+                         "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+                         "\x98\x14\xA1\x42\x37\x80\xFD\x90"
+                         "\x68\x12\x01\xA8\x91\x89\xB9\x83"
+                         "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
+                         "\x94\x5F\x18\x12\xBA\x27\x09\x39"
+                         "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
+                         "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
+                         "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
+                         "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
                .rlen   = 80,
-        }, {
+       }, {
                .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
                          "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef"
-                          "\x00\x00\x00\x00",
-                .input  = "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff",
-                .ilen   = 192,
-                .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-                          "\xaa\xaa\xaa\xaa",
-                .alen   = 12,
+               .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef",
+               .input  = "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .ilen   = 192,
+               .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+                         "\xaa\xaa\xaa\xaa\x00\x00\x45\x67"
+                         "\x89\xab\xcd\xef",
+               .alen   = 20,
                .result = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
                          "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
                          "\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
@@ -20316,8 +20323,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x00\x21\x00\x01\x01\x02\x02\x01",
                .ilen   = 72,
                .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
-                         "\x00\x00\x00\x00",
-               .alen   = 12,
+                         "\x00\x00\x00\x00\x49\x56\xED\x7E"
+                         "\x3B\x24\x4C\xFE",
+               .alen   = 20,
                .result = "\xFE\xCF\x53\x7E\x72\x9D\x5B\x07"
                          "\xDC\x30\xDF\x52\x8D\xD2\x2B\x76"
                          "\x8D\x1B\x98\x73\x66\x96\xA6\xFD"
@@ -20345,8 +20353,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x65\x72\x63\x69\x74\x79\x02\x64"
                          "\x6B\x00\x00\x01\x00\x01\x00\x01",
                .ilen   = 64,
-               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
-               .alen   = 8,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
                .result = "\xDE\xB2\x2C\xD9\xB0\x7C\x72\xC1"
                          "\x6E\x3A\x65\xBE\xEB\x8D\xF3\x04"
                          "\xA5\xA5\x89\x7D\x33\xAE\x53\x0F"
@@ -20374,8 +20383,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x02\x04\x05\xB4\x01\x01\x04\x02"
                          "\x01\x02\x02\x01",
                .ilen   = 52,
-               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02",
-               .alen   = 8,
+               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02"
+                         "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .alen   = 16,
                .result = "\xFF\x42\x5C\x9B\x72\x45\x99\xDF"
                          "\x7A\x3B\xCD\x51\x01\x94\xE0\x0D"
                          "\x6A\x78\x10\x7F\x1B\x0B\x1C\xBF"
@@ -20401,8 +20411,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x75\x76\x77\x61\x62\x63\x64\x65"
                          "\x66\x67\x68\x69\x01\x02\x02\x01",
                .ilen   = 64,
-               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01",
-               .alen   = 8,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
                .result = "\x46\x88\xDA\xF2\xF9\x73\xA3\x92"
                          "\x73\x29\x09\xC3\x31\xD5\x6D\x60"
                          "\xF6\x94\xAB\xAA\x41\x4B\x5E\x7F"
@@ -20430,8 +20441,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x66\x67\x68\x69\x01\x02\x02\x01",
                .ilen   = 64,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .result = "\xFB\xA2\xCA\xA4\x85\x3C\xF9\xF0"
                          "\xF2\x2C\xB1\x0D\x86\xDD\x83\xB0"
                          "\xFE\xC7\x56\x91\xCF\x1A\x04\xB0"
@@ -20455,8 +20467,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x01\x02\x02\x01",
                .ilen   = 28,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .result = "\xFB\xA2\xCA\x84\x5E\x5D\xF9\xF0"
                          "\xF2\x2C\x3E\x6E\x86\xDD\x83\x1E"
                          "\x1F\xC6\x57\x92\xCD\x1A\xF9\x13"
@@ -20477,8 +20490,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
                          "\x50\x10\x16\xD0\x75\x68\x00\x01",
                .ilen   = 40,
-               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
-               .alen   = 8,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
                .result = "\xA5\xB1\xF8\x06\x60\x29\xAE\xA4"
                          "\x0E\x59\x8B\x81\x22\xDE\x02\x42"
                          "\x09\x38\xB3\xAB\x33\xF8\x28\xE6"
@@ -20505,8 +20519,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x23\x01\x01\x01",
                .ilen   = 76,
                .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
-                         "\x00\x00\x00\x01",
-               .alen   = 12,
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
                .result = "\x18\xA6\xFD\x42\xF7\x2C\xBF\x4A"
                          "\xB2\xA2\xEA\x90\x1F\x73\xD8\x14"
                          "\xE3\xE7\xF2\x43\xD9\x54\x12\xE1"
@@ -20535,8 +20550,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x50\x10\x1F\x64\x6D\x54\x00\x01",
                .ilen   = 40,
                .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
-                         "\xDD\x0D\xB9\x9B",
-               .alen   = 12,
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
                .result = "\xF2\xD6\x9E\xCD\xBD\x5A\x0D\x5B"
                          "\x8D\x5E\xF3\x8B\xAD\x4D\xA5\x8D"
                          "\x1F\x27\x8F\xDE\x98\xEF\x67\x54"
@@ -20563,8 +20579,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x15\x01\x01\x01",
                .ilen   = 76,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .result = "\xFB\xA2\xCA\xD1\x2F\xC1\xF9\xF0"
                          "\x0D\x3C\xEB\xF3\x05\x41\x0D\xB8"
                          "\x3D\x77\x84\xB6\x07\x32\x3D\x22"
@@ -20597,8 +20614,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x72\x72\x6F\x77\x01\x02\x02\x01",
                .ilen   = 72,
                .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
-                         "\xDD\x0D\xB9\x9B",
-               .alen   = 12,
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
                .result = "\xD4\xB7\xED\x86\xA1\x77\x7F\x2E"
                          "\xA1\x3D\x69\x73\xD3\x24\xC6\x9E"
                          "\x7B\x43\xF8\x26\xFB\x56\x83\x12"
@@ -20619,8 +20637,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                .iv     = "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
                .input  = "\x01\x02\x02\x01",
                .ilen   = 4,
-               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF",
-               .alen   = 8,
+               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF"
+                         "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .alen   = 16,
                .result = "\x43\x7F\x86\x6B\xCB\x3F\x69\x9F"
                          "\xE9\xB0\x82\x2B\xAC\x96\x1C\x45"
                          "\x04\xBE\xF2\x70",
@@ -20636,8 +20655,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x62\x65\x00\x01",
                .ilen   = 20,
                .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
-                         "\x00\x00\x00\x01",
-               .alen   = 12,
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
                .result = "\x29\xC9\xFC\x69\xA1\x97\xD0\x38"
                          "\xCC\xDD\x14\xE2\xDD\xFC\xAA\x05"
                          "\x43\x33\x21\x64\x41\x25\x03\x52"
@@ -20661,8 +20681,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x01\x02\x02\x01",
                .ilen   = 52,
                .assoc  = "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
-                         "\xFF\xFF\xFF\xFF",
-               .alen   = 12,
+                         "\xFF\xFF\xFF\xFF\x33\x30\x21\x69"
+                         "\x67\x65\x74\x6D",
+               .alen   = 20,
                .result = "\xF9\x7A\xB2\xAA\x35\x6D\x8E\xDC"
                          "\xE1\x76\x44\xAC\x8C\x78\xE2\x5D"
                          "\xD2\x4D\xED\xBB\x29\xEB\xF1\xB6"
@@ -20688,8 +20709,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x01\x02\x02\x01",
                .ilen   = 52,
                .assoc  = "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .result = "\xFB\xA2\xCA\xA8\xC6\xC5\xF9\xF0"
                          "\xF2\x2C\xA5\x4A\x06\x12\x10\xAD"
                          "\x3F\x6E\x57\x91\xCF\x1A\xCA\x21"
@@ -20712,8 +20734,9 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
                          "\x71\x72\x73\x74\x01\x02\x02\x01",
                .ilen   = 32,
                .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
-                         "\x00\x00\x00\x07",
-               .alen   = 12,
+                         "\x00\x00\x00\x07\x48\x55\xEC\x7D"
+                         "\x3A\x23\x4B\xFD",
+               .alen   = 20,
                .result = "\x74\x75\x2E\x8A\xEB\x5D\x87\x3C"
                          "\xD7\xC0\xF4\xAC\xC3\x6C\x4B\xFF"
                          "\x84\xB7\xD7\xB9\x8F\x0C\xA8\xB6"
@@ -20725,122 +20748,122 @@ static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = {
 };
 
 static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
-        { /* Generated using Crypto++ */
+       { /* Generated using Crypto++ */
                .key    = zeroed_string,
                .klen   = 20,
-                .iv     = zeroed_string,
+               .iv     = zeroed_string,
                .input  = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92"
-                          "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
-                          "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
-                          "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
+                         "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78"
+                         "\x97\xFE\x4C\x23\x37\x42\x01\xE0"
+                         "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B",
                .ilen   = 32,
-                .assoc  = zeroed_string,
-                .alen   = 8,
-                .result = zeroed_string,
-                .rlen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .result = zeroed_string,
+               .rlen   = 16,
 
-        },{
+       },{
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
                .input  = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18"
-                          "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
-                          "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
-                          "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
+                         "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28"
+                         "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D"
+                         "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF",
                .ilen   = 32,
-                .assoc  = zeroed_string,
-                .alen   = 8,
-                .result = zeroed_string,
-                .rlen   = 16,
-        }, {
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = zeroed_string,
+               .rlen   = 16,
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = zeroed_string,
+               .iv     = zeroed_string,
                .input  = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
-                          "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
-                          "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
-                          "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
+                         "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+                         "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C"
+                         "\xB1\x68\xFD\x14\x52\x64\x61\xB2",
                .ilen   = 32,
-                .assoc  = zeroed_string,
-                .alen   = 8,
-                .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .rlen   = 16,
-        }, {
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = zeroed_string,
+               .iv     = zeroed_string,
                .input  = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE"
-                          "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
-                          "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
-                          "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
+                         "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC"
+                         "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63"
+                         "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5",
                .ilen   = 32,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
-                .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .rlen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
 
-        }, {
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
                .input  = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
-                          "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
-                          "\x64\x50\xF9\x32\x13\xFB\x74\x61"
-                          "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
+                         "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+                         "\x64\x50\xF9\x32\x13\xFB\x74\x61"
+                         "\xF4\xED\x52\xD3\xC5\x10\x55\x3C",
                .ilen   = 32,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
-                .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .rlen   = 16,
-        }, {
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
+       }, {
                .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
                          "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01"
-                          "\x00\x00\x00\x00",
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
                .input  = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19"
-                          "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
-                          "\x98\x14\xA1\x42\x37\x80\xFD\x90"
-                          "\x68\x12\x01\xA8\x91\x89\xB9\x83"
-                          "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
-                          "\x94\x5F\x18\x12\xBA\x27\x09\x39"
-                          "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
-                          "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
-                          "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
-                          "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
+                         "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29"
+                         "\x98\x14\xA1\x42\x37\x80\xFD\x90"
+                         "\x68\x12\x01\xA8\x91\x89\xB9\x83"
+                         "\x5B\x11\x77\x12\x9B\xFF\x24\x89"
+                         "\x94\x5F\x18\x12\xBA\x27\x09\x39"
+                         "\x99\x96\x76\x42\x15\x1C\xCD\xCB"
+                         "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD"
+                         "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85"
+                         "\xBD\xCF\x62\x98\x58\x14\xE5\xBD",
                .ilen   = 80,
-                .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .alen   = 8,
-                .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01"
-                          "\x01\x01\x01\x01\x01\x01\x01\x01",
-                .rlen   = 64,
-        }, {
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 64,
+       }, {
                .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
                          "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
-                          "\x00\x00\x00\x00",
+                         "\x00\x00\x00\x00",
                .klen   = 20,
-                .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef"
-                          "\x00\x00\x00\x00",
+               .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef",
                .input  = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE"
                          "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A"
                          "\x44\x6D\xC3\x88\x46\x2E\xC2\x01"
@@ -20868,34 +20891,35 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x37\x08\x1C\xCF\xBA\x5D\x71\x46"
                          "\x80\x72\xB0\x4C\x82\x0D\x60\x3C",
                .ilen   = 208,
-                .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-                          "\xaa\xaa\xaa\xaa",
-                .alen   = 12,
-                .result = "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff"
-                          "\xff\xff\xff\xff\xff\xff\xff\xff",
-                .rlen   = 192,
+               .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+                         "\xaa\xaa\xaa\xaa\x00\x00\x45\x67"
+                         "\x89\xab\xcd\xef",
+               .alen   = 20,
+               .result = "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .rlen   = 192,
        }, {
                .key    = "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
                          "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
@@ -20913,8 +20937,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x00\x21\x00\x01\x01\x02\x02\x01",
                .rlen   = 72,
                .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
-                         "\x00\x00\x00\x00",
-               .alen   = 12,
+                         "\x00\x00\x00\x00\x49\x56\xED\x7E"
+                         "\x3B\x24\x4C\xFE",
+               .alen   = 20,
                .input  = "\xFE\xCF\x53\x7E\x72\x9D\x5B\x07"
                          "\xDC\x30\xDF\x52\x8D\xD2\x2B\x76"
                          "\x8D\x1B\x98\x73\x66\x96\xA6\xFD"
@@ -20942,8 +20967,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x65\x72\x63\x69\x74\x79\x02\x64"
                          "\x6B\x00\x00\x01\x00\x01\x00\x01",
                .rlen   = 64,
-               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
-               .alen   = 8,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
                .input  = "\xDE\xB2\x2C\xD9\xB0\x7C\x72\xC1"
                          "\x6E\x3A\x65\xBE\xEB\x8D\xF3\x04"
                          "\xA5\xA5\x89\x7D\x33\xAE\x53\x0F"
@@ -20971,8 +20997,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x02\x04\x05\xB4\x01\x01\x04\x02"
                          "\x01\x02\x02\x01",
                .rlen   = 52,
-               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02",
-               .alen   = 8,
+               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02"
+                         "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .alen   = 16,
                .input  = "\xFF\x42\x5C\x9B\x72\x45\x99\xDF"
                          "\x7A\x3B\xCD\x51\x01\x94\xE0\x0D"
                          "\x6A\x78\x10\x7F\x1B\x0B\x1C\xBF"
@@ -20998,8 +21025,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x75\x76\x77\x61\x62\x63\x64\x65"
                          "\x66\x67\x68\x69\x01\x02\x02\x01",
                .rlen   = 64,
-               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01",
-               .alen   = 8,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
                .input  = "\x46\x88\xDA\xF2\xF9\x73\xA3\x92"
                          "\x73\x29\x09\xC3\x31\xD5\x6D\x60"
                          "\xF6\x94\xAB\xAA\x41\x4B\x5E\x7F"
@@ -21027,8 +21055,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x66\x67\x68\x69\x01\x02\x02\x01",
                .rlen   = 64,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .input  = "\xFB\xA2\xCA\xA4\x85\x3C\xF9\xF0"
                          "\xF2\x2C\xB1\x0D\x86\xDD\x83\xB0"
                          "\xFE\xC7\x56\x91\xCF\x1A\x04\xB0"
@@ -21052,8 +21081,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x01\x02\x02\x01",
                .rlen   = 28,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .input  = "\xFB\xA2\xCA\x84\x5E\x5D\xF9\xF0"
                          "\xF2\x2C\x3E\x6E\x86\xDD\x83\x1E"
                          "\x1F\xC6\x57\x92\xCD\x1A\xF9\x13"
@@ -21074,8 +21104,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
                          "\x50\x10\x16\xD0\x75\x68\x00\x01",
                .rlen   = 40,
-               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A",
-               .alen   = 8,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
                .input  = "\xA5\xB1\xF8\x06\x60\x29\xAE\xA4"
                          "\x0E\x59\x8B\x81\x22\xDE\x02\x42"
                          "\x09\x38\xB3\xAB\x33\xF8\x28\xE6"
@@ -21102,8 +21133,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x23\x01\x01\x01",
                .rlen   = 76,
                .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
-                         "\x00\x00\x00\x01",
-               .alen   = 12,
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
                .input  = "\x18\xA6\xFD\x42\xF7\x2C\xBF\x4A"
                          "\xB2\xA2\xEA\x90\x1F\x73\xD8\x14"
                          "\xE3\xE7\xF2\x43\xD9\x54\x12\xE1"
@@ -21132,8 +21164,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x50\x10\x1F\x64\x6D\x54\x00\x01",
                .rlen   = 40,
                .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
-                         "\xDD\x0D\xB9\x9B",
-               .alen   = 12,
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
                .input  = "\xF2\xD6\x9E\xCD\xBD\x5A\x0D\x5B"
                          "\x8D\x5E\xF3\x8B\xAD\x4D\xA5\x8D"
                          "\x1F\x27\x8F\xDE\x98\xEF\x67\x54"
@@ -21160,8 +21193,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x15\x01\x01\x01",
                .rlen   = 76,
                .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .input  = "\xFB\xA2\xCA\xD1\x2F\xC1\xF9\xF0"
                          "\x0D\x3C\xEB\xF3\x05\x41\x0D\xB8"
                          "\x3D\x77\x84\xB6\x07\x32\x3D\x22"
@@ -21194,8 +21228,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x72\x72\x6F\x77\x01\x02\x02\x01",
                .rlen   = 72,
                .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
-                         "\xDD\x0D\xB9\x9B",
-               .alen   = 12,
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
                .input  = "\xD4\xB7\xED\x86\xA1\x77\x7F\x2E"
                          "\xA1\x3D\x69\x73\xD3\x24\xC6\x9E"
                          "\x7B\x43\xF8\x26\xFB\x56\x83\x12"
@@ -21216,8 +21251,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                .iv     = "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
                .result = "\x01\x02\x02\x01",
                .rlen   = 4,
-               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF",
-               .alen   = 8,
+               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF"
+                         "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .alen   = 16,
                .input  = "\x43\x7F\x86\x6B\xCB\x3F\x69\x9F"
                          "\xE9\xB0\x82\x2B\xAC\x96\x1C\x45"
                          "\x04\xBE\xF2\x70",
@@ -21233,8 +21269,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x62\x65\x00\x01",
                .rlen   = 20,
                .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
-                         "\x00\x00\x00\x01",
-               .alen   = 12,
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
                .input  = "\x29\xC9\xFC\x69\xA1\x97\xD0\x38"
                          "\xCC\xDD\x14\xE2\xDD\xFC\xAA\x05"
                          "\x43\x33\x21\x64\x41\x25\x03\x52"
@@ -21258,8 +21295,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x01\x02\x02\x01",
                .rlen   = 52,
                .assoc  = "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
-                         "\xFF\xFF\xFF\xFF",
-               .alen   = 12,
+                         "\xFF\xFF\xFF\xFF\x33\x30\x21\x69"
+                         "\x67\x65\x74\x6D",
+               .alen   = 20,
                .input  = "\xF9\x7A\xB2\xAA\x35\x6D\x8E\xDC"
                          "\xE1\x76\x44\xAC\x8C\x78\xE2\x5D"
                          "\xD2\x4D\xED\xBB\x29\xEB\xF1\xB6"
@@ -21285,8 +21323,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x01\x02\x02\x01",
                .rlen   = 52,
                .assoc  = "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
-                         "\x10\x10\x10\x10",
-               .alen   = 12,
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
                .input  = "\xFB\xA2\xCA\xA8\xC6\xC5\xF9\xF0"
                          "\xF2\x2C\xA5\x4A\x06\x12\x10\xAD"
                          "\x3F\x6E\x57\x91\xCF\x1A\xCA\x21"
@@ -21309,8 +21348,9 @@ static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = {
                          "\x71\x72\x73\x74\x01\x02\x02\x01",
                .rlen   = 32,
                .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
-                         "\x00\x00\x00\x07",
-               .alen   = 12,
+                         "\x00\x00\x00\x07\x48\x55\xEC\x7D"
+                         "\x3A\x23\x4B\xFD",
+               .alen   = 20,
                .input  = "\x74\x75\x2E\x8A\xEB\x5D\x87\x3C"
                          "\xD7\xC0\xF4\xAC\xC3\x6C\x4B\xFF"
                          "\x84\xB7\xD7\xB9\x8F\x0C\xA8\xB6"
@@ -21538,10 +21578,7 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = {
                          "\xba",
                .rlen   = 33,
        }, {
-               /*
-                * This is the same vector as aes_ccm_rfc4309_enc_tv_template[0]
-                * below but rewritten to use the ccm algorithm directly.
-                */
+               /* This is taken from FIPS CAVS. */
                .key    = "\x83\xac\x54\x66\xc2\xeb\xe5\x05"
                          "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e",
                .klen   = 16,
@@ -21559,6 +21596,142 @@ static struct aead_testvec aes_ccm_enc_tv_template[] = {
                          "\xda\x24\xea\xd9\xa1\x39\x98\xfd"
                          "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8",
                .rlen   = 48,
+       }, {
+               .key    = "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0"
+                         "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3",
+               .klen   = 16,
+               .iv     = "\x03\x4f\xa3\x19\xd3\x01\x5a\xd8"
+                         "\x30\x60\x15\x56\x00\x00\x00\x00",
+               .assoc  = "\xda\xe6\x28\x9c\x45\x2d\xfd\x63"
+                         "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7"
+                         "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1"
+                         "\x0a\x63\x09\x78\xbc\x2c\x55\xde",
+               .alen   = 32,
+               .input  = "\x87\xa3\x36\xfd\x96\xb3\x93\x78"
+                         "\xa9\x28\x63\xba\x12\xa3\x14\x85"
+                         "\x57\x1e\x06\xc9\x7b\x21\xef\x76"
+                         "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e",
+               .ilen   = 32,
+               .result = "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19"
+                         "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab"
+                         "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff"
+                         "\x3b\xb5\xce\x53\xef\xde\xbb\x02"
+                         "\xa9\x86\x15\x6c\x13\xfe\xda\x0a"
+                         "\x22\xb8\x29\x3d\xd8\x39\x9a\x23",
+               .rlen   = 48,
+       }, {
+               .key    = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
+                         "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75"
+                         "\x53\x14\x73\x66\x8d\x88\xf6\x80",
+               .klen   = 24,
+               .iv     = "\x03\xa0\x20\x35\x26\xf2\x21\x8d"
+                         "\x50\x20\xda\xe2\x00\x00\x00\x00",
+               .assoc  = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
+                         "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
+                         "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
+                         "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
+               .alen   = 32,
+               .result = "\x36\xea\x7a\x70\x08\xdc\x6a\xbc"
+                         "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b",
+               .rlen   = 16,
+       }, {
+               .key    = "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42"
+                         "\xef\x7a\xd3\xce\xfc\x84\x60\x62"
+                         "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01",
+               .klen   = 24,
+               .iv     = "\x03\xd6\x3c\x8c\x86\x84\xb6\xcd"
+                         "\xef\x09\x2e\x94\x00\x00\x00\x00",
+               .assoc  = "\x02\x65\x78\x3c\xe9\x21\x30\x91"
+                         "\xb1\xb9\xda\x76\x9a\x78\x6d\x95"
+                         "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c"
+                         "\xe3\x00\x73\x69\x84\x69\x87\x79",
+               .alen   = 32,
+               .input  = "\x9f\xd2\x02\x4b\x52\x49\x31\x3c"
+                         "\x43\x69\x3a\x2d\x8e\x70\xad\x7e"
+                         "\xe0\xe5\x46\x09\x80\x89\x13\xb2"
+                         "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b",
+               .ilen   = 32,
+               .result = "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62"
+                         "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f"
+                         "\x9b\x6a\x09\x70\xc1\x51\x83\xc2"
+                         "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e"
+                         "\xc7\x79\x11\x58\xe5\x6b\x20\x40"
+                         "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1",
+               .rlen   = 48,
+       }, {
+               .key    = "\xe0\x8d\x99\x71\x60\xd7\x97\x1a"
+                         "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a"
+                         "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e"
+                         "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b",
+               .klen   = 32,
+               .iv     = "\x03\x1e\x29\x91\xad\x8e\xc1\x53"
+                         "\x0a\xcf\x2d\xbe\x00\x00\x00\x00",
+               .assoc  = "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b"
+                         "\x78\x2b\x94\x02\x29\x0f\x42\x27"
+                         "\x6b\x75\xcb\x98\x34\x08\x7e\x79"
+                         "\xe4\x3e\x49\x0d\x84\x8b\x22\x87",
+               .alen   = 32,
+               .input  = "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f"
+                         "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66"
+                         "\xbf\x17\x99\x62\x4a\x39\x27\x1f"
+                         "\x1d\xdc\x24\xae\x19\x2f\x98\x4c",
+               .ilen   = 32,
+               .result = "\x19\xb8\x61\x33\x45\x2b\x43\x96"
+                         "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6"
+                         "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f"
+                         "\xf0\x62\x17\x34\xf2\x1e\x8d\x75"
+                         "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d",
+               .rlen   = 40,
+       }, {
+               .key    = "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c"
+                         "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32"
+                         "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c"
+                         "\x09\x75\x9a\x9b\x3c\x9b\x27\x39",
+               .klen   = 32,
+               .iv     = "\x03\xf9\xd9\x4e\x63\xb5\x3d\x9d"
+                         "\x43\xf6\x1e\x50",
+               .assoc  = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b"
+                         "\x13\x02\x01\x0c\x83\x4c\x96\x35"
+                         "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94"
+                         "\xb0\x39\x36\xe6\x8f\x57\xe0\x13",
+               .alen   = 32,
+               .input  = "\x3b\x6c\x29\x36\xb6\xef\x07\xa6"
+                         "\x83\x72\x07\x4f\xcf\xfa\x66\x89"
+                         "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27"
+                         "\x30\xdb\x75\x09\x93\xd4\x65\xe4",
+               .ilen   = 32,
+               .result = "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d"
+                         "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d"
+                         "\xcc\x63\x44\x25\x07\x78\x4f\x9e"
+                         "\x96\xb8\x88\xeb\xbc\x48\x1f\x06"
+                         "\x39\xaf\x39\xac\xd8\x4a\x80\x39"
+                         "\x7b\x72\x8a\xf7",
+               .rlen   = 44,
+       }, {
+               .key    = "\xab\xd0\xe9\x33\x07\x26\xe5\x83"
+                         "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46"
+                         "\xf9\x8f\xad\xe3\x02\x13\x83\x77"
+                         "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b",
+               .klen   = 32,
+               .iv     = "\x03\x24\xa7\x8b\x07\xcb\xcc\x0e"
+                         "\xe6\x33\xbf\xf5\x00\x00\x00\x00",
+               .assoc  = "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f"
+                         "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d"
+                         "\xab\x90\x65\x8d\x8e\xca\x4d\x4f"
+                         "\x16\x0c\x40\x90\x4b\xc7\x36\x73",
+               .alen   = 32,
+               .input  = "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92"
+                         "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d"
+                         "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2"
+                         "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a",
+               .ilen   = 32,
+               .result = "\x83\x6f\x40\x87\x72\xcf\xc1\x13"
+                         "\xef\xbb\x80\x21\x04\x6c\x58\x09"
+                         "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7"
+                         "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf"
+                         "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d"
+                         "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8",
+               .rlen   = 48,
        }
 };
 
@@ -21688,186 +21861,13 @@ static struct aead_testvec aes_ccm_dec_tv_template[] = {
                          "\x8e\x5e\x67\x01\xc9\x17\x87\x65"
                          "\x98\x09\xd6\x7d\xbe\xdd\x18",
                .rlen   = 23,
-       },
-};
-
-/*
- * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all
- * use a 13-byte nonce, we only support an 11-byte nonce. Similarly, all of
- * Special Publication 800-38C's test vectors also use nonce lengths our
- * implementation doesn't support. The following are taken from fips cavs
- * fax files on hand at Red Hat.
- *
- * nb: actual key lengths are (klen - 3), the last 3 bytes are actually
- * part of the nonce which combine w/the iv, but need to be input this way.
- */
-static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = {
-       {
-               .key    = "\x83\xac\x54\x66\xc2\xeb\xe5\x05"
-                         "\x2e\x01\xd1\xfc\x5d\x82\x66\x2e"
-                         "\x96\xac\x59",
-               .klen   = 19,
-               .iv     = "\x30\x07\xa1\xe2\xa2\xc7\x55\x24",
-               .alen   = 0,
-               .input  = "\x19\xc8\x81\xf6\xe9\x86\xff\x93"
-                         "\x0b\x78\x67\xe5\xbb\xb7\xfc\x6e"
-                         "\x83\x77\xb3\xa6\x0c\x8c\x9f\x9c"
-                         "\x35\x2e\xad\xe0\x62\xf9\x91\xa1",
-               .ilen   = 32,
-               .result = "\xab\x6f\xe1\x69\x1d\x19\x99\xa8"
-                         "\x92\xa0\xc4\x6f\x7e\xe2\x8b\xb1"
-                         "\x70\xbb\x8c\xa6\x4c\x6e\x97\x8a"
-                         "\x57\x2b\xbe\x5d\x98\xa6\xb1\x32"
-                         "\xda\x24\xea\xd9\xa1\x39\x98\xfd"
-                         "\xa4\xbe\xd9\xf2\x1a\x6d\x22\xa8",
-               .rlen   = 48,
-       }, {
-               .key    = "\x1e\x2c\x7e\x01\x41\x9a\xef\xc0"
-                         "\x0d\x58\x96\x6e\x5c\xa2\x4b\xd3"
-                         "\x4f\xa3\x19",
-               .klen   = 19,
-               .iv     = "\xd3\x01\x5a\xd8\x30\x60\x15\x56",
-               .assoc  = "\xda\xe6\x28\x9c\x45\x2d\xfd\x63"
-                         "\x5e\xda\x4c\xb6\xe6\xfc\xf9\xb7"
-                         "\x0c\x56\xcb\xe4\xe0\x05\x7a\xe1"
-                         "\x0a\x63\x09\x78\xbc\x2c\x55\xde",
-               .alen   = 32,
-               .input  = "\x87\xa3\x36\xfd\x96\xb3\x93\x78"
-                         "\xa9\x28\x63\xba\x12\xa3\x14\x85"
-                         "\x57\x1e\x06\xc9\x7b\x21\xef\x76"
-                         "\x7f\x38\x7e\x8e\x29\xa4\x3e\x7e",
-               .ilen   = 32,
-               .result = "\x8a\x1e\x11\xf0\x02\x6b\xe2\x19"
-                         "\xfc\x70\xc4\x6d\x8e\xb7\x99\xab"
-                         "\xc5\x4b\xa2\xac\xd3\xf3\x48\xff"
-                         "\x3b\xb5\xce\x53\xef\xde\xbb\x02"
-                         "\xa9\x86\x15\x6c\x13\xfe\xda\x0a"
-                         "\x22\xb8\x29\x3d\xd8\x39\x9a\x23",
-               .rlen   = 48,
-       }, {
-               .key    = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
-                         "\xa3\xf0\xff\xdd\x4e\x4b\x12\x75"
-                         "\x53\x14\x73\x66\x8d\x88\xf6\x80"
-                         "\xa0\x20\x35",
-               .klen   = 27,
-               .iv     = "\x26\xf2\x21\x8d\x50\x20\xda\xe2",
-               .assoc  = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
-                         "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
-                         "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
-                         "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
-               .alen   = 32,
-               .ilen   = 0,
-               .result = "\x36\xea\x7a\x70\x08\xdc\x6a\xbc"
-                         "\xad\x0c\x7a\x63\xf6\x61\xfd\x9b",
-               .rlen   = 16,
-       }, {
-               .key    = "\x56\xdf\x5c\x8f\x26\x3f\x0e\x42"
-                         "\xef\x7a\xd3\xce\xfc\x84\x60\x62"
-                         "\xca\xb4\x40\xaf\x5f\xc9\xc9\x01"
-                         "\xd6\x3c\x8c",
-               .klen   = 27,
-               .iv     = "\x86\x84\xb6\xcd\xef\x09\x2e\x94",
-               .assoc  = "\x02\x65\x78\x3c\xe9\x21\x30\x91"
-                         "\xb1\xb9\xda\x76\x9a\x78\x6d\x95"
-                         "\xf2\x88\x32\xa3\xf2\x50\xcb\x4c"
-                         "\xe3\x00\x73\x69\x84\x69\x87\x79",
-               .alen   = 32,
-               .input  = "\x9f\xd2\x02\x4b\x52\x49\x31\x3c"
-                         "\x43\x69\x3a\x2d\x8e\x70\xad\x7e"
-                         "\xe0\xe5\x46\x09\x80\x89\x13\xb2"
-                         "\x8c\x8b\xd9\x3f\x86\xfb\xb5\x6b",
-               .ilen   = 32,
-               .result = "\x39\xdf\x7c\x3c\x5a\x29\xb9\x62"
-                         "\x5d\x51\xc2\x16\xd8\xbd\x06\x9f"
-                         "\x9b\x6a\x09\x70\xc1\x51\x83\xc2"
-                         "\x66\x88\x1d\x4f\x9a\xda\xe0\x1e"
-                         "\xc7\x79\x11\x58\xe5\x6b\x20\x40"
-                         "\x7a\xea\x46\x42\x8b\xe4\x6f\xe1",
-               .rlen   = 48,
-       }, {
-               .key    = "\xe0\x8d\x99\x71\x60\xd7\x97\x1a"
-                         "\xbd\x01\x99\xd5\x8a\xdf\x71\x3a"
-                         "\xd3\xdf\x24\x4b\x5e\x3d\x4b\x4e"
-                         "\x30\x7a\xb9\xd8\x53\x0a\x5e\x2b"
-                         "\x1e\x29\x91",
-               .klen   = 35,
-               .iv     = "\xad\x8e\xc1\x53\x0a\xcf\x2d\xbe",
-               .assoc  = "\x19\xb6\x1f\x57\xc4\xf3\xf0\x8b"
-                         "\x78\x2b\x94\x02\x29\x0f\x42\x27"
-                         "\x6b\x75\xcb\x98\x34\x08\x7e\x79"
-                         "\xe4\x3e\x49\x0d\x84\x8b\x22\x87",
-               .alen   = 32,
-               .input  = "\xe1\xd9\xd8\x13\xeb\x3a\x75\x3f"
-                         "\x9d\xbd\x5f\x66\xbe\xdc\xbb\x66"
-                         "\xbf\x17\x99\x62\x4a\x39\x27\x1f"
-                         "\x1d\xdc\x24\xae\x19\x2f\x98\x4c",
-               .ilen   = 32,
-               .result = "\x19\xb8\x61\x33\x45\x2b\x43\x96"
-                         "\x6f\x51\xd0\x20\x30\x7d\x9b\xc6"
-                         "\x26\x3d\xf8\xc9\x65\x16\xa8\x9f"
-                         "\xf0\x62\x17\x34\xf2\x1e\x8d\x75"
-                         "\x4e\x13\xcc\xc0\xc3\x2a\x54\x2d",
-               .rlen   = 40,
-       }, {
-               .key    = "\x7c\xc8\x18\x3b\x8d\x99\xe0\x7c"
-                         "\x45\x41\xb8\xbd\x5c\xa7\xc2\x32"
-                         "\x8a\xb8\x02\x59\xa4\xfe\xa9\x2c"
-                         "\x09\x75\x9a\x9b\x3c\x9b\x27\x39"
-                         "\xf9\xd9\x4e",
-               .klen   = 35,
-               .iv     = "\x63\xb5\x3d\x9d\x43\xf6\x1e\x50",
-               .assoc  = "\x57\xf5\x6b\x8b\x57\x5c\x3d\x3b"
-                         "\x13\x02\x01\x0c\x83\x4c\x96\x35"
-                         "\x8e\xd6\x39\xcf\x7d\x14\x9b\x94"
-                         "\xb0\x39\x36\xe6\x8f\x57\xe0\x13",
-               .alen   = 32,
-               .input  = "\x3b\x6c\x29\x36\xb6\xef\x07\xa6"
-                         "\x83\x72\x07\x4f\xcf\xfa\x66\x89"
-                         "\x5f\xca\xb1\xba\xd5\x8f\x2c\x27"
-                         "\x30\xdb\x75\x09\x93\xd4\x65\xe4",
-               .ilen   = 32,
-               .result = "\xb0\x88\x5a\x33\xaa\xe5\xc7\x1d"
-                         "\x85\x23\xc7\xc6\x2f\xf4\x1e\x3d"
-                         "\xcc\x63\x44\x25\x07\x78\x4f\x9e"
-                         "\x96\xb8\x88\xeb\xbc\x48\x1f\x06"
-                         "\x39\xaf\x39\xac\xd8\x4a\x80\x39"
-                         "\x7b\x72\x8a\xf7",
-               .rlen   = 44,
-       }, {
-               .key    = "\xab\xd0\xe9\x33\x07\x26\xe5\x83"
-                         "\x8c\x76\x95\xd4\xb6\xdc\xf3\x46"
-                         "\xf9\x8f\xad\xe3\x02\x13\x83\x77"
-                         "\x3f\xb0\xf1\xa1\xa1\x22\x0f\x2b"
-                         "\x24\xa7\x8b",
-               .klen   = 35,
-               .iv     = "\x07\xcb\xcc\x0e\xe6\x33\xbf\xf5",
-               .assoc  = "\xd4\xdb\x30\x1d\x03\xfe\xfd\x5f"
-                         "\x87\xd4\x8c\xb6\xb6\xf1\x7a\x5d"
-                         "\xab\x90\x65\x8d\x8e\xca\x4d\x4f"
-                         "\x16\x0c\x40\x90\x4b\xc7\x36\x73",
-               .alen   = 32,
-               .input  = "\xf5\xc6\x7d\x48\xc1\xb7\xe6\x92"
-                         "\x97\x5a\xca\xc4\xa9\x6d\xf9\x3d"
-                         "\x6c\xde\xbc\xf1\x90\xea\x6a\xb2"
-                         "\x35\x86\x36\xaf\x5c\xfe\x4b\x3a",
-               .ilen   = 32,
-               .result = "\x83\x6f\x40\x87\x72\xcf\xc1\x13"
-                         "\xef\xbb\x80\x21\x04\x6c\x58\x09"
-                         "\x07\x1b\xfc\xdf\xc0\x3f\x5b\xc7"
-                         "\xe0\x79\xa8\x6e\x71\x7c\x3f\xcf"
-                         "\x5c\xda\xb2\x33\xe5\x13\xe2\x0d"
-                         "\x74\xd1\xef\xb5\x0f\x3a\xb5\xf8",
-               .rlen   = 48,
-       },
-};
-
-static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
-       {
-               .key    = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
-                         "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
-                         "\xc6\xfb\x7d",
-               .klen   = 19,
-               .iv     = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+       }, {
+               /* This is taken from FIPS CAVS. */
+               .key    = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+                         "\xff\x80\x2e\x48\x7d\x82\xf8\xb9",
+               .klen   = 16,
+               .iv     = "\x03\xc6\xfb\x7d\x80\x0d\x13\xab"
+                         "\xd8\xa6\xb2\xd8\x00\x00\x00\x00",
                .alen   = 0,
                .input  = "\xd5\xe8\x93\x9f\xc7\x89\x2e\x2b",
                .ilen   = 8,
@@ -21876,10 +21876,10 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
                .novrfy = 1,
        }, {
                .key    = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
-                         "\xff\x80\x2e\x48\x7d\x82\xf8\xb9"
-                         "\xaf\x94\x87",
-               .klen   = 19,
-               .iv     = "\x78\x35\x82\x81\x7f\x88\x94\x68",
+                         "\xff\x80\x2e\x48\x7d\x82\xf8\xb9",
+               .klen   = 16,
+               .iv     = "\x03\xaf\x94\x87\x78\x35\x82\x81"
+                         "\x7f\x88\x94\x68\x00\x00\x00\x00",
                .alen   = 0,
                .input  = "\x41\x3c\xb8\x87\x73\xcb\xf3\xf3",
                .ilen   = 8,
@@ -21887,10 +21887,10 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
                .rlen   = 0,
        }, {
                .key    = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
-                         "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
-                         "\xc6\xfb\x7d",
-               .klen   = 19,
-               .iv     = "\x80\x0d\x13\xab\xd8\xa6\xb2\xd8",
+                         "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8",
+               .klen   = 16,
+               .iv     = "\x03\xc6\xfb\x7d\x80\x0d\x13\xab"
+                         "\xd8\xa6\xb2\xd8\x00\x00\x00\x00",
                .assoc  = "\xf3\x94\x87\x78\x35\x82\x81\x7f"
                          "\x88\x94\x68\xb1\x78\x6b\x2b\xd6"
                          "\x04\x1f\x4e\xed\x78\xd5\x33\x66"
@@ -21911,10 +21911,10 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
                .novrfy = 1,
        }, {
                .key    = "\x61\x0e\x8c\xae\xe3\x23\xb6\x38"
-                         "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8"
-                         "\x05\xe0\xc9",
-               .klen   = 19,
-               .iv     = "\x0f\xed\x34\xea\x97\xd4\x3b\xdf",
+                         "\x76\x1c\xf6\x3a\x67\xa3\x9c\xd8",
+               .klen   = 16,
+               .iv     = "\x03\x05\xe0\xc9\x0f\xed\x34\xea"
+                         "\x97\xd4\x3b\xdf\x00\x00\x00\x00",
                .assoc  = "\x49\x5c\x50\x1f\x1d\x94\xcc\x81"
                          "\xba\xb7\xb6\x03\xaf\xa5\xc1\xa1"
                          "\xd8\x5c\x42\x68\xe0\x6c\xda\x89"
@@ -21935,10 +21935,10 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
        }, {
                .key    = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
                          "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
-                         "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
-                         "\xee\x49\x83",
-               .klen   = 27,
-               .iv     = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
+                         "\xa4\x48\x93\x39\x26\x71\x4a\xc6",
+               .klen   = 24,
+               .iv     = "\x03\xee\x49\x83\xe9\xa9\xff\xe9"
+                         "\x57\xba\xfd\x9e\x00\x00\x00\x00",
                .assoc  = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
                          "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
                          "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
@@ -21949,114 +21949,1348 @@ static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[] = {
                .result = "\x00",
                .rlen   = 0,
        }, {
-               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
-                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
-                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
-                         "\xee\x49\x83",
+               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba",
+               .klen   = 24,
+               .iv     = "\x03\xee\x49\x83\xe9\xa9\xff\xe9"
+                         "\x57\xba\xfd\x9e\x00\x00\x00\x00",
+               .assoc  = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+                         "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+                         "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+                         "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+               .alen   = 32,
+               .input  = "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7"
+                         "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2"
+                         "\x66\xca\x61\x1e\x96\x7a\x61\xb3"
+                         "\x1c\x16\x45\x52\xba\x04\x9c\x9f"
+                         "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1",
+               .ilen   = 40,
+               .result = "\x85\x34\x66\x42\xc8\x92\x0f\x36"
+                         "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb"
+                         "\x0a\x85\xcc\x02\xad\x7a\x96\xe9"
+                         "\x65\x43\xa4\xc3\x0f\xdc\x55\x81",
+               .rlen   = 32,
+       }, {
+               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba",
+               .klen   = 24,
+               .iv     = "\x03\xd1\xfc\x57\x9c\xfe\xb8\x9c"
+                         "\xad\x71\xaa\x1f\x00\x00\x00\x00",
+               .assoc  = "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6"
+                         "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3"
+                         "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7"
+                         "\x3c\xa1\x52\x13\x03\x8a\x23\x3a",
+               .alen   = 32,
+               .input  = "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00"
+                         "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37"
+                         "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa"
+                         "\x64\x19\xc0\x30\xd7\xfc\x14\x6b"
+                         "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f"
+                         "\xa9\xb4\x2d\x68\x03\xa3\x44\xef",
+               .ilen   = 48,
+               .result = "\x02\x87\x4d\x28\x80\x6e\xb2\xed"
+                         "\x99\x2a\xa8\xca\x04\x25\x45\x90"
+                         "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c"
+                         "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b",
+               .rlen   = 32,
+               .novrfy = 1,
+       }, {
+               .key    = "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01"
+                         "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c"
+                         "\x20\x2c\xad\x30\xc2\x2b\x41\xfb"
+                         "\x0e\x85\xbc\x33\xad\x0f\x2b\xff",
+               .klen   = 32,
+               .iv     = "\x03\xee\x49\x83\xe9\xa9\xff\xe9"
+                         "\x57\xba\xfd\x9e\x00\x00\x00\x00",
+               .alen   = 0,
+               .input  = "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2",
+               .ilen   = 8,
+               .result = "\x00",
+               .rlen   = 0,
+       }, {
+               .key    = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+                         "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
+                         "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
+                         "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb",
+               .klen   = 32,
+               .iv     = "\x03\x85\x34\x66\x42\xc8\x92\x0f"
+                         "\x36\x58\xe0\x6b\x00\x00\x00\x00",
+               .alen   = 0,
+               .input  = "\x48\x01\x5e\x02\x24\x04\x66\x47"
+                         "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd"
+                         "\xa5\xa9\x87\x8d\x84\xee\x2e\x77"
+                         "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6"
+                         "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07"
+                         "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a",
+               .ilen   = 48,
+               .result = "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c"
+                         "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99"
+                         "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39"
+                         "\x89\xd4\x75\x7a\x63\xb1\xda\x93",
+               .rlen   = 32,
+               .novrfy = 1,
+       }, {
+               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
+                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
+                         "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b",
+               .klen   = 32,
+               .iv     = "\x03\xcf\x76\x3f\xd9\x95\x75\x8f"
+                         "\x44\x89\x40\x7b\x00\x00\x00\x00",
+               .assoc  = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+                         "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+                         "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+                         "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
+               .alen   = 32,
+               .input  = "\x48\x58\xd6\xf3\xad\x63\x58\xbf"
+                         "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4"
+                         "\x78\x5c\x4c\x67\x71\x89\x94\xbf"
+                         "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5"
+                         "\x7f\x44\x0a\x0c\x01\x18\x07\x92"
+                         "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b",
+               .ilen   = 48,
+               .result = "\xc2\x54\xc8\xde\x78\x87\x77\x40"
+                         "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
+                         "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
+                         "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+               .rlen   = 32,
+       },
+};
+
+/*
+ * rfc4309 refers to section 8 of rfc3610 for test vectors, but they all
+ * use a 13-byte nonce, whereas we only support an 11-byte nonce.  Worse,
+ * they use AD lengths which are not valid ESP header lengths.
+ *
+ * These vectors are copied/generated from the ones for rfc4106 with
+ * the key truncated by one byte.
+ */
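As an aside, the rfc4309 key/nonce layout referenced above can be sketched with a small hypothetical helper (not part of testmgr.h; the function name is illustrative): the last three key bytes are the implicit nonce salt, and the bare "ccm" vectors carry the equivalent 16-byte iv as the length-field byte, the 11-byte nonce (salt plus the 8-byte per-packet IV), and a zeroed counter.

    #include <string.h>

    /* Sketch only: rebuild the 16-byte "ccm" iv from an rfc4309-style
     * key (AES key plus 3 trailing salt bytes) and its 8-byte IV.
     */
    static void rfc4309_to_ccm_iv(const unsigned char *key, unsigned int klen,
                                  const unsigned char *iv8,
                                  unsigned char *ccm_iv)
    {
            ccm_iv[0] = 3;                          /* 4-byte length field (L), stored as L - 1 */
            memcpy(&ccm_iv[1], key + klen - 3, 3);  /* nonce salt from the key tail */
            memcpy(&ccm_iv[4], iv8, 8);             /* per-packet IV */
            memset(&ccm_iv[12], 0, 4);              /* counter bytes start at zero */
    }

Applied to the old rfc4309 vectors removed earlier in this patch, this reproduces the "\x03"-prefixed ivs of the plain ccm vectors added above.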
+static struct aead_testvec aes_ccm_rfc4309_enc_tv_template[] = {
+       { /* Generated using Crypto++ */
+               .key    = zeroed_string,
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .input  = zeroed_string,
+               .ilen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .result = "\x2E\x9A\xCA\x6B\xDA\x54\xFC\x6F"
+                         "\x12\x50\xE8\xDE\x81\x3C\x63\x08"
+                         "\x1A\x22\xBA\x75\xEE\xD4\xD5\xB5"
+                         "\x27\x50\x01\xAC\x03\x33\x39\xFB",
+               .rlen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = zeroed_string,
+               .ilen   = 16,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = "\xCF\xB9\x99\x17\xC8\x86\x0E\x7F"
+                         "\x7E\x76\xF8\xE6\xF8\xCC\x1F\x17"
+                         "\x6A\xE0\x53\x9F\x4B\x73\x7E\xDA"
+                         "\x08\x09\x4E\xC4\x1E\xAD\xC6\xB0",
+               .rlen   = 32,
+
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .result = "\x33\xDE\x73\xBC\xA6\xCE\x4E\xA6"
+                         "\x61\xF4\xF5\x41\x03\x4A\xE3\x86"
+                         "\xA1\xE2\xC2\x42\x2B\x81\x70\x40"
+                         "\xFD\x7F\x76\xD1\x03\x07\xBB\x0C",
+               .rlen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
+               .result = "\x33\xDE\x73\xBC\xA6\xCE\x4E\xA6"
+                         "\x61\xF4\xF5\x41\x03\x4A\xE3\x86"
+                         "\x5B\xC0\x73\xE0\x2B\x73\x68\xC9"
+                         "\x2D\x8C\x58\xC2\x90\x3D\xB0\x3E",
+               .rlen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = "\xCE\xB8\x98\x16\xC9\x87\x0F\x7E"
+                         "\x7F\x77\xF9\xE7\xF9\xCD\x1E\x16"
+                         "\x43\x8E\x76\x57\x3B\xB4\x05\xE8"
+                         "\xA9\x9B\xBF\x25\xE0\x4F\xC0\xED",
+               .rlen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .ilen   = 64,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .result = "\xCE\xB8\x98\x16\xC9\x87\x0F\x7E"
+                         "\x7F\x77\xF9\xE7\xF9\xCD\x1E\x16"
+                         "\x9C\xA4\x97\x83\x3F\x01\xA5\xF4"
+                         "\x43\x09\xE7\xB8\xE9\xD1\xD7\x02"
+                         "\x9B\xAB\x39\x18\xEB\x94\x34\x36"
+                         "\xE6\xC5\xC8\x9B\x00\x81\x9E\x49"
+                         "\x1D\x78\xE1\x48\xE3\xE9\xEA\x8E"
+                         "\x3A\x2B\x67\x5D\x35\x6A\x0F\xDB"
+                         "\x02\x73\xDD\xE7\x30\x4A\x30\x54"
+                         "\x1A\x9D\x09\xCA\xC8\x1C\x32\x5F",
+               .rlen   = 80,
+       }, {
+               .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef",
+               .input  = "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .ilen   = 192,
+               .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+                         "\xaa\xaa\xaa\xaa\x00\x00\x45\x67"
+                         "\x89\xab\xcd\xef",
+               .alen   = 20,
+               .result = "\x64\x17\xDC\x24\x9D\x92\xBA\x5E"
+                         "\x7C\x64\x6D\x33\x46\x77\xAC\xB1"
+                         "\x5C\x9E\xE2\xC7\x27\x11\x3E\x95"
+                         "\x7D\xBE\x28\xC8\xC1\xCA\x5E\x8C"
+                         "\xB4\xE2\xDE\x9F\x53\x59\x26\xDB"
+                         "\x0C\xD4\xE4\x07\x9A\xE6\x3E\x01"
+                         "\x58\x0D\x3E\x3D\xD5\x21\xEB\x04"
+                         "\x06\x9D\x5F\xB9\x02\x49\x1A\x2B"
+                         "\xBA\xF0\x4E\x3B\x85\x50\x5B\x09"
+                         "\xFE\xEC\xFC\x54\xEC\x0C\xE2\x79"
+                         "\x8A\x2F\x5F\xD7\x05\x5D\xF1\x6D"
+                         "\x22\xEB\xD1\x09\x80\x3F\x5A\x70"
+                         "\xB2\xB9\xD3\x63\x99\xC2\x4D\x1B"
+                         "\x36\x12\x00\x89\xAA\x5D\x55\xDA"
+                         "\x1D\x5B\xD8\x3C\x5F\x09\xD2\xE6"
+                         "\x39\x41\x5C\xF0\xBE\x26\x4E\x5F"
+                         "\x2B\x50\x44\x52\xC2\x10\x7D\x38"
+                         "\x82\x64\x83\x0C\xAE\x49\xD0\xE5"
+                         "\x4F\xE5\x66\x4C\x58\x7A\xEE\x43"
+                         "\x3B\x51\xFE\xBA\x24\x8A\xFE\xDC"
+                         "\x19\x6D\x60\x66\x61\xF9\x9A\x3F"
+                         "\x75\xFC\x38\x53\x5B\xB5\xCD\x52"
+                         "\x4F\xE5\xE4\xC9\xFE\x10\xCB\x98"
+                         "\xF0\x06\x5B\x07\xAB\xBB\xF4\x0E"
+                         "\x2D\xC2\xDD\x5D\xDD\x22\x9A\xCC"
+                         "\x39\xAB\x63\xA5\x3D\x9C\x51\x8A",
+               .rlen   = 208,
+       }, { /* From draft-mcgrew-gcm-test-01 */
+               .key    = "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+                         "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+                         "\x2E\x44\x3B",
+               .klen   = 19,
+               .iv     = "\x49\x56\xED\x7E\x3B\x24\x4C\xFE",
+               .input  = "\x45\x00\x00\x48\x69\x9A\x00\x00"
+                         "\x80\x11\x4D\xB7\xC0\xA8\x01\x02"
+                         "\xC0\xA8\x01\x01\x0A\x9B\xF1\x56"
+                         "\x38\xD3\x01\x00\x00\x01\x00\x00"
+                         "\x00\x00\x00\x00\x04\x5F\x73\x69"
+                         "\x70\x04\x5F\x75\x64\x70\x03\x73"
+                         "\x69\x70\x09\x63\x79\x62\x65\x72"
+                         "\x63\x69\x74\x79\x02\x64\x6B\x00"
+                         "\x00\x21\x00\x01\x01\x02\x02\x01",
+               .ilen   = 72,
+               .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
+                         "\x00\x00\x00\x00\x49\x56\xED\x7E"
+                         "\x3B\x24\x4C\xFE",
+               .alen   = 20,
+               .result = "\x89\xBA\x3E\xEF\xE6\xD6\xCF\xDB"
+                         "\x83\x60\xF5\xBA\x3A\x56\x79\xE6"
+                         "\x7E\x0C\x53\xCF\x9E\x87\xE0\x4E"
+                         "\x1A\x26\x01\x24\xC7\x2E\x3D\xBF"
+                         "\x29\x2C\x91\xC1\xB8\xA8\xCF\xE0"
+                         "\x39\xF8\x53\x6D\x31\x22\x2B\xBF"
+                         "\x98\x81\xFC\x34\xEE\x85\x36\xCD"
+                         "\x26\xDB\x6C\x7A\x0C\x77\x8A\x35"
+                         "\x18\x85\x54\xB2\xBC\xDD\x3F\x43"
+                         "\x61\x06\x8A\xDF\x86\x3F\xB4\xAC"
+                         "\x97\xDC\xBD\xFD\x92\x10\xC5\xFF",
+               .rlen   = 88,
+       }, {
+               .key    = "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+                         "\xCA\xFE\xBA",
+               .klen   = 19,
+               .iv     = "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .input  = "\x45\x00\x00\x3E\x69\x8F\x00\x00"
+                         "\x80\x11\x4D\xCC\xC0\xA8\x01\x02"
+                         "\xC0\xA8\x01\x01\x0A\x98\x00\x35"
+                         "\x00\x2A\x23\x43\xB2\xD0\x01\x00"
+                         "\x00\x01\x00\x00\x00\x00\x00\x00"
+                         "\x03\x73\x69\x70\x09\x63\x79\x62"
+                         "\x65\x72\x63\x69\x74\x79\x02\x64"
+                         "\x6B\x00\x00\x01\x00\x01\x00\x01",
+               .ilen   = 64,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
+               .result = "\x4B\xC2\x70\x60\x64\xD2\xF3\xC8"
+                         "\xE5\x26\x8A\xDE\xB8\x7E\x7D\x16"
+                         "\x56\xC7\xD2\x88\xBA\x8D\x58\xAF"
+                         "\xF5\x71\xB6\x37\x84\xA7\xB1\x99"
+                         "\x51\x5C\x0D\xA0\x27\xDE\xE7\x2D"
+                         "\xEF\x25\x88\x1F\x1D\x77\x11\xFF"
+                         "\xDB\xED\xEE\x56\x16\xC5\x5C\x9B"
+                         "\x00\x62\x1F\x68\x4E\x7C\xA0\x97"
+                         "\x10\x72\x7E\x53\x13\x3B\x68\xE4"
+                         "\x30\x99\x91\x79\x09\xEA\xFF\x6A",
+               .rlen   = 80,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x11\x22\x33",
+               .klen   = 35,
+               .iv     = "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .input  = "\x45\x00\x00\x30\x69\xA6\x40\x00"
+                         "\x80\x06\x26\x90\xC0\xA8\x01\x02"
+                         "\x93\x89\x15\x5E\x0A\x9E\x00\x8B"
+                         "\x2D\xC5\x7E\xE0\x00\x00\x00\x00"
+                         "\x70\x02\x40\x00\x20\xBF\x00\x00"
+                         "\x02\x04\x05\xB4\x01\x01\x04\x02"
+                         "\x01\x02\x02\x01",
+               .ilen   = 52,
+               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02"
+                         "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .alen   = 16,
+               .result = "\xD6\x31\x0D\x2B\x3D\x6F\xBD\x2F"
+                         "\x58\x41\x7E\xFF\x9A\x9E\x09\xB4"
+                         "\x1A\xF7\xF6\x42\x31\xCD\xBF\xAD"
+                         "\x27\x0E\x2C\xF2\xDB\x10\xDF\x55"
+                         "\x8F\x0D\xD7\xAC\x23\xBD\x42\x10"
+                         "\xD0\xB2\xAF\xD8\x37\xAC\x6B\x0B"
+                         "\x11\xD4\x0B\x12\xEC\xB4\xB1\x92"
+                         "\x23\xA6\x10\xB0\x26\xD6\xD9\x26"
+                         "\x5A\x48\x6A\x3E",
+               .rlen   = 68,
+       }, {
+               .key    = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .input  = "\x45\x00\x00\x3C\x99\xC5\x00\x00"
+                         "\x80\x01\xCB\x7A\x40\x67\x93\x18"
+                         "\x01\x01\x01\x01\x08\x00\x07\x5C"
+                         "\x02\x00\x44\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x75\x76\x77\x61\x62\x63\x64\x65"
+                         "\x66\x67\x68\x69\x01\x02\x02\x01",
+               .ilen   = 64,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
+               .result = "\x6B\x9A\xCA\x57\x43\x91\xFC\x6F"
+                         "\x92\x51\x23\xA4\xC1\x5B\xF0\x10"
+                         "\xF3\x13\xF4\xF8\xA1\x9A\xB4\xDC"
+                         "\x89\xC8\xF8\x42\x62\x95\xB7\xCB"
+                         "\xB8\xF5\x0F\x1B\x2E\x94\xA2\xA7"
+                         "\xBF\xFB\x8A\x92\x13\x63\xD1\x3C"
+                         "\x08\xF5\xE8\xA6\xAA\xF6\x34\xF9"
+                         "\x42\x05\xAF\xB3\xE7\x9A\xFC\xEE"
+                         "\x36\x25\xC1\x10\x12\x1C\xCA\x82"
+                         "\xEA\xE6\x63\x5A\x57\x28\xA9\x9A",
+               .rlen   = 80,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .input  = "\x45\x00\x00\x3C\x99\xC3\x00\x00"
+                         "\x80\x01\xCB\x7C\x40\x67\x93\x18"
+                         "\x01\x01\x01\x01\x08\x00\x08\x5C"
+                         "\x02\x00\x43\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x75\x76\x77\x61\x62\x63\x64\x65"
+                         "\x66\x67\x68\x69\x01\x02\x02\x01",
+               .ilen   = 64,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .result = "\x6A\x6B\x45\x2B\x7C\x67\x52\xF6"
+                         "\x10\x60\x40\x62\x6B\x4F\x97\x8E"
+                         "\x0B\xB2\x22\x97\xCB\x21\xE0\x90"
+                         "\xA2\xE7\xD1\x41\x30\xE4\x4B\x1B"
+                         "\x79\x01\x58\x50\x01\x06\xE1\xE0"
+                         "\x2C\x83\x79\xD3\xDE\x46\x97\x1A"
+                         "\x30\xB8\xE5\xDF\xD7\x12\x56\x75"
+                         "\xD0\x95\xB7\xB8\x91\x42\xF7\xFD"
+                         "\x97\x57\xCA\xC1\x20\xD0\x86\xB9"
+                         "\x66\x9D\xB4\x2B\x96\x22\xAC\x67",
+               .rlen   = 80,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .input  = "\x45\x00\x00\x1C\x42\xA2\x00\x00"
+                         "\x80\x01\x44\x1F\x40\x67\x93\xB6"
+                         "\xE0\x00\x00\x02\x0A\x00\xF5\xFF"
+                         "\x01\x02\x02\x01",
+               .ilen   = 28,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .result = "\x6A\x6B\x45\x0B\xA7\x06\x52\xF6"
+                         "\x10\x60\xCF\x01\x6B\x4F\x97\x20"
+                         "\xEA\xB3\x23\x94\xC9\x21\x1D\x33"
+                         "\xA1\xE5\x90\x40\x05\x37\x45\x70"
+                         "\xB5\xD6\x09\x0A\x23\x73\x33\xF9"
+                         "\x08\xB4\x22\xE4",
+               .rlen   = 44,
+       }, {
+               .key    = "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+                         "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\xCA\xFE\xBA",
+               .klen   = 27,
+               .iv     = "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .input  = "\x45\x00\x00\x28\xA4\xAD\x40\x00"
+                         "\x40\x06\x78\x80\x0A\x01\x03\x8F"
+                         "\x0A\x01\x06\x12\x80\x23\x06\xB8"
+                         "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
+                         "\x50\x10\x16\xD0\x75\x68\x00\x01",
+               .ilen   = 40,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
+               .result = "\x05\x22\x15\xD1\x52\x56\x85\x04"
+                         "\xA8\x5C\x5D\x6D\x7E\x6E\xF5\xFA"
+                         "\xEA\x16\x37\x50\xF3\xDF\x84\x3B"
+                         "\x2F\x32\x18\x57\x34\x2A\x8C\x23"
+                         "\x67\xDF\x6D\x35\x7B\x54\x0D\xFB"
+                         "\x34\xA5\x9F\x6C\x48\x30\x1E\x22"
+                         "\xFE\xB1\x22\x17\x17\x8A\xB9\x5B",
+               .rlen   = 56,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xDE\xCA\xF8",
+               .klen   = 19,
+               .iv     = "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+               .input  = "\x45\x00\x00\x49\x33\xBA\x00\x00"
+                         "\x7F\x11\x91\x06\xC3\xFB\x1D\x10"
+                         "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+                         "\x00\x35\xDD\x7B\x80\x03\x02\xD5"
+                         "\x00\x00\x4E\x20\x00\x1E\x8C\x18"
+                         "\xD7\x5B\x81\xDC\x91\xBA\xA0\x47"
+                         "\x6B\x91\xB9\x24\xB2\x80\x38\x9D"
+                         "\x92\xC9\x63\xBA\xC0\x46\xEC\x95"
+                         "\x9B\x62\x66\xC0\x47\x22\xB1\x49"
+                         "\x23\x01\x01\x01",
+               .ilen   = 76,
+               .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
+               .result = "\x92\xD0\x53\x79\x33\x38\xD5\xF3"
+                         "\x7D\xE4\x7A\x8E\x86\x03\xC9\x90"
+                         "\x96\x35\xAB\x9C\xFB\xE8\xA3\x76"
+                         "\xE9\xE9\xE2\xD1\x2E\x11\x0E\x00"
+                         "\xFA\xCE\xB5\x9E\x02\xA7\x7B\xEA"
+                         "\x71\x9A\x58\xFB\xA5\x8A\xE1\xB7"
+                         "\x9C\x39\x9D\xE3\xB5\x6E\x69\xE6"
+                         "\x63\xC9\xDB\x05\x69\x51\x12\xAD"
+                         "\x3E\x00\x32\x73\x86\xF2\xEE\xF5"
+                         "\x0F\xE8\x81\x7E\x84\xD3\xC0\x0D"
+                         "\x76\xD6\x55\xC6\xB4\xC2\x34\xC7"
+                         "\x12\x25\x0B\xF9",
+               .rlen   = 92,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x73\x61\x6C",
+               .klen   = 35,
+               .iv     = "\x61\x6E\x64\x01\x69\x76\x65\x63",
+               .input  = "\x45\x08\x00\x28\x73\x2C\x00\x00"
+                         "\x40\x06\xE9\xF9\x0A\x01\x06\x12"
+                         "\x0A\x01\x03\x8F\x06\xB8\x80\x23"
+                         "\xDD\x6B\xAF\xBE\xCB\x71\x26\x02"
+                         "\x50\x10\x1F\x64\x6D\x54\x00\x01",
+               .ilen   = 40,
+               .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
+               .result = "\xCC\x74\xB7\xD3\xB0\x38\x50\x42"
+                         "\x2C\x64\x87\x46\x1E\x34\x10\x05"
+                         "\x29\x6B\xBB\x36\xE9\x69\xAD\x92"
+                         "\x82\xA1\x10\x6A\xEB\x0F\xDC\x7D"
+                         "\x08\xBA\xF3\x91\xCA\xAA\x61\xDA"
+                         "\x62\xF4\x14\x61\x5C\x9D\xB5\xA7"
+                         "\xEE\xD7\xB9\x7E\x87\x99\x9B\x7D",
+               .rlen   = 56,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .input  = "\x45\x00\x00\x49\x33\x3E\x00\x00"
+                         "\x7F\x11\x91\x82\xC3\xFB\x1D\x10"
+                         "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+                         "\x00\x35\xCB\x45\x80\x03\x02\x5B"
+                         "\x00\x00\x01\xE0\x00\x1E\x8C\x18"
+                         "\xD6\x57\x59\xD5\x22\x84\xA0\x35"
+                         "\x2C\x71\x47\x5C\x88\x80\x39\x1C"
+                         "\x76\x4D\x6E\x5E\xE0\x49\x6B\x32"
+                         "\x5A\xE2\x70\xC0\x38\x99\x49\x39"
+                         "\x15\x01\x01\x01",
+               .ilen   = 76,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .result = "\x6A\x6B\x45\x5E\xD6\x9A\x52\xF6"
+                         "\xEF\x70\x1A\x9C\xE8\xD3\x19\x86"
+                         "\xC8\x02\xF0\xB0\x03\x09\xD9\x02"
+                         "\xA0\xD2\x59\x04\xD1\x85\x2A\x24"
+                         "\x1C\x67\x3E\xD8\x68\x72\x06\x94"
+                         "\x97\xBA\x4F\x76\x8D\xB0\x44\x5B"
+                         "\x69\xBF\xD5\xE2\x3D\xF1\x0B\x0C"
+                         "\xC0\xBF\xB1\x8F\x70\x09\x9E\xCE"
+                         "\xA5\xF2\x55\x58\x84\xFA\xF9\xB5"
+                         "\x23\xF4\x84\x40\x74\x14\x8A\x6B"
+                         "\xDB\xD7\x67\xED\xA4\x93\xF3\x47"
+                         "\xCC\xF7\x46\x6F",
+               .rlen   = 92,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x73\x61\x6C",
+               .klen   = 35,
+               .iv     = "\x61\x6E\x64\x01\x69\x76\x65\x63",
+               .input  = "\x63\x69\x73\x63\x6F\x01\x72\x75"
+                         "\x6C\x65\x73\x01\x74\x68\x65\x01"
+                         "\x6E\x65\x74\x77\x65\x01\x64\x65"
+                         "\x66\x69\x6E\x65\x01\x74\x68\x65"
+                         "\x74\x65\x63\x68\x6E\x6F\x6C\x6F"
+                         "\x67\x69\x65\x73\x01\x74\x68\x61"
+                         "\x74\x77\x69\x6C\x6C\x01\x64\x65"
+                         "\x66\x69\x6E\x65\x74\x6F\x6D\x6F"
+                         "\x72\x72\x6F\x77\x01\x02\x02\x01",
+               .ilen   = 72,
+               .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
+               .result = "\xEA\x15\xC4\x98\xAC\x15\x22\x37"
+                         "\x00\x07\x1D\xBE\x60\x5D\x73\x16"
+                         "\x4D\x0F\xCC\xCE\x8A\xD0\x49\xD4"
+                         "\x39\xA3\xD1\xB1\x21\x0A\x92\x1A"
+                         "\x2C\xCF\x8F\x9D\xC9\x91\x0D\xB4"
+                         "\x15\xFC\xBC\xA5\xC5\xBF\x54\xE5"
+                         "\x1C\xC7\x32\x41\x07\x7B\x2C\xB6"
+                         "\x5C\x23\x7C\x93\xEA\xEF\x23\x1C"
+                         "\x73\xF4\xE7\x12\x84\x4C\x37\x0A"
+                         "\x4A\x8F\x06\x37\x48\xF9\xF9\x05"
+                         "\x55\x13\x40\xC3\xD5\x55\x3A\x3D",
+               .rlen   = 88,
+       }, {
+               .key    = "\x7D\x77\x3D\x00\xC1\x44\xC5\x25"
+                         "\xAC\x61\x9D\x18\xC8\x4A\x3F\x47"
+                         "\xD9\x66\x42",
+               .klen   = 19,
+               .iv     = "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .input  = "\x01\x02\x02\x01",
+               .ilen   = 4,
+               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF"
+                         "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .alen   = 16,
+               .result = "\x4C\x72\x63\x30\x2F\xE6\x56\xDD"
+                         "\xD0\xD8\x60\x9D\x8B\xEF\x85\x90"
+                         "\xF7\x61\x24\x62",
+               .rlen   = 20,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xDE\xCA\xF8",
+               .klen   = 19,
+               .iv     = "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+               .input  = "\x74\x6F\x01\x62\x65\x01\x6F\x72"
+                         "\x01\x6E\x6F\x74\x01\x74\x6F\x01"
+                         "\x62\x65\x00\x01",
+               .ilen   = 20,
+               .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
+               .result = "\xA3\xBF\x52\x52\x65\x83\xBA\x81"
+                         "\x03\x9B\x84\xFC\x44\x8C\xBB\x81"
+                         "\x36\xE1\x78\xBB\xA5\x49\x3A\xD0"
+                         "\xF0\x6B\x21\xAF\x98\xC0\x34\xDC"
+                         "\x17\x17\x65\xAD",
+               .rlen   = 36,
+       }, {
+               .key    = "\x6C\x65\x67\x61\x6C\x69\x7A\x65"
+                         "\x6D\x61\x72\x69\x6A\x75\x61\x6E"
+                         "\x61\x61\x6E\x64\x64\x6F\x69\x74"
+                         "\x62\x65\x66\x6F\x72\x65\x69\x61"
+                         "\x74\x75\x72",
+               .klen   = 35,
+               .iv     = "\x33\x30\x21\x69\x67\x65\x74\x6D",
+               .input  = "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+                         "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+                         "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+                         "\x02\x00\x07\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x01\x02\x02\x01",
+               .ilen   = 52,
+               .assoc  = "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
+                         "\xFF\xFF\xFF\xFF\x33\x30\x21\x69"
+                         "\x67\x65\x74\x6D",
+               .alen   = 20,
+               .result = "\x96\xFD\x86\xF8\xD1\x98\xFF\x10"
+                         "\xAB\x8C\xDA\x8A\x5A\x08\x38\x1A"
+                         "\x48\x59\x80\x18\x1A\x18\x1A\x04"
+                         "\xC9\x0D\xE3\xE7\x0E\xA4\x0B\x75"
+                         "\x92\x9C\x52\x5C\x0B\xFB\xF8\xAF"
+                         "\x16\xC3\x35\xA8\xE7\xCE\x84\x04"
+                         "\xEB\x40\x6B\x7A\x8E\x75\xBB\x42"
+                         "\xE0\x63\x4B\x21\x44\xA2\x2B\x2B"
+                         "\x39\xDB\xC8\xDC",
+               .rlen   = 68,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .input  = "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+                         "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+                         "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+                         "\x02\x00\x07\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x01\x02\x02\x01",
+               .ilen   = 52,
+               .assoc  = "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .result = "\x6A\x6B\x45\x27\x3F\x9E\x52\xF6"
+                         "\x10\x60\x54\x25\xEB\x80\x04\x93"
+                         "\xCA\x1B\x23\x97\xCB\x21\x2E\x01"
+                         "\xA2\xE7\x95\x41\x30\xE4\x4B\x1B"
+                         "\x79\x01\x58\x50\x01\x06\xE1\xE0"
+                         "\x2C\x83\x79\xD3\xDE\x46\x97\x1A"
+                         "\x44\xCC\x90\xBF\x00\x94\x94\x92"
+                         "\x20\x17\x0C\x1B\x55\xDE\x7E\x68"
+                         "\xF4\x95\x5D\x4F",
+               .rlen   = 68,
+       }, {
+               .key    = "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+                         "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+                         "\x22\x43\x3C",
+               .klen   = 19,
+               .iv     = "\x48\x55\xEC\x7D\x3A\x23\x4B\xFD",
+               .input  = "\x08\x00\xC6\xCD\x02\x00\x07\x00"
+                         "\x61\x62\x63\x64\x65\x66\x67\x68"
+                         "\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70"
+                         "\x71\x72\x73\x74\x01\x02\x02\x01",
+               .ilen   = 32,
+               .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
+                         "\x00\x00\x00\x07\x48\x55\xEC\x7D"
+                         "\x3A\x23\x4B\xFD",
+               .alen   = 20,
+               .result = "\x67\xE9\x28\xB3\x1C\xA4\x6D\x02"
+                         "\xF0\xB5\x37\xB6\x6B\x2F\xF5\x4F"
+                         "\xF8\xA3\x4C\x53\xB8\x12\x09\xBF"
+                         "\x58\x7D\xCF\x29\xA3\x41\x68\x6B"
+                         "\xCE\xE8\x79\x85\x3C\xB0\x3A\x8F"
+                         "\x16\xB0\xA1\x26\xC9\xBC\xBC\xA6",
+               .rlen   = 48,
+       }
+};
+
+static struct aead_testvec aes_ccm_rfc4309_dec_tv_template[]   = {
+       { /* Generated using Crypto++ */
+               .key    = zeroed_string,
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .result = zeroed_string,
+               .rlen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .input  = "\x2E\x9A\xCA\x6B\xDA\x54\xFC\x6F"
+                         "\x12\x50\xE8\xDE\x81\x3C\x63\x08"
+                         "\x1A\x22\xBA\x75\xEE\xD4\xD5\xB5"
+                         "\x27\x50\x01\xAC\x03\x33\x39\xFB",
+               .ilen   = 32,
+       },{
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .result = zeroed_string,
+               .rlen   = 16,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .input  = "\xCF\xB9\x99\x17\xC8\x86\x0E\x7F"
+                         "\x7E\x76\xF8\xE6\xF8\xCC\x1F\x17"
+                         "\x6A\xE0\x53\x9F\x4B\x73\x7E\xDA"
+                         "\x08\x09\x4E\xC4\x1E\xAD\xC6\xB0",
+               .ilen   = 32,
+
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
+               .assoc  = zeroed_string,
+               .alen   = 16,
+               .input  = "\x33\xDE\x73\xBC\xA6\xCE\x4E\xA6"
+                         "\x61\xF4\xF5\x41\x03\x4A\xE3\x86"
+                         "\xA1\xE2\xC2\x42\x2B\x81\x70\x40"
+                         "\xFD\x7F\x76\xD1\x03\x07\xBB\x0C",
+               .ilen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = zeroed_string,
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
+               .input  = "\x33\xDE\x73\xBC\xA6\xCE\x4E\xA6"
+                         "\x61\xF4\xF5\x41\x03\x4A\xE3\x86"
+                         "\x5B\xC0\x73\xE0\x2B\x73\x68\xC9"
+                         "\x2D\x8C\x58\xC2\x90\x3D\xB0\x3E",
+               .ilen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 16,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .input  = "\xCE\xB8\x98\x16\xC9\x87\x0F\x7E"
+                         "\x7F\x77\xF9\xE7\xF9\xCD\x1E\x16"
+                         "\x43\x8E\x76\x57\x3B\xB4\x05\xE8"
+                         "\xA9\x9B\xBF\x25\xE0\x4F\xC0\xED",
+               .ilen   = 32,
+       }, {
+               .key    = "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+                         "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .result = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x01\x01\x01\x01\x01\x01\x01\x01",
+               .rlen   = 64,
+               .assoc  = "\x01\x01\x01\x01\x01\x01\x01\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .alen   = 16,
+               .input  = "\xCE\xB8\x98\x16\xC9\x87\x0F\x7E"
+                         "\x7F\x77\xF9\xE7\xF9\xCD\x1E\x16"
+                         "\x9C\xA4\x97\x83\x3F\x01\xA5\xF4"
+                         "\x43\x09\xE7\xB8\xE9\xD1\xD7\x02"
+                         "\x9B\xAB\x39\x18\xEB\x94\x34\x36"
+                         "\xE6\xC5\xC8\x9B\x00\x81\x9E\x49"
+                         "\x1D\x78\xE1\x48\xE3\xE9\xEA\x8E"
+                         "\x3A\x2B\x67\x5D\x35\x6A\x0F\xDB"
+                         "\x02\x73\xDD\xE7\x30\x4A\x30\x54"
+                         "\x1A\x9D\x09\xCA\xC8\x1C\x32\x5F",
+               .ilen   = 80,
+       }, {
+               .key    = "\x00\x01\x02\x03\x04\x05\x06\x07"
+                         "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x45\x67\x89\xab\xcd\xef",
+               .result = "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff"
+                         "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .rlen   = 192,
+               .assoc  = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+                         "\xaa\xaa\xaa\xaa\x00\x00\x45\x67"
+                         "\x89\xab\xcd\xef",
+               .alen   = 20,
+               .input  = "\x64\x17\xDC\x24\x9D\x92\xBA\x5E"
+                         "\x7C\x64\x6D\x33\x46\x77\xAC\xB1"
+                         "\x5C\x9E\xE2\xC7\x27\x11\x3E\x95"
+                         "\x7D\xBE\x28\xC8\xC1\xCA\x5E\x8C"
+                         "\xB4\xE2\xDE\x9F\x53\x59\x26\xDB"
+                         "\x0C\xD4\xE4\x07\x9A\xE6\x3E\x01"
+                         "\x58\x0D\x3E\x3D\xD5\x21\xEB\x04"
+                         "\x06\x9D\x5F\xB9\x02\x49\x1A\x2B"
+                         "\xBA\xF0\x4E\x3B\x85\x50\x5B\x09"
+                         "\xFE\xEC\xFC\x54\xEC\x0C\xE2\x79"
+                         "\x8A\x2F\x5F\xD7\x05\x5D\xF1\x6D"
+                         "\x22\xEB\xD1\x09\x80\x3F\x5A\x70"
+                         "\xB2\xB9\xD3\x63\x99\xC2\x4D\x1B"
+                         "\x36\x12\x00\x89\xAA\x5D\x55\xDA"
+                         "\x1D\x5B\xD8\x3C\x5F\x09\xD2\xE6"
+                         "\x39\x41\x5C\xF0\xBE\x26\x4E\x5F"
+                         "\x2B\x50\x44\x52\xC2\x10\x7D\x38"
+                         "\x82\x64\x83\x0C\xAE\x49\xD0\xE5"
+                         "\x4F\xE5\x66\x4C\x58\x7A\xEE\x43"
+                         "\x3B\x51\xFE\xBA\x24\x8A\xFE\xDC"
+                         "\x19\x6D\x60\x66\x61\xF9\x9A\x3F"
+                         "\x75\xFC\x38\x53\x5B\xB5\xCD\x52"
+                         "\x4F\xE5\xE4\xC9\xFE\x10\xCB\x98"
+                         "\xF0\x06\x5B\x07\xAB\xBB\xF4\x0E"
+                         "\x2D\xC2\xDD\x5D\xDD\x22\x9A\xCC"
+                         "\x39\xAB\x63\xA5\x3D\x9C\x51\x8A",
+               .ilen   = 208,
+       }, { /* From draft-mcgrew-gcm-test-01 */
+               .key    = "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+                         "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+                         "\x2E\x44\x3B",
+               .klen   = 19,
+               .iv     = "\x49\x56\xED\x7E\x3B\x24\x4C\xFE",
+               .result = "\x45\x00\x00\x48\x69\x9A\x00\x00"
+                         "\x80\x11\x4D\xB7\xC0\xA8\x01\x02"
+                         "\xC0\xA8\x01\x01\x0A\x9B\xF1\x56"
+                         "\x38\xD3\x01\x00\x00\x01\x00\x00"
+                         "\x00\x00\x00\x00\x04\x5F\x73\x69"
+                         "\x70\x04\x5F\x75\x64\x70\x03\x73"
+                         "\x69\x70\x09\x63\x79\x62\x65\x72"
+                         "\x63\x69\x74\x79\x02\x64\x6B\x00"
+                         "\x00\x21\x00\x01\x01\x02\x02\x01",
+               .rlen   = 72,
+               .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
+                         "\x00\x00\x00\x00\x49\x56\xED\x7E"
+                         "\x3B\x24\x4C\xFE",
+               .alen   = 20,
+               .input  = "\x89\xBA\x3E\xEF\xE6\xD6\xCF\xDB"
+                         "\x83\x60\xF5\xBA\x3A\x56\x79\xE6"
+                         "\x7E\x0C\x53\xCF\x9E\x87\xE0\x4E"
+                         "\x1A\x26\x01\x24\xC7\x2E\x3D\xBF"
+                         "\x29\x2C\x91\xC1\xB8\xA8\xCF\xE0"
+                         "\x39\xF8\x53\x6D\x31\x22\x2B\xBF"
+                         "\x98\x81\xFC\x34\xEE\x85\x36\xCD"
+                         "\x26\xDB\x6C\x7A\x0C\x77\x8A\x35"
+                         "\x18\x85\x54\xB2\xBC\xDD\x3F\x43"
+                         "\x61\x06\x8A\xDF\x86\x3F\xB4\xAC"
+                         "\x97\xDC\xBD\xFD\x92\x10\xC5\xFF",
+               .ilen   = 88,
+       }, {
+               .key    = "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+                         "\xCA\xFE\xBA",
+               .klen   = 19,
+               .iv     = "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .result = "\x45\x00\x00\x3E\x69\x8F\x00\x00"
+                         "\x80\x11\x4D\xCC\xC0\xA8\x01\x02"
+                         "\xC0\xA8\x01\x01\x0A\x98\x00\x35"
+                         "\x00\x2A\x23\x43\xB2\xD0\x01\x00"
+                         "\x00\x01\x00\x00\x00\x00\x00\x00"
+                         "\x03\x73\x69\x70\x09\x63\x79\x62"
+                         "\x65\x72\x63\x69\x74\x79\x02\x64"
+                         "\x6B\x00\x00\x01\x00\x01\x00\x01",
+               .rlen   = 64,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
+               .input  = "\x4B\xC2\x70\x60\x64\xD2\xF3\xC8"
+                         "\xE5\x26\x8A\xDE\xB8\x7E\x7D\x16"
+                         "\x56\xC7\xD2\x88\xBA\x8D\x58\xAF"
+                         "\xF5\x71\xB6\x37\x84\xA7\xB1\x99"
+                         "\x51\x5C\x0D\xA0\x27\xDE\xE7\x2D"
+                         "\xEF\x25\x88\x1F\x1D\x77\x11\xFF"
+                         "\xDB\xED\xEE\x56\x16\xC5\x5C\x9B"
+                         "\x00\x62\x1F\x68\x4E\x7C\xA0\x97"
+                         "\x10\x72\x7E\x53\x13\x3B\x68\xE4"
+                         "\x30\x99\x91\x79\x09\xEA\xFF\x6A",
+               .ilen   = 80,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x11\x22\x33",
+               .klen   = 35,
+               .iv     = "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .result = "\x45\x00\x00\x30\x69\xA6\x40\x00"
+                         "\x80\x06\x26\x90\xC0\xA8\x01\x02"
+                         "\x93\x89\x15\x5E\x0A\x9E\x00\x8B"
+                         "\x2D\xC5\x7E\xE0\x00\x00\x00\x00"
+                         "\x70\x02\x40\x00\x20\xBF\x00\x00"
+                         "\x02\x04\x05\xB4\x01\x01\x04\x02"
+                         "\x01\x02\x02\x01",
+               .rlen   = 52,
+               .assoc  = "\x4A\x2C\xBF\xE3\x00\x00\x00\x02"
+                         "\x01\x02\x03\x04\x05\x06\x07\x08",
+               .alen   = 16,
+               .input  = "\xD6\x31\x0D\x2B\x3D\x6F\xBD\x2F"
+                         "\x58\x41\x7E\xFF\x9A\x9E\x09\xB4"
+                         "\x1A\xF7\xF6\x42\x31\xCD\xBF\xAD"
+                         "\x27\x0E\x2C\xF2\xDB\x10\xDF\x55"
+                         "\x8F\x0D\xD7\xAC\x23\xBD\x42\x10"
+                         "\xD0\xB2\xAF\xD8\x37\xAC\x6B\x0B"
+                         "\x11\xD4\x0B\x12\xEC\xB4\xB1\x92"
+                         "\x23\xA6\x10\xB0\x26\xD6\xD9\x26"
+                         "\x5A\x48\x6A\x3E",
+               .ilen   = 68,
+       }, {
+               .key    = "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00",
+               .klen   = 19,
+               .iv     = "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .result = "\x45\x00\x00\x3C\x99\xC5\x00\x00"
+                         "\x80\x01\xCB\x7A\x40\x67\x93\x18"
+                         "\x01\x01\x01\x01\x08\x00\x07\x5C"
+                         "\x02\x00\x44\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x75\x76\x77\x61\x62\x63\x64\x65"
+                         "\x66\x67\x68\x69\x01\x02\x02\x01",
+               .rlen   = 64,
+               .assoc  = "\x00\x00\x00\x00\x00\x00\x00\x01"
+                         "\x00\x00\x00\x00\x00\x00\x00\x00",
+               .alen   = 16,
+               .input  = "\x6B\x9A\xCA\x57\x43\x91\xFC\x6F"
+                         "\x92\x51\x23\xA4\xC1\x5B\xF0\x10"
+                         "\xF3\x13\xF4\xF8\xA1\x9A\xB4\xDC"
+                         "\x89\xC8\xF8\x42\x62\x95\xB7\xCB"
+                         "\xB8\xF5\x0F\x1B\x2E\x94\xA2\xA7"
+                         "\xBF\xFB\x8A\x92\x13\x63\xD1\x3C"
+                         "\x08\xF5\xE8\xA6\xAA\xF6\x34\xF9"
+                         "\x42\x05\xAF\xB3\xE7\x9A\xFC\xEE"
+                         "\x36\x25\xC1\x10\x12\x1C\xCA\x82"
+                         "\xEA\xE6\x63\x5A\x57\x28\xA9\x9A",
+               .ilen   = 80,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .result = "\x45\x00\x00\x3C\x99\xC3\x00\x00"
+                         "\x80\x01\xCB\x7C\x40\x67\x93\x18"
+                         "\x01\x01\x01\x01\x08\x00\x08\x5C"
+                         "\x02\x00\x43\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x75\x76\x77\x61\x62\x63\x64\x65"
+                         "\x66\x67\x68\x69\x01\x02\x02\x01",
+               .rlen   = 64,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .input  = "\x6A\x6B\x45\x2B\x7C\x67\x52\xF6"
+                         "\x10\x60\x40\x62\x6B\x4F\x97\x8E"
+                         "\x0B\xB2\x22\x97\xCB\x21\xE0\x90"
+                         "\xA2\xE7\xD1\x41\x30\xE4\x4B\x1B"
+                         "\x79\x01\x58\x50\x01\x06\xE1\xE0"
+                         "\x2C\x83\x79\xD3\xDE\x46\x97\x1A"
+                         "\x30\xB8\xE5\xDF\xD7\x12\x56\x75"
+                         "\xD0\x95\xB7\xB8\x91\x42\xF7\xFD"
+                         "\x97\x57\xCA\xC1\x20\xD0\x86\xB9"
+                         "\x66\x9D\xB4\x2B\x96\x22\xAC\x67",
+               .ilen   = 80,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .result = "\x45\x00\x00\x1C\x42\xA2\x00\x00"
+                         "\x80\x01\x44\x1F\x40\x67\x93\xB6"
+                         "\xE0\x00\x00\x02\x0A\x00\xF5\xFF"
+                         "\x01\x02\x02\x01",
+               .rlen   = 28,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .input  = "\x6A\x6B\x45\x0B\xA7\x06\x52\xF6"
+                         "\x10\x60\xCF\x01\x6B\x4F\x97\x20"
+                         "\xEA\xB3\x23\x94\xC9\x21\x1D\x33"
+                         "\xA1\xE5\x90\x40\x05\x37\x45\x70"
+                         "\xB5\xD6\x09\x0A\x23\x73\x33\xF9"
+                         "\x08\xB4\x22\xE4",
+               .ilen   = 44,
+       }, {
+               .key    = "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\x6D\x6A\x8F\x94\x67\x30\x83\x08"
+                         "\xFE\xFF\xE9\x92\x86\x65\x73\x1C"
+                         "\xCA\xFE\xBA",
                .klen   = 27,
-               .iv     = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
-               .assoc  = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
-                         "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
-                         "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
-                         "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
-               .alen   = 32,
-               .input  = "\xfb\xe5\x5d\x34\xbe\xe5\xe8\xe7"
-                         "\x5a\xef\x2f\xbf\x1f\x7f\xd4\xb2"
-                         "\x66\xca\x61\x1e\x96\x7a\x61\xb3"
-                         "\x1c\x16\x45\x52\xba\x04\x9c\x9f"
-                         "\xb1\xd2\x40\xbc\x52\x7c\x6f\xb1",
-               .ilen   = 40,
-               .result = "\x85\x34\x66\x42\xc8\x92\x0f\x36"
-                         "\x58\xe0\x6b\x91\x3c\x98\x5c\xbb"
-                         "\x0a\x85\xcc\x02\xad\x7a\x96\xe9"
-                         "\x65\x43\xa4\xc3\x0f\xdc\x55\x81",
-               .rlen   = 32,
+               .iv     = "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .result = "\x45\x00\x00\x28\xA4\xAD\x40\x00"
+                         "\x40\x06\x78\x80\x0A\x01\x03\x8F"
+                         "\x0A\x01\x06\x12\x80\x23\x06\xB8"
+                         "\xCB\x71\x26\x02\xDD\x6B\xB0\x3E"
+                         "\x50\x10\x16\xD0\x75\x68\x00\x01",
+               .rlen   = 40,
+               .assoc  = "\x00\x00\xA5\xF8\x00\x00\x00\x0A"
+                         "\xFA\xCE\xDB\xAD\xDE\xCA\xF8\x88",
+               .alen   = 16,
+               .input  = "\x05\x22\x15\xD1\x52\x56\x85\x04"
+                         "\xA8\x5C\x5D\x6D\x7E\x6E\xF5\xFA"
+                         "\xEA\x16\x37\x50\xF3\xDF\x84\x3B"
+                         "\x2F\x32\x18\x57\x34\x2A\x8C\x23"
+                         "\x67\xDF\x6D\x35\x7B\x54\x0D\xFB"
+                         "\x34\xA5\x9F\x6C\x48\x30\x1E\x22"
+                         "\xFE\xB1\x22\x17\x17\x8A\xB9\x5B",
+               .ilen   = 56,
        }, {
-               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
-                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
-                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
-                         "\xd1\xfc\x57",
-               .klen   = 27,
-               .iv     = "\x9c\xfe\xb8\x9c\xad\x71\xaa\x1f",
-               .assoc  = "\x86\x67\xa5\xa9\x14\x5f\x0d\xc6"
-                         "\xff\x14\xc7\x44\xbf\x6c\x3a\xc3"
-                         "\xff\xb6\x81\xbd\xe2\xd5\x06\xc7"
-                         "\x3c\xa1\x52\x13\x03\x8a\x23\x3a",
-               .alen   = 32,
-               .input  = "\x3f\x66\xb0\x9d\xe5\x4b\x38\x00"
-                         "\xc6\x0e\x6e\xe5\xd6\x98\xa6\x37"
-                         "\x8c\x26\x33\xc6\xb2\xa2\x17\xfa"
-                         "\x64\x19\xc0\x30\xd7\xfc\x14\x6b"
-                         "\xe3\x33\xc2\x04\xb0\x37\xbe\x3f"
-                         "\xa9\xb4\x2d\x68\x03\xa3\x44\xef",
-               .ilen   = 48,
-               .result = "\x02\x87\x4d\x28\x80\x6e\xb2\xed"
-                         "\x99\x2a\xa8\xca\x04\x25\x45\x90"
-                         "\x1d\xdd\x5a\xd9\xe4\xdb\x9c\x9c"
-                         "\x49\xe9\x01\xfe\xa7\x80\x6d\x6b",
-               .rlen   = 32,
-               .novrfy = 1,
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xDE\xCA\xF8",
+               .klen   = 19,
+               .iv     = "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+               .result = "\x45\x00\x00\x49\x33\xBA\x00\x00"
+                         "\x7F\x11\x91\x06\xC3\xFB\x1D\x10"
+                         "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+                         "\x00\x35\xDD\x7B\x80\x03\x02\xD5"
+                         "\x00\x00\x4E\x20\x00\x1E\x8C\x18"
+                         "\xD7\x5B\x81\xDC\x91\xBA\xA0\x47"
+                         "\x6B\x91\xB9\x24\xB2\x80\x38\x9D"
+                         "\x92\xC9\x63\xBA\xC0\x46\xEC\x95"
+                         "\x9B\x62\x66\xC0\x47\x22\xB1\x49"
+                         "\x23\x01\x01\x01",
+               .rlen   = 76,
+               .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
+               .input  = "\x92\xD0\x53\x79\x33\x38\xD5\xF3"
+                         "\x7D\xE4\x7A\x8E\x86\x03\xC9\x90"
+                         "\x96\x35\xAB\x9C\xFB\xE8\xA3\x76"
+                         "\xE9\xE9\xE2\xD1\x2E\x11\x0E\x00"
+                         "\xFA\xCE\xB5\x9E\x02\xA7\x7B\xEA"
+                         "\x71\x9A\x58\xFB\xA5\x8A\xE1\xB7"
+                         "\x9C\x39\x9D\xE3\xB5\x6E\x69\xE6"
+                         "\x63\xC9\xDB\x05\x69\x51\x12\xAD"
+                         "\x3E\x00\x32\x73\x86\xF2\xEE\xF5"
+                         "\x0F\xE8\x81\x7E\x84\xD3\xC0\x0D"
+                         "\x76\xD6\x55\xC6\xB4\xC2\x34\xC7"
+                         "\x12\x25\x0B\xF9",
+               .ilen   = 92,
        }, {
-               .key    = "\xa4\x4b\x54\x29\x0a\xb8\x6d\x01"
-                         "\x5b\x80\x2a\xcf\x25\xc4\xb7\x5c"
-                         "\x20\x2c\xad\x30\xc2\x2b\x41\xfb"
-                         "\x0e\x85\xbc\x33\xad\x0f\x2b\xff"
-                         "\xee\x49\x83",
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x73\x61\x6C",
                .klen   = 35,
-               .iv     = "\xe9\xa9\xff\xe9\x57\xba\xfd\x9e",
-               .alen   = 0,
-               .input  = "\x1f\xb8\x8f\xa3\xdd\x54\x00\xf2",
-               .ilen   = 8,
-               .result = "\x00",
-               .rlen   = 0,
+               .iv     = "\x61\x6E\x64\x01\x69\x76\x65\x63",
+               .result = "\x45\x08\x00\x28\x73\x2C\x00\x00"
+                         "\x40\x06\xE9\xF9\x0A\x01\x06\x12"
+                         "\x0A\x01\x03\x8F\x06\xB8\x80\x23"
+                         "\xDD\x6B\xAF\xBE\xCB\x71\x26\x02"
+                         "\x50\x10\x1F\x64\x6D\x54\x00\x01",
+               .rlen   = 40,
+               .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
+               .input  = "\xCC\x74\xB7\xD3\xB0\x38\x50\x42"
+                         "\x2C\x64\x87\x46\x1E\x34\x10\x05"
+                         "\x29\x6B\xBB\x36\xE9\x69\xAD\x92"
+                         "\x82\xA1\x10\x6A\xEB\x0F\xDC\x7D"
+                         "\x08\xBA\xF3\x91\xCA\xAA\x61\xDA"
+                         "\x62\xF4\x14\x61\x5C\x9D\xB5\xA7"
+                         "\xEE\xD7\xB9\x7E\x87\x99\x9B\x7D",
+               .ilen   = 56,
        }, {
-               .key    = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
-                         "\xa2\xbc\x6b\x98\xd7\x75\x7f\xe3"
-                         "\xa4\x48\x93\x39\x26\x71\x4a\xc6"
-                         "\xae\x8f\x11\x4c\xc2\x9c\x4a\xbb"
-                         "\x85\x34\x66",
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .result = "\x45\x00\x00\x49\x33\x3E\x00\x00"
+                         "\x7F\x11\x91\x82\xC3\xFB\x1D\x10"
+                         "\xC2\xB1\xD3\x26\xC0\x28\x31\xCE"
+                         "\x00\x35\xCB\x45\x80\x03\x02\x5B"
+                         "\x00\x00\x01\xE0\x00\x1E\x8C\x18"
+                         "\xD6\x57\x59\xD5\x22\x84\xA0\x35"
+                         "\x2C\x71\x47\x5C\x88\x80\x39\x1C"
+                         "\x76\x4D\x6E\x5E\xE0\x49\x6B\x32"
+                         "\x5A\xE2\x70\xC0\x38\x99\x49\x39"
+                         "\x15\x01\x01\x01",
+               .rlen   = 76,
+               .assoc  = "\x42\xF6\x7E\x3F\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .input  = "\x6A\x6B\x45\x5E\xD6\x9A\x52\xF6"
+                         "\xEF\x70\x1A\x9C\xE8\xD3\x19\x86"
+                         "\xC8\x02\xF0\xB0\x03\x09\xD9\x02"
+                         "\xA0\xD2\x59\x04\xD1\x85\x2A\x24"
+                         "\x1C\x67\x3E\xD8\x68\x72\x06\x94"
+                         "\x97\xBA\x4F\x76\x8D\xB0\x44\x5B"
+                         "\x69\xBF\xD5\xE2\x3D\xF1\x0B\x0C"
+                         "\xC0\xBF\xB1\x8F\x70\x09\x9E\xCE"
+                         "\xA5\xF2\x55\x58\x84\xFA\xF9\xB5"
+                         "\x23\xF4\x84\x40\x74\x14\x8A\x6B"
+                         "\xDB\xD7\x67\xED\xA4\x93\xF3\x47"
+                         "\xCC\xF7\x46\x6F",
+               .ilen   = 92,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\x73\x61\x6C",
                .klen   = 35,
-               .iv     = "\x42\xc8\x92\x0f\x36\x58\xe0\x6b",
-               .alen   = 0,
-               .input  = "\x48\x01\x5e\x02\x24\x04\x66\x47"
-                         "\xa1\xea\x6f\xaf\xe8\xfc\xfb\xdd"
-                         "\xa5\xa9\x87\x8d\x84\xee\x2e\x77"
-                         "\xbb\x86\xb9\xf5\x5c\x6c\xff\xf6"
-                         "\x72\xc3\x8e\xf7\x70\xb1\xb2\x07"
-                         "\xbc\xa8\xa3\xbd\x83\x7c\x1d\x2a",
-               .ilen   = 48,
-               .result = "\xdc\x56\xf2\x71\xb0\xb1\xa0\x6c"
-                         "\xf0\x97\x3a\xfb\x6d\xe7\x32\x99"
-                         "\x3e\xaf\x70\x5e\xb2\x4d\xea\x39"
-                         "\x89\xd4\x75\x7a\x63\xb1\xda\x93",
-               .rlen   = 32,
-               .novrfy = 1,
+               .iv     = "\x61\x6E\x64\x01\x69\x76\x65\x63",
+               .result = "\x63\x69\x73\x63\x6F\x01\x72\x75"
+                         "\x6C\x65\x73\x01\x74\x68\x65\x01"
+                         "\x6E\x65\x74\x77\x65\x01\x64\x65"
+                         "\x66\x69\x6E\x65\x01\x74\x68\x65"
+                         "\x74\x65\x63\x68\x6E\x6F\x6C\x6F"
+                         "\x67\x69\x65\x73\x01\x74\x68\x61"
+                         "\x74\x77\x69\x6C\x6C\x01\x64\x65"
+                         "\x66\x69\x6E\x65\x74\x6F\x6D\x6F"
+                         "\x72\x72\x6F\x77\x01\x02\x02\x01",
+               .rlen   = 72,
+               .assoc  = "\x17\x40\x5E\x67\x15\x6F\x31\x26"
+                         "\xDD\x0D\xB9\x9B\x61\x6E\x64\x01"
+                         "\x69\x76\x65\x63",
+               .alen   = 20,
+               .input  = "\xEA\x15\xC4\x98\xAC\x15\x22\x37"
+                         "\x00\x07\x1D\xBE\x60\x5D\x73\x16"
+                         "\x4D\x0F\xCC\xCE\x8A\xD0\x49\xD4"
+                         "\x39\xA3\xD1\xB1\x21\x0A\x92\x1A"
+                         "\x2C\xCF\x8F\x9D\xC9\x91\x0D\xB4"
+                         "\x15\xFC\xBC\xA5\xC5\xBF\x54\xE5"
+                         "\x1C\xC7\x32\x41\x07\x7B\x2C\xB6"
+                         "\x5C\x23\x7C\x93\xEA\xEF\x23\x1C"
+                         "\x73\xF4\xE7\x12\x84\x4C\x37\x0A"
+                         "\x4A\x8F\x06\x37\x48\xF9\xF9\x05"
+                         "\x55\x13\x40\xC3\xD5\x55\x3A\x3D",
+               .ilen   = 88,
        }, {
-               .key    = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
-                         "\x96\xe5\xc5\x68\xaa\x95\x35\xe0"
-                         "\x29\xa0\xba\x9e\x48\x78\xd1\xba"
-                         "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b"
-                         "\xcf\x76\x3f",
+               .key    = "\x7D\x77\x3D\x00\xC1\x44\xC5\x25"
+                         "\xAC\x61\x9D\x18\xC8\x4A\x3F\x47"
+                         "\xD9\x66\x42",
+               .klen   = 19,
+               .iv     = "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .result = "\x01\x02\x02\x01",
+               .rlen   = 4,
+               .assoc  = "\x33\x54\x67\xAE\xFF\xFF\xFF\xFF"
+                         "\x43\x45\x7E\x91\x82\x44\x3B\xC6",
+               .alen   = 16,
+               .input  = "\x4C\x72\x63\x30\x2F\xE6\x56\xDD"
+                         "\xD0\xD8\x60\x9D\x8B\xEF\x85\x90"
+                         "\xF7\x61\x24\x62",
+               .ilen   = 20,
+       }, {
+               .key    = "\xAB\xBC\xCD\xDE\xF0\x01\x12\x23"
+                         "\x34\x45\x56\x67\x78\x89\x9A\xAB"
+                         "\xDE\xCA\xF8",
+               .klen   = 19,
+               .iv     = "\xCA\xFE\xDE\xBA\xCE\xFA\xCE\x74",
+               .result = "\x74\x6F\x01\x62\x65\x01\x6F\x72"
+                         "\x01\x6E\x6F\x74\x01\x74\x6F\x01"
+                         "\x62\x65\x00\x01",
+               .rlen   = 20,
+               .assoc  = "\x00\x00\x01\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x01\xCA\xFE\xDE\xBA"
+                         "\xCE\xFA\xCE\x74",
+               .alen   = 20,
+               .input  = "\xA3\xBF\x52\x52\x65\x83\xBA\x81"
+                         "\x03\x9B\x84\xFC\x44\x8C\xBB\x81"
+                         "\x36\xE1\x78\xBB\xA5\x49\x3A\xD0"
+                         "\xF0\x6B\x21\xAF\x98\xC0\x34\xDC"
+                         "\x17\x17\x65\xAD",
+               .ilen   = 36,
+       }, {
+               .key    = "\x6C\x65\x67\x61\x6C\x69\x7A\x65"
+                         "\x6D\x61\x72\x69\x6A\x75\x61\x6E"
+                         "\x61\x61\x6E\x64\x64\x6F\x69\x74"
+                         "\x62\x65\x66\x6F\x72\x65\x69\x61"
+                         "\x74\x75\x72",
                .klen   = 35,
-               .iv     = "\xd9\x95\x75\x8f\x44\x89\x40\x7b",
-               .assoc  = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
-                         "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
-                         "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
-                         "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
-               .alen   = 32,
-               .input  = "\x48\x58\xd6\xf3\xad\x63\x58\xbf"
-                         "\xae\xc7\x5e\xae\x83\x8f\x7b\xe4"
-                         "\x78\x5c\x4c\x67\x71\x89\x94\xbf"
-                         "\x47\xf1\x63\x7e\x1c\x59\xbd\xc5"
-                         "\x7f\x44\x0a\x0c\x01\x18\x07\x92"
-                         "\xe1\xd3\x51\xce\x32\x6d\x0c\x5b",
-               .ilen   = 48,
-               .result = "\xc2\x54\xc8\xde\x78\x87\x77\x40"
-                         "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
-                         "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
-                         "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+               .iv     = "\x33\x30\x21\x69\x67\x65\x74\x6D",
+               .result = "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+                         "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+                         "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+                         "\x02\x00\x07\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x01\x02\x02\x01",
+               .rlen   = 52,
+               .assoc  = "\x79\x6B\x69\x63\xFF\xFF\xFF\xFF"
+                         "\xFF\xFF\xFF\xFF\x33\x30\x21\x69"
+                         "\x67\x65\x74\x6D",
+               .alen   = 20,
+               .input  = "\x96\xFD\x86\xF8\xD1\x98\xFF\x10"
+                         "\xAB\x8C\xDA\x8A\x5A\x08\x38\x1A"
+                         "\x48\x59\x80\x18\x1A\x18\x1A\x04"
+                         "\xC9\x0D\xE3\xE7\x0E\xA4\x0B\x75"
+                         "\x92\x9C\x52\x5C\x0B\xFB\xF8\xAF"
+                         "\x16\xC3\x35\xA8\xE7\xCE\x84\x04"
+                         "\xEB\x40\x6B\x7A\x8E\x75\xBB\x42"
+                         "\xE0\x63\x4B\x21\x44\xA2\x2B\x2B"
+                         "\x39\xDB\xC8\xDC",
+               .ilen   = 68,
+       }, {
+               .key    = "\x3D\xE0\x98\x74\xB3\x88\xE6\x49"
+                         "\x19\x88\xD0\xC3\x60\x7E\xAE\x1F"
+                         "\x57\x69\x0E",
+               .klen   = 19,
+               .iv     = "\x4E\x28\x00\x00\xA2\xFC\xA1\xA3",
+               .result = "\x45\x00\x00\x30\xDA\x3A\x00\x00"
+                         "\x80\x01\xDF\x3B\xC0\xA8\x00\x05"
+                         "\xC0\xA8\x00\x01\x08\x00\xC6\xCD"
+                         "\x02\x00\x07\x00\x61\x62\x63\x64"
+                         "\x65\x66\x67\x68\x69\x6A\x6B\x6C"
+                         "\x6D\x6E\x6F\x70\x71\x72\x73\x74"
+                         "\x01\x02\x02\x01",
+               .rlen   = 52,
+               .assoc  = "\x3F\x7E\xF6\x42\x10\x10\x10\x10"
+                         "\x10\x10\x10\x10\x4E\x28\x00\x00"
+                         "\xA2\xFC\xA1\xA3",
+               .alen   = 20,
+               .input  = "\x6A\x6B\x45\x27\x3F\x9E\x52\xF6"
+                         "\x10\x60\x54\x25\xEB\x80\x04\x93"
+                         "\xCA\x1B\x23\x97\xCB\x21\x2E\x01"
+                         "\xA2\xE7\x95\x41\x30\xE4\x4B\x1B"
+                         "\x79\x01\x58\x50\x01\x06\xE1\xE0"
+                         "\x2C\x83\x79\xD3\xDE\x46\x97\x1A"
+                         "\x44\xCC\x90\xBF\x00\x94\x94\x92"
+                         "\x20\x17\x0C\x1B\x55\xDE\x7E\x68"
+                         "\xF4\x95\x5D\x4F",
+               .ilen   = 68,
+       }, {
+               .key    = "\x4C\x80\xCD\xEF\xBB\x5D\x10\xDA"
+                         "\x90\x6A\xC7\x3C\x36\x13\xA6\x34"
+                         "\x22\x43\x3C",
+               .klen   = 19,
+               .iv     = "\x48\x55\xEC\x7D\x3A\x23\x4B\xFD",
+               .result = "\x08\x00\xC6\xCD\x02\x00\x07\x00"
+                         "\x61\x62\x63\x64\x65\x66\x67\x68"
+                         "\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70"
+                         "\x71\x72\x73\x74\x01\x02\x02\x01",
                .rlen   = 32,
-       },
+               .assoc  = "\x00\x00\x43\x21\x87\x65\x43\x21"
+                         "\x00\x00\x00\x07\x48\x55\xEC\x7D"
+                         "\x3A\x23\x4B\xFD",
+               .alen   = 20,
+               .input  = "\x67\xE9\x28\xB3\x1C\xA4\x6D\x02"
+                         "\xF0\xB5\x37\xB6\x6B\x2F\xF5\x4F"
+                         "\xF8\xA3\x4C\x53\xB8\x12\x09\xBF"
+                         "\x58\x7D\xCF\x29\xA3\x41\x68\x6B"
+                         "\xCE\xE8\x79\x85\x3C\xB0\x3A\x8F"
+                         "\x16\xB0\xA1\x26\xC9\xBC\xBC\xA6",
+               .ilen   = 48,
+       }
 };
 
 /*
@@ -22343,8 +23577,9 @@ static struct aead_testvec rfc7539esp_enc_tv_template[] = {
                .klen   = 36,
                .iv     = "\x01\x02\x03\x04\x05\x06\x07\x08",
                .assoc  = "\xf3\x33\x88\x86\x00\x00\x00\x00"
-                         "\x00\x00\x4e\x91",
-               .alen   = 12,
+                         "\x00\x00\x4e\x91\x01\x02\x03\x04"
+                         "\x05\x06\x07\x08",
+               .alen   = 20,
                .input  = "\x49\x6e\x74\x65\x72\x6e\x65\x74"
                          "\x2d\x44\x72\x61\x66\x74\x73\x20"
                          "\x61\x72\x65\x20\x64\x72\x61\x66"
@@ -22430,8 +23665,9 @@ static struct aead_testvec rfc7539esp_dec_tv_template[] = {
                .klen   = 36,
                .iv     = "\x01\x02\x03\x04\x05\x06\x07\x08",
                .assoc  = "\xf3\x33\x88\x86\x00\x00\x00\x00"
-                         "\x00\x00\x4e\x91",
-               .alen   = 12,
+                         "\x00\x00\x4e\x91\x01\x02\x03\x04"
+                         "\x05\x06\x07\x08",
+               .alen   = 20,
                .input  = "\x64\xa0\x86\x15\x75\x86\x1a\xf4"
                          "\x60\xf0\x62\xc7\x9b\xe6\x43\xbd"
                          "\x5e\x80\x5c\xfd\x34\x5c\xf3\x89"
@@ -30174,7 +31410,7 @@ static struct cipher_testvec salsa20_stream_enc_tv_template[] = {
        },
 };
 
-#define CHACHA20_ENC_TEST_VECTORS 3
+#define CHACHA20_ENC_TEST_VECTORS 4
 static struct cipher_testvec chacha20_enc_tv_template[] = {
        { /* RFC7539 A.2. Test Vector #1 */
                .key    = "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -30348,6 +31584,338 @@ static struct cipher_testvec chacha20_enc_tv_template[] = {
                          "\x87\xb5\x8d\xfd\x72\x8a\xfa\x36"
                          "\x75\x7a\x79\x7a\xc1\x88\xd1",
                .rlen   = 127,
+       }, { /* Self-made test vector for long data */
+               .key    = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+                         "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+                         "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+                         "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+               .klen   = 32,
+               .iv     = "\x1c\x00\x00\x00\x00\x00\x00\x00"
+                         "\x00\x00\x00\x00\x00\x00\x00\x01",
+               .input  = "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+                         "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+                         "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+                         "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+                         "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+                         "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+                         "\x01\xc6\x67\xda\x03\x91\x18\x90"
+                         "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+                         "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+                         "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+                         "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+                         "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+                         "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+                         "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+                         "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+                         "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+                         "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+                         "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+                         "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+                         "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+                         "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+                         "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+                         "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+                         "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+                         "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+                         "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+                         "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+                         "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+                         "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+                         "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+                         "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+                         "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+                         "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+                         "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+                         "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+                         "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+                         "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+                         "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+                         "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+                         "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+                         "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+                         "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+                         "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+                         "\x49\x46\x00\x88\x22\x8d\xce\xea"
+                         "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+                         "\x72\x11\xf5\x50\x73\x04\x40\x47"
+                         "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+                         "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+                         "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+                         "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+                         "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+                         "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+                         "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+                         "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+                         "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+                         "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+                         "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+                         "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+                         "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+                         "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+                         "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+                         "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+                         "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+                         "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+                         "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+                         "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+                         "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+                         "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+                         "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+                         "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+                         "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+                         "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+                         "\x65\x69\x8a\x45\x29\xef\x74\x85"
+                         "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+                         "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+                         "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+                         "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+                         "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+                         "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+                         "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+                         "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+                         "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+                         "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+                         "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+                         "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+                         "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+                         "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+                         "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+                         "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+                         "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+                         "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+                         "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+                         "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+                         "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+                         "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+                         "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+                         "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+                         "\x25\x94\x10\x5f\x40\x00\x64\x99"
+                         "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+                         "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+                         "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+                         "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+                         "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+                         "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+                         "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+                         "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+                         "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+                         "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+                         "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+                         "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+                         "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+                         "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+                         "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+                         "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+                         "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+                         "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+                         "\xb9\x83\x90\xef\x20\x59\x46\xff"
+                         "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+                         "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+                         "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+                         "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+                         "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+                         "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+                         "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+                         "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+                         "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+                         "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+                         "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+                         "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+                         "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+                         "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+                         "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+                         "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+                         "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+                         "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+                         "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+                         "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+                         "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+                         "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+                         "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+                         "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+                         "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+                         "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+                         "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+                         "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+                         "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+                         "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+                         "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+                         "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+                         "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+                         "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+                         "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+                         "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+                         "\xca\x34\x83\x27\x10\x5b\x68\x45"
+                         "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+                         "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+                         "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+                         "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+                         "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+                         "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+                         "\x72",
+               .ilen   = 1281,
+               .result = "\x45\xe8\xe0\xb6\x9c\xca\xfd\x87"
+                         "\xe8\x1d\x37\x96\x8a\xe3\x40\x35"
+                         "\xcf\x5e\x3a\x46\x3d\xfb\xd0\x69"
+                         "\xde\xaf\x7a\xd5\x0d\xe9\x52\xec"
+                         "\xc2\x82\xe5\x3e\x7d\xb2\x4a\xd9"
+                         "\xbb\xc3\x9f\xc0\x5d\xac\x93\x8d"
+                         "\x0e\x6f\xd3\xd7\xfb\x6a\x0d\xce"
+                         "\x92\x2c\xf7\xbb\x93\x57\xcc\xee"
+                         "\x42\x72\x6f\xc8\x4b\xd2\x76\xbf"
+                         "\xa0\xe3\x7a\x39\xf9\x5c\x8e\xfd"
+                         "\xa1\x1d\x41\xe5\x08\xc1\x1c\x11"
+                         "\x92\xfd\x39\x5c\x51\xd0\x2f\x66"
+                         "\x33\x4a\x71\x15\xfe\xee\x12\x54"
+                         "\x8c\x8f\x34\xd8\x50\x3c\x18\xa6"
+                         "\xc5\xe1\x46\x8a\xfb\x5f\x7e\x25"
+                         "\x9b\xe2\xc3\x66\x41\x2b\xb3\xa5"
+                         "\x57\x0e\x94\x17\x26\x39\xbb\x54"
+                         "\xae\x2e\x6f\x42\xfb\x4d\x89\x6f"
+                         "\x9d\xf1\x16\x2e\xe3\xe7\xfc\xe3"
+                         "\xb2\x4b\x2b\xa6\x7c\x04\x69\x3a"
+                         "\x70\x5a\xa7\xf1\x31\x64\x19\xca"
+                         "\x45\x79\xd8\x58\x23\x61\xaf\xc2"
+                         "\x52\x05\xc3\x0b\xc1\x64\x7c\x81"
+                         "\xd9\x11\xcf\xff\x02\x3d\x51\x84"
+                         "\x01\xac\xc6\x2e\x34\x2b\x09\x3a"
+                         "\xa8\x5d\x98\x0e\x89\xd9\xef\x8f"
+                         "\xd9\xd7\x7d\xdd\x63\x47\x46\x7d"
+                         "\xa1\xda\x0b\x53\x7d\x79\xcd\xc9"
+                         "\x86\xdd\x6b\x13\xa1\x9a\x70\xdd"
+                         "\x5c\xa1\x69\x3c\xe4\x5d\xe3\x8c"
+                         "\xe5\xf4\x87\x9c\x10\xcf\x0f\x0b"
+                         "\xc8\x43\xdc\xf8\x1d\x62\x5e\x5b"
+                         "\xe2\x03\x06\xc5\x71\xb6\x48\xa5"
+                         "\xf0\x0f\x2d\xd5\xa2\x73\x55\x8f"
+                         "\x01\xa7\x59\x80\x5f\x11\x6c\x40"
+                         "\xff\xb1\xf2\xc6\x7e\x01\xbb\x1c"
+                         "\x69\x9c\xc9\x3f\x71\x5f\x07\x7e"
+                         "\xdf\x6f\x99\xca\x9c\xfd\xf9\xb9"
+                         "\x49\xe7\xcc\x91\xd5\x9b\x8f\x03"
+                         "\xae\xe7\x61\x32\xef\x41\x6c\x75"
+                         "\x84\x9b\x8c\xce\x1d\x6b\x93\x21"
+                         "\x41\xec\xc6\xad\x8e\x0c\x48\xa8"
+                         "\xe2\xf5\x57\xde\xf7\x38\xfd\x4a"
+                         "\x6f\xa7\x4a\xf9\xac\x7d\xb1\x85"
+                         "\x7d\x6c\x95\x0a\x5a\xcf\x68\xd2"
+                         "\xe0\x7a\x26\xd9\xc1\x6d\x3e\xc6"
+                         "\x37\xbd\xbe\x24\x36\x77\x9f\x1b"
+                         "\xc1\x22\xf3\x79\xae\x95\x78\x66"
+                         "\x97\x11\xc0\x1a\xf1\xe8\x0d\x38"
+                         "\x09\xc2\xee\xb7\xd3\x46\x7b\x59"
+                         "\x77\x23\xe8\xb4\x92\x3d\x78\xbe"
+                         "\xe2\x25\x63\xa5\x2a\x06\x70\x92"
+                         "\x32\x63\xf9\x19\x21\x68\xe1\x0b"
+                         "\x9a\xd0\xee\x21\xdb\x1f\xe0\xde"
+                         "\x3e\x64\x02\x4d\x0e\xe0\x0a\xa9"
+                         "\xed\x19\x8c\xa8\xbf\xe3\x2e\x75"
+                         "\x24\x2b\xb0\xe5\x82\x6a\x1e\x6f"
+                         "\x71\x2a\x3a\x60\xed\x06\x0d\x17"
+                         "\xa2\xdb\x29\x1d\xae\xb2\xc4\xfb"
+                         "\x94\x04\xd8\x58\xfc\xc4\x04\x4e"
+                         "\xee\xc7\xc1\x0f\xe9\x9b\x63\x2d"
+                         "\x02\x3e\x02\x67\xe5\xd8\xbb\x79"
+                         "\xdf\xd2\xeb\x50\xe9\x0a\x02\x46"
+                         "\xdf\x68\xcf\xe7\x2b\x0a\x56\xd6"
+                         "\xf7\xbc\x44\xad\xb8\xb5\x5f\xeb"
+                         "\xbc\x74\x6b\xe8\x7e\xb0\x60\xc6"
+                         "\x0d\x96\x09\xbb\x19\xba\xe0\x3c"
+                         "\xc4\x6c\xbf\x0f\x58\xc0\x55\x62"
+                         "\x23\xa0\xff\xb5\x1c\xfd\x18\xe1"
+                         "\xcf\x6d\xd3\x52\xb4\xce\xa6\xfa"
+                         "\xaa\xfb\x1b\x0b\x42\x6d\x79\x42"
+                         "\x48\x70\x5b\x0e\xdd\x3a\xc9\x69"
+                         "\x8b\x73\x67\xf6\x95\xdb\x8c\xfb"
+                         "\xfd\xb5\x08\x47\x42\x84\x9a\xfa"
+                         "\xcc\x67\xb2\x3c\xb6\xfd\xd8\x32"
+                         "\xd6\x04\xb6\x4a\xea\x53\x4b\xf5"
+                         "\x94\x16\xad\xf0\x10\x2e\x2d\xb4"
+                         "\x8b\xab\xe5\x89\xc7\x39\x12\xf3"
+                         "\x8d\xb5\x96\x0b\x87\x5d\xa7\x7c"
+                         "\xb0\xc2\xf6\x2e\x57\x97\x2c\xdc"
+                         "\x54\x1c\x34\x72\xde\x0c\x68\x39"
+                         "\x9d\x32\xa5\x75\x92\x13\x32\xea"
+                         "\x90\x27\xbd\x5b\x1d\xb9\x21\x02"
+                         "\x1c\xcc\xba\x97\x5e\x49\x58\xe8"
+                         "\xac\x8b\xf3\xce\x3c\xf0\x00\xe9"
+                         "\x6c\xae\xe9\x77\xdf\xf4\x02\xcd"
+                         "\x55\x25\x89\x9e\x90\xf3\x6b\x8f"
+                         "\xb7\xd6\x47\x98\x26\x2f\x31\x2f"
+                         "\x8d\xbf\x54\xcd\x99\xeb\x80\xd7"
+                         "\xac\xc3\x08\xc2\xa6\x32\xf1\x24"
+                         "\x76\x7c\x4f\x78\x53\x55\xfb\x00"
+                         "\x8a\xd6\x52\x53\x25\x45\xfb\x0a"
+                         "\x6b\xb9\xbe\x3c\x5e\x11\xcc\x6a"
+                         "\xdd\xfc\xa7\xc4\x79\x4d\xbd\xfb"
+                         "\xce\x3a\xf1\x7a\xda\xeb\xfe\x64"
+                         "\x28\x3d\x0f\xee\x80\xba\x0c\xf8"
+                         "\xe9\x5b\x3a\xd4\xae\xc9\xf3\x0e"
+                         "\xe8\x5d\xc5\x5c\x0b\x20\x20\xee"
+                         "\x40\x0d\xde\x07\xa7\x14\xb4\x90"
+                         "\xb6\xbd\x3b\xae\x7d\x2b\xa7\xc7"
+                         "\xdc\x0b\x4c\x5d\x65\xb0\xd2\xc5"
+                         "\x79\x61\x23\xe0\xa2\x99\x73\x55"
+                         "\xad\xc6\xfb\xc7\x54\xb5\x98\x1f"
+                         "\x8c\x86\xc2\x3f\xbe\x5e\xea\x64"
+                         "\xa3\x60\x18\x9f\x80\xaf\x52\x74"
+                         "\x1a\xfe\x22\xc2\x92\x67\x40\x02"
+                         "\x08\xee\x67\x5b\x67\xe0\x3d\xde"
+                         "\x7a\xaf\x8e\x28\xf3\x5e\x0e\xf4"
+                         "\x48\x56\xaa\x85\x22\xd8\x36\xed"
+                         "\x3b\x3d\x68\x69\x30\xbc\x71\x23"
+                         "\xb1\x6e\x61\x03\x89\x44\x03\xf4"
+                         "\x32\xaa\x4c\x40\x9f\x69\xfb\x70"
+                         "\x91\xcc\x1f\x11\xbd\x76\x67\xe6"
+                         "\x10\x8b\x29\x39\x68\xea\x4e\x6d"
+                         "\xae\xfb\x40\xcf\xe2\xd0\x0d\x8d"
+                         "\x6f\xed\x9b\x8d\x64\x7a\x94\x8e"
+                         "\x32\x38\x78\xeb\x7d\x5f\xf9\x4d"
+                         "\x13\xbe\x21\xea\x16\xe7\x5c\xee"
+                         "\xcd\xf6\x5f\xc6\x45\xb2\x8f\x2b"
+                         "\xb5\x93\x3e\x45\xdb\xfd\xa2\x6a"
+                         "\xec\x83\x92\x99\x87\x47\xe0\x7c"
+                         "\xa2\x7b\xc4\x2a\xcd\xc0\x81\x03"
+                         "\x98\xb0\x87\xb6\x86\x13\x64\x33"
+                         "\x4c\xd7\x99\xbf\xdb\x7b\x6e\xaa"
+                         "\x76\xcc\xa0\x74\x1b\xa3\x6e\x83"
+                         "\xd4\xba\x7a\x84\x9d\x91\x71\xcd"
+                         "\x60\x2d\x56\xfd\x26\x35\xcb\xeb"
+                         "\xac\xe9\xee\xa4\xfc\x18\x5b\x91"
+                         "\xd5\xfe\x84\x45\xe0\xc7\xfd\x11"
+                         "\xe9\x00\xb6\x54\xdf\xe1\x94\xde"
+                         "\x2b\x70\x9f\x94\x7f\x15\x0e\x83"
+                         "\x63\x10\xb3\xf5\xea\xd3\xe8\xd1"
+                         "\xa5\xfc\x17\x19\x68\x9a\xbc\x17"
+                         "\x30\x43\x0a\x1a\x33\x92\xd4\x2a"
+                         "\x2e\x68\x99\xbc\x49\xf0\x68\xe3"
+                         "\xf0\x1f\xcb\xcc\xfa\xbb\x05\x56"
+                         "\x46\x84\x8b\x69\x83\x64\xc5\xe0"
+                         "\xc5\x52\x99\x07\x3c\xa6\x5c\xaf"
+                         "\xa3\xde\xd7\xdb\x43\xe6\xb7\x76"
+                         "\x4e\x4d\xd6\x71\x60\x63\x4a\x0c"
+                         "\x5f\xae\x25\x84\x22\x90\x5f\x26"
+                         "\x61\x4d\x8f\xaf\xc9\x22\xf2\x05"
+                         "\xcf\xc1\xdc\x68\xe5\x57\x8e\x24"
+                         "\x1b\x30\x59\xca\xd7\x0d\xc3\xd3"
+                         "\x52\x9e\x09\x3e\x0e\xaf\xdb\x5f"
+                         "\xc7\x2b\xde\x3a\xfd\xad\x93\x04"
+                         "\x74\x06\x89\x0e\x90\xeb\x85\xff"
+                         "\xe6\x3c\x12\x42\xf4\xfa\x80\x75"
+                         "\x5e\x4e\xd7\x2f\x93\x0b\x34\x41"
+                         "\x02\x85\x68\xd0\x03\x12\xde\x92"
+                         "\x54\x7a\x7e\xfb\x55\xe7\x88\xfb"
+                         "\xa4\xa9\xf2\xd1\xc6\x70\x06\x37"
+                         "\x25\xee\xa7\x6e\xd9\x89\x86\x50"
+                         "\x2e\x07\xdb\xfb\x2a\x86\x45\x0e"
+                         "\x91\xf4\x7c\xbb\x12\x60\xe8\x3f"
+                         "\x71\xbe\x8f\x9d\x26\xef\xd9\x89"
+                         "\xc4\x8f\xd8\xc5\x73\xd8\x84\xaa"
+                         "\x2f\xad\x22\x1e\x7e\xcf\xa2\x08"
+                         "\x23\x45\x89\x42\xa0\x30\xeb\xbf"
+                         "\xa1\xed\xad\xd5\x76\xfa\x24\x8f"
+                         "\x98",
+               .rlen   = 1281,
        },
 };
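A quick way to sanity-check a long self-made vector like the one above is to replay it against the generic "chacha20" implementation through the kernel skcipher API. The sketch below is an illustration only and is not part of this patch; it uses current crypto API names and assumes a 32-byte key, the 16-byte IV layout shown in the vectors, and a linear (kmalloc'd) buffer that can sit in a scatterlist.

#include <crypto/skcipher.h>
#include <linux/scatterlist.h>
#include <linux/string.h>
#include <linux/err.h>

/* Encrypt len bytes of buf in place with a 32-byte key and 16-byte IV. */
static int chacha20_replay(const u8 *key, const u8 *iv, u8 *buf,
			   unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	u8 ivbuf[16];
	int ret;

	/* request a synchronous implementation so encrypt() completes inline */
	tfm = crypto_alloc_skcipher("chacha20", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_skcipher_setkey(tfm, key, 32);
	if (ret)
		goto out_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_tfm;
	}

	memcpy(ivbuf, iv, sizeof(ivbuf));	/* the IV may be updated in place */
	sg_init_one(&sg, buf, len);
	skcipher_request_set_crypt(req, &sg, &sg, len, ivbuf);
	ret = crypto_skcipher_encrypt(req);

	skcipher_request_free(req);
out_tfm:
	crypto_free_skcipher(tfm);
	return ret;
}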
 
index 4044125fb5d5fa7589e2cbcb12623546d0effa75..07bc7aa6b224aeeb7ada29b08aec61966d3b3b2e 100644 (file)
@@ -480,4 +480,21 @@ config CRYPTO_DEV_IMGTEC_HASH
          hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
          hashing algorithms.
 
+config CRYPTO_DEV_SUN4I_SS
+       tristate "Support for Allwinner Security System cryptographic accelerator"
+       depends on ARCH_SUNXI
+       select CRYPTO_MD5
+       select CRYPTO_SHA1
+       select CRYPTO_AES
+       select CRYPTO_DES
+       select CRYPTO_BLKCIPHER
+       help
+         Some Allwinner SoCs have a crypto accelerator named
+         Security System. Select this if you want to use it.
+         The Security System handles AES/DES/3DES ciphers in CBC mode
+         and the SHA1 and MD5 hash algorithms.
+
+         To compile this driver as a module, choose M here: the module
+         will be called sun4i-ss.
+
 endif # CRYPTO_HW
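For reference, a minimal consumer-side sketch (not part of this patch) of requesting one of the algorithms named in the help text, SHA1, through the kernel ahash API. Whether the Security System implementation actually services the request depends on the names and priorities the sun4i-ss driver registers, which this sketch does not assume; it simply asks for "sha1" and waits for completion.

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

/* Compute a SHA1 digest of a linear buffer; out must hold 20 bytes. */
static int sha1_digest_example(const u8 *data, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	int ret;

	tfm = crypto_alloc_ahash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_tfm;
	}

	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				   crypto_req_done, &wait);
	sg_init_one(&sg, data, len);
	ahash_request_set_crypt(req, &sg, out, len);

	/* crypto_wait_req() copes with both synchronous and asynchronous backends */
	ret = crypto_wait_req(crypto_ahash_digest(req), &wait);

	ahash_request_free(req);
out_tfm:
	crypto_free_ahash(tfm);
	return ret;
}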
index e35c07a8da8568c59d56e35f0f9c30b2362ecbca..c3ced6fbd1b8f9877a7d6a03acad9deacbf5d378 100644 (file)
@@ -28,3 +28,4 @@ obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
 obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
+obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
index daca933a82ec9ea1c918a868516e859ffcdbca98..e493734095821549f3f0dffc6d51d330187ec83f 100644 (file)
@@ -87,8 +87,8 @@
 #define DESC_GCM_DEC_LEN               (DESC_GCM_BASE + 12 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE              (3 * CAAM_CMD_SZ)
-#define DESC_RFC4106_ENC_LEN           (DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ)
-#define DESC_RFC4106_DEC_LEN           (DESC_RFC4106_BASE + 10 * CAAM_CMD_SZ)
+#define DESC_RFC4106_ENC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_RFC4106_DEC_LEN           (DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
 
 #define DESC_RFC4543_BASE              (3 * CAAM_CMD_SZ)
 #define DESC_RFC4543_ENC_LEN           (DESC_RFC4543_BASE + 11 * CAAM_CMD_SZ)
@@ -976,22 +976,28 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
        append_operation(desc, ctx->class1_alg_type |
                         OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
        append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
        /* Read assoc data */
        append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
                             FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
-       /* cryptlen = seqoutlen - assoclen */
-       append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+       /* Skip IV */
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
 
        /* Will read cryptlen bytes */
        append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
 
+       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* cryptlen = seqoutlen - assoclen */
+       append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ);
+
        /* Write encrypted data */
        append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
 
@@ -1044,21 +1050,27 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
        append_operation(desc, ctx->class1_alg_type |
                         OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON);
 
-       append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
+       append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, 8);
        append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-       /* Skip assoc data */
-       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
        /* Read assoc data */
        append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
                             FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
-       /* Will write cryptlen bytes */
-       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+       /* Skip IV */
+       append_seq_fifo_load(desc, 8, FIFOLD_CLASS_SKIP);
 
        /* Will read cryptlen bytes */
-       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
+       append_math_sub(desc, VARSEQINLEN, SEQOUTLEN, REG3, CAAM_CMD_SZ);
+
+       /* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
+       append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
+
+       /* Skip assoc data */
+       append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+       /* Will write cryptlen bytes */
+       append_math_sub(desc, VARSEQOUTLEN, SEQOUTLEN, REG0, CAAM_CMD_SZ);
 
        /* Store payload data */
        append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -2685,6 +2697,14 @@ static int gcm_encrypt(struct aead_request *req)
        return ret;
 }
 
+static int ipsec_gcm_encrypt(struct aead_request *req)
+{
+       if (req->assoclen < 8)
+               return -EINVAL;
+
+       return gcm_encrypt(req);
+}
+
 static int old_aead_encrypt(struct aead_request *req)
 {
        struct aead_edesc *edesc;
@@ -2757,6 +2777,14 @@ static int gcm_decrypt(struct aead_request *req)
        return ret;
 }
 
+static int ipsec_gcm_decrypt(struct aead_request *req)
+{
+       if (req->assoclen < 8)
+               return -EINVAL;
+
+       return gcm_decrypt(req);
+}
+
 static int old_aead_decrypt(struct aead_request *req)
 {
        struct aead_edesc *edesc;
@@ -4058,8 +4086,8 @@ static struct caam_aead_alg driver_aeads[] = {
                        },
                        .setkey = rfc4106_setkey,
                        .setauthsize = rfc4106_setauthsize,
-                       .encrypt = gcm_encrypt,
-                       .decrypt = gcm_decrypt,
+                       .encrypt = ipsec_gcm_encrypt,
+                       .decrypt = ipsec_gcm_decrypt,
                        .ivsize = 8,
                        .maxauthsize = AES_BLOCK_SIZE,
                },
@@ -4076,8 +4104,8 @@ static struct caam_aead_alg driver_aeads[] = {
                        },
                        .setkey = rfc4543_setkey,
                        .setauthsize = rfc4543_setauthsize,
-                       .encrypt = gcm_encrypt,
-                       .decrypt = gcm_decrypt,
+                       .encrypt = ipsec_gcm_encrypt,
+                       .decrypt = ipsec_gcm_decrypt,
                        .ivsize = 8,
                        .maxauthsize = AES_BLOCK_SIZE,
                },
@@ -4260,7 +4288,8 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
        alg->base.cra_module = THIS_MODULE;
        alg->base.cra_priority = CAAM_CRA_PRIORITY;
        alg->base.cra_ctxsize = sizeof(struct caam_ctx);
-       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY;
+       alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_KERN_DRIVER_ONLY |
+                             CRYPTO_ALG_AEAD_NEW;
 
        alg->init = caam_aead_init;
        alg->exit = caam_aead_exit;
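The ipsec_gcm_encrypt()/ipsec_gcm_decrypt() wrappers above reject req->assoclen < 8 because, under the AEAD interface these descriptors are being converted to (note the CRYPTO_ALG_AEAD_NEW flag above), the 8-byte per-packet IV is carried at the tail of the associated data, as the rfc7539esp test vector hunks earlier in this section show. The caller-side sketch below is an illustration only and is not taken from the patch; it assumes the source scatterlist holds the AD (ending with the 8-byte IV) followed by the plaintext.

#include <crypto/aead.h>
#include <linux/scatterlist.h>
#include <linux/err.h>

static int rfc4106_encrypt_sketch(const u8 *key, unsigned int keylen,
				  struct scatterlist *sg, unsigned int assoclen,
				  unsigned int cryptlen, u8 *iv)
{
	struct crypto_aead *tfm;
	struct aead_request *req;
	DECLARE_CRYPTO_WAIT(wait);
	int ret;

	tfm = crypto_alloc_aead("rfc4106(gcm(aes))", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* for rfc4106, keylen is the AES key plus the 4-byte nonce salt */
	ret = crypto_aead_setkey(tfm, key, keylen);
	if (!ret)
		ret = crypto_aead_setauthsize(tfm, 16);
	if (ret)
		goto out_tfm;

	req = aead_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		ret = -ENOMEM;
		goto out_tfm;
	}

	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &wait);
	/* assoclen covers the AAD plus the trailing 8-byte IV, hence >= 8 */
	aead_request_set_ad(req, assoclen);
	aead_request_set_crypt(req, sg, sg, cryptlen, iv);
	ret = crypto_wait_req(crypto_aead_encrypt(req), &wait);

	aead_request_free(req);
out_tfm:
	crypto_free_aead(tfm);
	return ret;
}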
index efacab7539ef6a8afdacbfb8d9965a0ec4f9ef75..189180976167abbf7f24235b0c77b9cc36d9b274 100644 (file)
@@ -175,7 +175,7 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
 {
        struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctrldev);
        struct caam_ctrl __iomem *ctrl;
-       u32 *desc, status, rdsta_val;
+       u32 *desc, status = 0, rdsta_val;
        int ret = 0, sh_idx;
 
        ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
@@ -207,7 +207,8 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
                 * CAAM eras), then try again.
                 */
                rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
-               if (status || !(rdsta_val & (1 << sh_idx)))
+               if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
+                   !(rdsta_val & (1 << sh_idx)))
                        ret = -EAGAIN;
                if (ret)
                        break;
@@ -370,14 +371,14 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
 int caam_get_era(void)
 {
        struct device_node *caam_node;
-       for_each_compatible_node(caam_node, NULL, "fsl,sec-v4.0") {
-               const uint32_t *prop = (uint32_t *)of_get_property(caam_node,
-                               "fsl,sec-era",
-                               NULL);
-               return prop ? *prop : -ENOTSUPP;
-       }
+       int ret;
+       u32 prop;
+
+       caam_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0");
+       ret = of_property_read_u32(caam_node, "fsl,sec-era", &prop);
+       of_node_put(caam_node);
 
-       return -ENOTSUPP;
+       return IS_ERR_VALUE(ret) ? -ENOTSUPP : prop;
 }
 EXPORT_SYMBOL(caam_get_era);
 
@@ -444,8 +445,9 @@ static int caam_probe(struct platform_device *pdev)
         * Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
         * long pointers in master configuration register
         */
-       setbits32(&ctrl->mcr, MCFGR_WDENABLE |
-                 (sizeof(dma_addr_t) == sizeof(u64) ? MCFGR_LONG_PTR : 0));
+       clrsetbits_be32(&ctrl->mcr, MCFGR_AWCACHE_MASK, MCFGR_AWCACHE_CACH |
+                       MCFGR_WDENABLE | (sizeof(dma_addr_t) == sizeof(u64) ?
+                                        MCFGR_LONG_PTR : 0));
 
        /*
         *  Read the Compile Time parameters and SCFGR to determine
index 9f79fd7bd4d7d1b7589a2facdc15e7667dbb8d48..98d07de24fc48c975faf1e13e65d248cc48edd90 100644 (file)
@@ -367,7 +367,7 @@ do { \
        if (upper) \
                append_u64(desc, data); \
        else \
-               append_u32(desc, data); \
+               append_u32(desc, lower_32_bits(data)); \
 } while (0)
 
 #define append_math_add_imm_u64(desc, dest, src0, src1, data) \
index 672c97489505340abd440dc694cbc16d97045760..5e643523de1556c23dcdd0b56eb54c3b99643d41 100644 (file)
@@ -395,10 +395,16 @@ struct caam_ctrl {
 /* AXI read cache control */
 #define MCFGR_ARCACHE_SHIFT    12
 #define MCFGR_ARCACHE_MASK     (0xf << MCFGR_ARCACHE_SHIFT)
+#define MCFGR_ARCACHE_BUFF     (0x1 << MCFGR_ARCACHE_SHIFT)
+#define MCFGR_ARCACHE_CACH     (0x2 << MCFGR_ARCACHE_SHIFT)
+#define MCFGR_ARCACHE_RALL     (0x4 << MCFGR_ARCACHE_SHIFT)
 
 /* AXI write cache control */
 #define MCFGR_AWCACHE_SHIFT    8
 #define MCFGR_AWCACHE_MASK     (0xf << MCFGR_AWCACHE_SHIFT)
+#define MCFGR_AWCACHE_BUFF     (0x1 << MCFGR_AWCACHE_SHIFT)
+#define MCFGR_AWCACHE_CACH     (0x2 << MCFGR_AWCACHE_SHIFT)
+#define MCFGR_AWCACHE_WALL     (0x8 << MCFGR_AWCACHE_SHIFT)
 
 /* AXI pipeline depth */
 #define MCFGR_AXIPIPE_SHIFT    4
index f2e6de361fd1805094b651d3a36c6141d0ad8fbb..bb241c3ab6b9cad5fec5ed22f4a9f96b5d7da551 100644 (file)
@@ -216,6 +216,7 @@ static const struct acpi_device_id ccp_acpi_match[] = {
        { "AMDI0C00", 0 },
        { },
 };
+MODULE_DEVICE_TABLE(acpi, ccp_acpi_match);
 #endif
 
 #ifdef CONFIG_OF
@@ -223,6 +224,7 @@ static const struct of_device_id ccp_of_match[] = {
        { .compatible = "amd,ccp-seattle-v1a" },
        { },
 };
+MODULE_DEVICE_TABLE(of, ccp_of_match);
 #endif
 
 static struct platform_driver ccp_platform_driver = {
index ad47d0d6109845c810055fdb2ddaba8fa9d62039..68e8aa90fe01cbc074d70df5d5262b70834567d5 100644 (file)
@@ -334,7 +334,7 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
 
        hdev->dma_lch = dma_request_slave_channel(hdev->dev, "tx");
        if (!hdev->dma_lch) {
-               dev_err(hdev->dev, "Couldn't aquire a slave DMA channel.\n");
+               dev_err(hdev->dev, "Couldn't acquire a slave DMA channel.\n");
                return -EBUSY;
        }
        dma_conf.direction = DMA_MEM_TO_DEV;
index 1c6f98dd88f4958003d6da53ce43933e9e016d45..0643e3366e3309de88a03e687a2d5353f5715a22 100644 (file)
@@ -533,7 +533,6 @@ static struct platform_driver marvell_cesa = {
        .probe          = mv_cesa_probe,
        .remove         = mv_cesa_remove,
        .driver         = {
-               .owner  = THIS_MODULE,
                .name   = "marvell-cesa",
                .of_match_table = mv_cesa_of_match_table,
        },
index e421c96c763a6781ac1b40f4f30eb5966dad0e91..ad7552a6998c081a61cbc0fbf5f9d2e28c629bcd 100644 (file)
@@ -14,11 +14,14 @@ config CRYPTO_DEV_NX_ENCRYPT
 config CRYPTO_DEV_NX_COMPRESS
        tristate "Compression acceleration support"
        default y
+       select CRYPTO_ALGAPI
+       select 842_DECOMPRESS
        help
          Support for PowerPC Nest (NX) compression acceleration. This
          module supports acceleration for compressing memory with the 842
-         algorithm.  One of the platform drivers must be selected also.
-         If you choose 'M' here, this module will be called nx_compress.
+         algorithm using the cryptographic API.  One of the platform
+         drivers must be selected also.  If you choose 'M' here, this
+         module will be called nx_compress.
 
 if CRYPTO_DEV_NX_COMPRESS
 
@@ -42,14 +45,4 @@ config CRYPTO_DEV_NX_COMPRESS_POWERNV
          algorithm.  This supports NX hardware on the PowerNV platform.
          If you choose 'M' here, this module will be called nx_compress_powernv.
 
-config CRYPTO_DEV_NX_COMPRESS_CRYPTO
-       tristate "Compression acceleration cryptographic interface"
-       select CRYPTO_ALGAPI
-       select 842_DECOMPRESS
-       default y
-       help
-         Support for PowerPC Nest (NX) accelerators using the cryptographic
-         API.  If you choose 'M' here, this module will be called
-         nx_compress_crypto.
-
 endif
index e1684f5adb11e0f19a398496e47038dd924eada5..b727821c8ed4eb9277113859e5530692ce86ed19 100644 (file)
@@ -10,12 +10,8 @@ nx-crypto-objs := nx.o \
                  nx-sha256.o \
                  nx-sha512.o
 
-obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS) += nx-compress.o nx-compress-platform.o
-obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o
-obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o
-obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_CRYPTO) += nx-compress-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_PSERIES) += nx-compress-pseries.o nx-compress.o
+obj-$(CONFIG_CRYPTO_DEV_NX_COMPRESS_POWERNV) += nx-compress-powernv.o nx-compress.o
 nx-compress-objs := nx-842.o
-nx-compress-platform-objs := nx-842-platform.o
 nx-compress-pseries-objs := nx-842-pseries.o
 nx-compress-powernv-objs := nx-842-powernv.o
-nx-compress-crypto-objs := nx-842-crypto.o
diff --git a/drivers/crypto/nx/nx-842-crypto.c b/drivers/crypto/nx/nx-842-crypto.c
deleted file mode 100644 (file)
index d53a1dc..0000000
+++ /dev/null
@@ -1,580 +0,0 @@
-/*
- * Cryptographic API for the NX-842 hardware compression.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Copyright (C) IBM Corporation, 2011-2015
- *
- * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
- *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
- *
- * Rewrite: Dan Streetman <ddstreet@ieee.org>
- *
- * This is an interface to the NX-842 compression hardware in PowerPC
- * processors.  Most of the complexity of this drvier is due to the fact that
- * the NX-842 compression hardware requires the input and output data buffers
- * to be specifically aligned, to be a specific multiple in length, and within
- * specific minimum and maximum lengths.  Those restrictions, provided by the
- * nx-842 driver via nx842_constraints, mean this driver must use bounce
- * buffers and headers to correct misaligned in or out buffers, and to split
- * input buffers that are too large.
- *
- * This driver will fall back to software decompression if the hardware
- * decompression fails, so this driver's decompression should never fail as
- * long as the provided compressed buffer is valid.  Any compressed buffer
- * created by this driver will have a header (except ones where the input
- * perfectly matches the constraints); so users of this driver cannot simply
- * pass a compressed buffer created by this driver over to the 842 software
- * decompression library.  Instead, users must use this driver to decompress;
- * if the hardware fails or is unavailable, the compressed buffer will be
- * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
- * software decompression library.
- *
- * This does not fall back to software compression, however, since the caller
- * of this function is specifically requesting hardware compression; if the
- * hardware compression fails, the caller can fall back to software
- * compression, and the raw 842 compressed buffer that the software compressor
- * creates can be passed to this driver for hardware decompression; any
- * buffer without our specific header magic is assumed to be a raw 842 buffer
- * and passed directly to the hardware.  Note that the software compression
- * library will produce a compressed buffer that is incompatible with the
- * hardware decompressor if the original input buffer length is not a multiple
- * of 8; if such a compressed buffer is passed to this driver for
- * decompression, the hardware will reject it and this driver will then pass
- * it over to the software library for decompression.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <linux/vmalloc.h>
-#include <linux/sw842.h>
-#include <linux/ratelimit.h>
-
-#include "nx-842.h"
-
-/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
- * template (see lib/842/842.h), so this magic number will never appear at
- * the start of a raw 842 compressed buffer.  That is important, as any buffer
- * passed to us without this magic is assumed to be a raw 842 compressed
- * buffer, and passed directly to the hardware to decompress.
- */
-#define NX842_CRYPTO_MAGIC     (0xf842)
-#define NX842_CRYPTO_GROUP_MAX (0x20)
-#define NX842_CRYPTO_HEADER_SIZE(g)                            \
-       (sizeof(struct nx842_crypto_header) +                   \
-        sizeof(struct nx842_crypto_header_group) * (g))
-#define NX842_CRYPTO_HEADER_MAX_SIZE                           \
-       NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
-
-/* bounce buffer size */
-#define BOUNCE_BUFFER_ORDER    (2)
-#define BOUNCE_BUFFER_SIZE                                     \
-       ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
-
-/* try longer on comp because we can fallback to sw decomp if hw is busy */
-#define COMP_BUSY_TIMEOUT      (250) /* ms */
-#define DECOMP_BUSY_TIMEOUT    (50) /* ms */
-
-struct nx842_crypto_header_group {
-       __be16 padding;                 /* unused bytes at start of group */
-       __be32 compressed_length;       /* compressed bytes in group */
-       __be32 uncompressed_length;     /* bytes after decompression */
-} __packed;
-
-struct nx842_crypto_header {
-       __be16 magic;           /* NX842_CRYPTO_MAGIC */
-       __be16 ignore;          /* decompressed end bytes to ignore */
-       u8 groups;              /* total groups in this header */
-       struct nx842_crypto_header_group group[];
-} __packed;
-
-struct nx842_crypto_param {
-       u8 *in;
-       unsigned int iremain;
-       u8 *out;
-       unsigned int oremain;
-       unsigned int ototal;
-};
-
-static int update_param(struct nx842_crypto_param *p,
-                       unsigned int slen, unsigned int dlen)
-{
-       if (p->iremain < slen)
-               return -EOVERFLOW;
-       if (p->oremain < dlen)
-               return -ENOSPC;
-
-       p->in += slen;
-       p->iremain -= slen;
-       p->out += dlen;
-       p->oremain -= dlen;
-       p->ototal += dlen;
-
-       return 0;
-}
-
-struct nx842_crypto_ctx {
-       u8 *wmem;
-       u8 *sbounce, *dbounce;
-
-       struct nx842_crypto_header header;
-       struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
-};
-
-static int nx842_crypto_init(struct crypto_tfm *tfm)
-{
-       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       ctx->wmem = kmalloc(nx842_workmem_size(), GFP_KERNEL);
-       ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
-       ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
-       if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
-               kfree(ctx->wmem);
-               free_page((unsigned long)ctx->sbounce);
-               free_page((unsigned long)ctx->dbounce);
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-static void nx842_crypto_exit(struct crypto_tfm *tfm)
-{
-       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
-
-       kfree(ctx->wmem);
-       free_page((unsigned long)ctx->sbounce);
-       free_page((unsigned long)ctx->dbounce);
-}
-
-static int read_constraints(struct nx842_constraints *c)
-{
-       int ret;
-
-       ret = nx842_constraints(c);
-       if (ret) {
-               pr_err_ratelimited("could not get nx842 constraints : %d\n",
-                                  ret);
-               return ret;
-       }
-
-       /* limit maximum, to always have enough bounce buffer to decompress */
-       if (c->maximum > BOUNCE_BUFFER_SIZE) {
-               c->maximum = BOUNCE_BUFFER_SIZE;
-               pr_info_once("limiting nx842 maximum to %x\n", c->maximum);
-       }
-
-       return 0;
-}
-
-static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
-{
-       int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
-
-       /* compress should have added space for header */
-       if (s > be16_to_cpu(hdr->group[0].padding)) {
-               pr_err("Internal error: no space for header\n");
-               return -EINVAL;
-       }
-
-       memcpy(buf, hdr, s);
-
-       print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
-
-       return 0;
-}
-
-static int compress(struct nx842_crypto_ctx *ctx,
-                   struct nx842_crypto_param *p,
-                   struct nx842_crypto_header_group *g,
-                   struct nx842_constraints *c,
-                   u16 *ignore,
-                   unsigned int hdrsize)
-{
-       unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
-       unsigned int adj_slen = slen;
-       u8 *src = p->in, *dst = p->out;
-       int ret, dskip = 0;
-       ktime_t timeout;
-
-       if (p->iremain == 0)
-               return -EOVERFLOW;
-
-       if (p->oremain == 0 || hdrsize + c->minimum > dlen)
-               return -ENOSPC;
-
-       if (slen % c->multiple)
-               adj_slen = round_up(slen, c->multiple);
-       if (slen < c->minimum)
-               adj_slen = c->minimum;
-       if (slen > c->maximum)
-               adj_slen = slen = c->maximum;
-       if (adj_slen > slen || (u64)src % c->alignment) {
-               adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
-               slen = min(slen, BOUNCE_BUFFER_SIZE);
-               if (adj_slen > slen)
-                       memset(ctx->sbounce + slen, 0, adj_slen - slen);
-               memcpy(ctx->sbounce, src, slen);
-               src = ctx->sbounce;
-               slen = adj_slen;
-               pr_debug("using comp sbounce buffer, len %x\n", slen);
-       }
-
-       dst += hdrsize;
-       dlen -= hdrsize;
-
-       if ((u64)dst % c->alignment) {
-               dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
-               dst += dskip;
-               dlen -= dskip;
-       }
-       if (dlen % c->multiple)
-               dlen = round_down(dlen, c->multiple);
-       if (dlen < c->minimum) {
-nospc:
-               dst = ctx->dbounce;
-               dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
-               dlen = round_down(dlen, c->multiple);
-               dskip = 0;
-               pr_debug("using comp dbounce buffer, len %x\n", dlen);
-       }
-       if (dlen > c->maximum)
-               dlen = c->maximum;
-
-       tmplen = dlen;
-       timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
-       do {
-               dlen = tmplen; /* reset dlen, if we're retrying */
-               ret = nx842_compress(src, slen, dst, &dlen, ctx->wmem);
-               /* possibly we should reduce the slen here, instead of
-                * retrying with the dbounce buffer?
-                */
-               if (ret == -ENOSPC && dst != ctx->dbounce)
-                       goto nospc;
-       } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
-       if (ret)
-               return ret;
-
-       dskip += hdrsize;
-
-       if (dst == ctx->dbounce)
-               memcpy(p->out + dskip, dst, dlen);
-
-       g->padding = cpu_to_be16(dskip);
-       g->compressed_length = cpu_to_be32(dlen);
-       g->uncompressed_length = cpu_to_be32(slen);
-
-       if (p->iremain < slen) {
-               *ignore = slen - p->iremain;
-               slen = p->iremain;
-       }
-
-       pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
-                slen, *ignore, dlen, dskip);
-
-       return update_param(p, slen, dskip + dlen);
-}
-
-static int nx842_crypto_compress(struct crypto_tfm *tfm,
-                                const u8 *src, unsigned int slen,
-                                u8 *dst, unsigned int *dlen)
-{
-       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
-       struct nx842_crypto_header *hdr = &ctx->header;
-       struct nx842_crypto_param p;
-       struct nx842_constraints c;
-       unsigned int groups, hdrsize, h;
-       int ret, n;
-       bool add_header;
-       u16 ignore = 0;
-
-       p.in = (u8 *)src;
-       p.iremain = slen;
-       p.out = dst;
-       p.oremain = *dlen;
-       p.ototal = 0;
-
-       *dlen = 0;
-
-       ret = read_constraints(&c);
-       if (ret)
-               return ret;
-
-       groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
-                      DIV_ROUND_UP(p.iremain, c.maximum));
-       hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
-
-       /* skip adding header if the buffers meet all constraints */
-       add_header = (p.iremain % c.multiple    ||
-                     p.iremain < c.minimum     ||
-                     p.iremain > c.maximum     ||
-                     (u64)p.in % c.alignment   ||
-                     p.oremain % c.multiple    ||
-                     p.oremain < c.minimum     ||
-                     p.oremain > c.maximum     ||
-                     (u64)p.out % c.alignment);
-
-       hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
-       hdr->groups = 0;
-       hdr->ignore = 0;
-
-       while (p.iremain > 0) {
-               n = hdr->groups++;
-               if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
-                       return -ENOSPC;
-
-               /* header goes before first group */
-               h = !n && add_header ? hdrsize : 0;
-
-               if (ignore)
-                       pr_warn("interal error, ignore is set %x\n", ignore);
-
-               ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
-               if (ret)
-                       return ret;
-       }
-
-       if (!add_header && hdr->groups > 1) {
-               pr_err("Internal error: No header but multiple groups\n");
-               return -EINVAL;
-       }
-
-       /* ignore indicates the input stream needed to be padded */
-       hdr->ignore = cpu_to_be16(ignore);
-       if (ignore)
-               pr_debug("marked %d bytes as ignore\n", ignore);
-
-       if (add_header)
-               ret = nx842_crypto_add_header(hdr, dst);
-       if (ret)
-               return ret;
-
-       *dlen = p.ototal;
-
-       pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
-
-       return 0;
-}
-
-static int decompress(struct nx842_crypto_ctx *ctx,
-                     struct nx842_crypto_param *p,
-                     struct nx842_crypto_header_group *g,
-                     struct nx842_constraints *c,
-                     u16 ignore,
-                     bool usehw)
-{
-       unsigned int slen = be32_to_cpu(g->compressed_length);
-       unsigned int required_len = be32_to_cpu(g->uncompressed_length);
-       unsigned int dlen = p->oremain, tmplen;
-       unsigned int adj_slen = slen;
-       u8 *src = p->in, *dst = p->out;
-       u16 padding = be16_to_cpu(g->padding);
-       int ret, spadding = 0, dpadding = 0;
-       ktime_t timeout;
-
-       if (!slen || !required_len)
-               return -EINVAL;
-
-       if (p->iremain <= 0 || padding + slen > p->iremain)
-               return -EOVERFLOW;
-
-       if (p->oremain <= 0 || required_len - ignore > p->oremain)
-               return -ENOSPC;
-
-       src += padding;
-
-       if (!usehw)
-               goto usesw;
-
-       if (slen % c->multiple)
-               adj_slen = round_up(slen, c->multiple);
-       if (slen < c->minimum)
-               adj_slen = c->minimum;
-       if (slen > c->maximum)
-               goto usesw;
-       if (slen < adj_slen || (u64)src % c->alignment) {
-               /* we can append padding bytes because the 842 format defines
-                * an "end" template (see lib/842/842_decompress.c) and will
-                * ignore any bytes following it.
-                */
-               if (slen < adj_slen)
-                       memset(ctx->sbounce + slen, 0, adj_slen - slen);
-               memcpy(ctx->sbounce, src, slen);
-               src = ctx->sbounce;
-               spadding = adj_slen - slen;
-               slen = adj_slen;
-               pr_debug("using decomp sbounce buffer, len %x\n", slen);
-       }
-
-       if (dlen % c->multiple)
-               dlen = round_down(dlen, c->multiple);
-       if (dlen < required_len || (u64)dst % c->alignment) {
-               dst = ctx->dbounce;
-               dlen = min(required_len, BOUNCE_BUFFER_SIZE);
-               pr_debug("using decomp dbounce buffer, len %x\n", dlen);
-       }
-       if (dlen < c->minimum)
-               goto usesw;
-       if (dlen > c->maximum)
-               dlen = c->maximum;
-
-       tmplen = dlen;
-       timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
-       do {
-               dlen = tmplen; /* reset dlen, if we're retrying */
-               ret = nx842_decompress(src, slen, dst, &dlen, ctx->wmem);
-       } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
-       if (ret) {
-usesw:
-               /* reset everything, sw doesn't have constraints */
-               src = p->in + padding;
-               slen = be32_to_cpu(g->compressed_length);
-               spadding = 0;
-               dst = p->out;
-               dlen = p->oremain;
-               dpadding = 0;
-               if (dlen < required_len) { /* have ignore bytes */
-                       dst = ctx->dbounce;
-                       dlen = BOUNCE_BUFFER_SIZE;
-               }
-               pr_info_ratelimited("using software 842 decompression\n");
-               ret = sw842_decompress(src, slen, dst, &dlen);
-       }
-       if (ret)
-               return ret;
-
-       slen -= spadding;
-
-       dlen -= ignore;
-       if (ignore)
-               pr_debug("ignoring last %x bytes\n", ignore);
-
-       if (dst == ctx->dbounce)
-               memcpy(p->out, dst, dlen);
-
-       pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
-                slen, padding, dlen, ignore);
-
-       return update_param(p, slen + padding, dlen);
-}
-
-static int nx842_crypto_decompress(struct crypto_tfm *tfm,
-                                  const u8 *src, unsigned int slen,
-                                  u8 *dst, unsigned int *dlen)
-{
-       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
-       struct nx842_crypto_header *hdr;
-       struct nx842_crypto_param p;
-       struct nx842_constraints c;
-       int n, ret, hdr_len;
-       u16 ignore = 0;
-       bool usehw = true;
-
-       p.in = (u8 *)src;
-       p.iremain = slen;
-       p.out = dst;
-       p.oremain = *dlen;
-       p.ototal = 0;
-
-       *dlen = 0;
-
-       if (read_constraints(&c))
-               usehw = false;
-
-       hdr = (struct nx842_crypto_header *)src;
-
-       /* If it doesn't start with our header magic number, assume it's a raw
-        * 842 compressed buffer and pass it directly to the hardware driver
-        */
-       if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
-               struct nx842_crypto_header_group g = {
-                       .padding =              0,
-                       .compressed_length =    cpu_to_be32(p.iremain),
-                       .uncompressed_length =  cpu_to_be32(p.oremain),
-               };
-
-               ret = decompress(ctx, &p, &g, &c, 0, usehw);
-               if (ret)
-                       return ret;
-
-               *dlen = p.ototal;
-
-               return 0;
-       }
-
-       if (!hdr->groups) {
-               pr_err("header has no groups\n");
-               return -EINVAL;
-       }
-       if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
-               pr_err("header has too many groups %x, max %x\n",
-                      hdr->groups, NX842_CRYPTO_GROUP_MAX);
-               return -EINVAL;
-       }
-
-       hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
-       if (hdr_len > slen)
-               return -EOVERFLOW;
-
-       memcpy(&ctx->header, src, hdr_len);
-       hdr = &ctx->header;
-
-       for (n = 0; n < hdr->groups; n++) {
-               /* ignore applies to last group */
-               if (n + 1 == hdr->groups)
-                       ignore = be16_to_cpu(hdr->ignore);
-
-               ret = decompress(ctx, &p, &hdr->group[n], &c, ignore, usehw);
-               if (ret)
-                       return ret;
-       }
-
-       *dlen = p.ototal;
-
-       pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
-
-       return 0;
-}
-
-static struct crypto_alg alg = {
-       .cra_name               = "842",
-       .cra_driver_name        = "842-nx",
-       .cra_priority           = 300,
-       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
-       .cra_ctxsize            = sizeof(struct nx842_crypto_ctx),
-       .cra_module             = THIS_MODULE,
-       .cra_init               = nx842_crypto_init,
-       .cra_exit               = nx842_crypto_exit,
-       .cra_u                  = { .compress = {
-       .coa_compress           = nx842_crypto_compress,
-       .coa_decompress         = nx842_crypto_decompress } }
-};
-
-static int __init nx842_crypto_mod_init(void)
-{
-       return crypto_register_alg(&alg);
-}
-module_init(nx842_crypto_mod_init);
-
-static void __exit nx842_crypto_mod_exit(void)
-{
-       crypto_unregister_alg(&alg);
-}
-module_exit(nx842_crypto_mod_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Interface");
-MODULE_ALIAS_CRYPTO("842");
-MODULE_ALIAS_CRYPTO("842-nx");
-MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
diff --git a/drivers/crypto/nx/nx-842-platform.c b/drivers/crypto/nx/nx-842-platform.c
deleted file mode 100644 (file)
index 664f13d..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-
-#include "nx-842.h"
-
-/* this is needed, separate from the main nx-842.c driver, because that main
- * driver loads the platform drivers during its init(), and it expects one
- * (or none) of the platform drivers to set this pointer to its driver.
- * That means this pointer can't be in the main nx-842 driver, because it
- * wouldn't be accessible until after the main driver loaded, which wouldn't
- * be possible as it's waiting for the platform driver to load.  So place it
- * here.
- */
-static struct nx842_driver *driver;
-static DEFINE_SPINLOCK(driver_lock);
-
-struct nx842_driver *nx842_platform_driver(void)
-{
-       return driver;
-}
-EXPORT_SYMBOL_GPL(nx842_platform_driver);
-
-bool nx842_platform_driver_set(struct nx842_driver *_driver)
-{
-       bool ret = false;
-
-       spin_lock(&driver_lock);
-
-       if (!driver) {
-               driver = _driver;
-               ret = true;
-       } else
-               WARN(1, "can't set platform driver, already set to %s\n",
-                    driver->name);
-
-       spin_unlock(&driver_lock);
-       return ret;
-}
-EXPORT_SYMBOL_GPL(nx842_platform_driver_set);
-
-/* only call this from the platform driver exit function */
-void nx842_platform_driver_unset(struct nx842_driver *_driver)
-{
-       spin_lock(&driver_lock);
-
-       if (driver == _driver)
-               driver = NULL;
-       else if (driver)
-               WARN(1, "can't unset platform driver %s, currently set to %s\n",
-                    _driver->name, driver->name);
-       else
-               WARN(1, "can't unset platform driver, already unset\n");
-
-       spin_unlock(&driver_lock);
-}
-EXPORT_SYMBOL_GPL(nx842_platform_driver_unset);
-
-bool nx842_platform_driver_get(void)
-{
-       bool ret = false;
-
-       spin_lock(&driver_lock);
-
-       if (driver)
-               ret = try_module_get(driver->owner);
-
-       spin_unlock(&driver_lock);
-
-       return ret;
-}
-EXPORT_SYMBOL_GPL(nx842_platform_driver_get);
-
-void nx842_platform_driver_put(void)
-{
-       spin_lock(&driver_lock);
-
-       if (driver)
-               module_put(driver->owner);
-
-       spin_unlock(&driver_lock);
-}
-EXPORT_SYMBOL_GPL(nx842_platform_driver_put);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
-MODULE_DESCRIPTION("842 H/W Compression platform driver");
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index 33b3b0abf4ae7f555f12b5b02bd4e0759a6d5e2b..3750e13d872181c425feefafbc259c03024b75e0 100644 (file)
@@ -26,6 +26,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 MODULE_DESCRIPTION("842 H/W Compression driver for IBM PowerNV processors");
+MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-nx");
 
 #define WORKMEM_ALIGN  (CRB_ALIGN)
 #define CSB_WAIT_MAX   (5000) /* ms */
@@ -344,7 +346,8 @@ static int wait_for_csb(struct nx842_workmem *wmem,
        }
 
        /* successful completion */
-       pr_debug_ratelimited("Processed %u bytes in %lu us\n", csb->count,
+       pr_debug_ratelimited("Processed %u bytes in %lu us\n",
+                            be32_to_cpu(csb->count),
                             (unsigned long)ktime_us_delta(now, start));
 
        return 0;
@@ -581,9 +584,29 @@ static struct nx842_driver nx842_powernv_driver = {
        .decompress =   nx842_powernv_decompress,
 };
 
+static int nx842_powernv_crypto_init(struct crypto_tfm *tfm)
+{
+       return nx842_crypto_init(tfm, &nx842_powernv_driver);
+}
+
+static struct crypto_alg nx842_powernv_alg = {
+       .cra_name               = "842",
+       .cra_driver_name        = "842-nx",
+       .cra_priority           = 300,
+       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
+       .cra_ctxsize            = sizeof(struct nx842_crypto_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_init               = nx842_powernv_crypto_init,
+       .cra_exit               = nx842_crypto_exit,
+       .cra_u                  = { .compress = {
+       .coa_compress           = nx842_crypto_compress,
+       .coa_decompress         = nx842_crypto_decompress } }
+};
+
 static __init int nx842_powernv_init(void)
 {
        struct device_node *dn;
+       int ret;
 
        /* verify workmem size/align restrictions */
        BUILD_BUG_ON(WORKMEM_ALIGN % CRB_ALIGN);
@@ -594,17 +617,14 @@ static __init int nx842_powernv_init(void)
        BUILD_BUG_ON(DDE_BUFFER_ALIGN % DDE_BUFFER_SIZE_MULT);
        BUILD_BUG_ON(DDE_BUFFER_SIZE_MULT % DDE_BUFFER_LAST_MULT);
 
-       pr_info("loading\n");
-
        for_each_compatible_node(dn, NULL, "ibm,power-nx")
                nx842_powernv_probe(dn);
 
-       if (!nx842_ct) {
-               pr_err("no coprocessors found\n");
+       if (!nx842_ct)
                return -ENODEV;
-       }
 
-       if (!nx842_platform_driver_set(&nx842_powernv_driver)) {
+       ret = crypto_register_alg(&nx842_powernv_alg);
+       if (ret) {
                struct nx842_coproc *coproc, *n;
 
                list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
@@ -612,11 +632,9 @@ static __init int nx842_powernv_init(void)
                        kfree(coproc);
                }
 
-               return -EEXIST;
+               return ret;
        }
 
-       pr_info("loaded\n");
-
        return 0;
 }
 module_init(nx842_powernv_init);
@@ -625,13 +643,11 @@ static void __exit nx842_powernv_exit(void)
 {
        struct nx842_coproc *coproc, *n;
 
-       nx842_platform_driver_unset(&nx842_powernv_driver);
+       crypto_unregister_alg(&nx842_powernv_alg);
 
        list_for_each_entry_safe(coproc, n, &nx842_coprocs, list) {
                list_del(&coproc->list);
                kfree(coproc);
        }
-
-       pr_info("unloaded\n");
 }
 module_exit(nx842_powernv_exit);
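
The hunks above drop the old nx842_platform_driver_set() handoff: each platform module now registers its own "842"/"842-nx" crypto_alg and binds the shared glue to its nx842_driver from cra_init. A minimal sketch of that wiring under a hypothetical backend (nxdemo_driver and nxdemo_crypto_init are invented names; the real constraints and callbacks come from the platform driver, as in nx842_powernv_driver above):

/* Sketch only: how a platform backend plugs into the shared 842 glue after
 * this change.  nxdemo_* names are hypothetical; .constraints, .compress and
 * .decompress must point at real platform implementations.
 */
static struct nx842_driver nxdemo_driver = {
	.name		= "842-demo",
	.owner		= THIS_MODULE,
	.workmem_size	= 4096,			/* assumed; platform-specific */
	/* .constraints, .compress, .decompress filled in by the platform */
};

static int nxdemo_crypto_init(struct crypto_tfm *tfm)
{
	/* bind this tfm's context to the demo backend */
	return nx842_crypto_init(tfm, &nxdemo_driver);
}

static struct crypto_alg nxdemo_alg = {
	.cra_name		= "842",
	.cra_driver_name	= "842-demo",
	.cra_priority		= 300,
	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
	.cra_ctxsize		= sizeof(struct nx842_crypto_ctx),
	.cra_module		= THIS_MODULE,
	.cra_init		= nxdemo_crypto_init,
	.cra_exit		= nx842_crypto_exit,
	.cra_u			= { .compress = {
	.coa_compress		= nx842_crypto_compress,
	.coa_decompress		= nx842_crypto_decompress } }
};

/* registered from the platform module's init, mirroring the hunks above:
 *	ret = crypto_register_alg(&nxdemo_alg);
 * and torn down with crypto_unregister_alg(&nxdemo_alg) on exit.
 */
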
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 3040a6091bf2797a8a959f375651eb84f54464e4..f4cbde03c6adda03c497f0ce08a358081f78d6d6 100644 (file)
@@ -29,6 +29,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Robert Jennings <rcj@linux.vnet.ibm.com>");
 MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+MODULE_ALIAS_CRYPTO("842");
+MODULE_ALIAS_CRYPTO("842-nx");
 
 static struct nx842_constraints nx842_pseries_constraints = {
        .alignment =    DDE_BUFFER_ALIGN,
@@ -99,11 +101,6 @@ struct nx842_workmem {
 #define NX842_HW_PAGE_SIZE     (4096)
 #define NX842_HW_PAGE_MASK     (~(NX842_HW_PAGE_SIZE-1))
 
-enum nx842_status {
-       UNAVAILABLE,
-       AVAILABLE
-};
-
 struct ibm_nx842_counters {
        atomic64_t comp_complete;
        atomic64_t comp_failed;
@@ -121,7 +118,6 @@ static struct nx842_devdata {
        unsigned int max_sg_len;
        unsigned int max_sync_size;
        unsigned int max_sync_sg;
-       enum nx842_status status;
 } __rcu *devdata;
 static DEFINE_SPINLOCK(devdata_mutex);
 
@@ -230,9 +226,12 @@ static int nx842_validate_result(struct device *dev,
        switch (csb->completion_code) {
        case 0: /* Completed without error */
                break;
-       case 64: /* Target bytes > Source bytes during compression */
+       case 64: /* Compression ok, but output larger than input */
+               dev_dbg(dev, "%s: output size larger than input size\n",
+                                       __func__);
+               break;
        case 13: /* Output buffer too small */
-               dev_dbg(dev, "%s: Compression output larger than input\n",
+               dev_dbg(dev, "%s: Out of space in output buffer\n",
                                        __func__);
                return -ENOSPC;
        case 66: /* Input data contains an illegal template field */
@@ -537,41 +536,36 @@ static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
                devdata->max_sync_size = 0;
                devdata->max_sync_sg = 0;
                devdata->max_sg_len = 0;
-               devdata->status = UNAVAILABLE;
                return 0;
        } else
                return -ENOENT;
 }
 
 /**
- * nx842_OF_upd_status -- Update the device info from OF status prop
+ * nx842_OF_upd_status -- Check the device info from OF status prop
  *
  * The status property indicates if the accelerator is enabled.  If the
  * device is in the OF tree it indicates that the hardware is present.
  * The status field indicates if the device is enabled when the status
  * is 'okay'.  Otherwise the device driver will be disabled.
  *
- * @devdata - struct nx842_devdata to update
  * @prop - struct property point containing the maxsyncop for the update
  *
  * Returns:
  *  0 - Device is available
- *  -EINVAL - Device is not available
+ *  -ENODEV - Device is not available
  */
-static int nx842_OF_upd_status(struct nx842_devdata *devdata,
-                                       struct property *prop) {
-       int ret = 0;
+static int nx842_OF_upd_status(struct property *prop)
+{
        const char *status = (const char *)prop->value;
 
-       if (!strncmp(status, "okay", (size_t)prop->length)) {
-               devdata->status = AVAILABLE;
-       } else {
-               dev_info(devdata->dev, "%s: status '%s' is not 'okay'\n",
-                               __func__, status);
-               devdata->status = UNAVAILABLE;
-       }
+       if (!strncmp(status, "okay", (size_t)prop->length))
+               return 0;
+       if (!strncmp(status, "disabled", (size_t)prop->length))
+               return -ENODEV;
+       dev_info(devdata->dev, "%s: unknown status '%s'\n", __func__, status);
 
-       return ret;
+       return -EINVAL;
 }
 
 /**
@@ -735,6 +729,10 @@ static int nx842_OF_upd(struct property *new_prop)
        int ret = 0;
        unsigned long flags;
 
+       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+       if (!new_devdata)
+               return -ENOMEM;
+
        spin_lock_irqsave(&devdata_mutex, flags);
        old_devdata = rcu_dereference_check(devdata,
                        lockdep_is_held(&devdata_mutex));
@@ -744,16 +742,10 @@ static int nx842_OF_upd(struct property *new_prop)
        if (!old_devdata || !of_node) {
                pr_err("%s: device is not available\n", __func__);
                spin_unlock_irqrestore(&devdata_mutex, flags);
+               kfree(new_devdata);
                return -ENODEV;
        }
 
-       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-       if (!new_devdata) {
-               dev_err(old_devdata->dev, "%s: Could not allocate memory for device data\n", __func__);
-               ret = -ENOMEM;
-               goto error_out;
-       }
-
        memcpy(new_devdata, old_devdata, sizeof(*old_devdata));
        new_devdata->counters = old_devdata->counters;
 
@@ -777,7 +769,7 @@ static int nx842_OF_upd(struct property *new_prop)
                goto out;
 
        /* Perform property updates */
-       ret = nx842_OF_upd_status(new_devdata, status);
+       ret = nx842_OF_upd_status(status);
        if (ret)
                goto error_out;
 
@@ -970,13 +962,43 @@ static struct nx842_driver nx842_pseries_driver = {
        .decompress =   nx842_pseries_decompress,
 };
 
-static int __init nx842_probe(struct vio_dev *viodev,
-                                 const struct vio_device_id *id)
+static int nx842_pseries_crypto_init(struct crypto_tfm *tfm)
+{
+       return nx842_crypto_init(tfm, &nx842_pseries_driver);
+}
+
+static struct crypto_alg nx842_pseries_alg = {
+       .cra_name               = "842",
+       .cra_driver_name        = "842-nx",
+       .cra_priority           = 300,
+       .cra_flags              = CRYPTO_ALG_TYPE_COMPRESS,
+       .cra_ctxsize            = sizeof(struct nx842_crypto_ctx),
+       .cra_module             = THIS_MODULE,
+       .cra_init               = nx842_pseries_crypto_init,
+       .cra_exit               = nx842_crypto_exit,
+       .cra_u                  = { .compress = {
+       .coa_compress           = nx842_crypto_compress,
+       .coa_decompress         = nx842_crypto_decompress } }
+};
+
+static int nx842_probe(struct vio_dev *viodev,
+                      const struct vio_device_id *id)
 {
        struct nx842_devdata *old_devdata, *new_devdata = NULL;
        unsigned long flags;
        int ret = 0;
 
+       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
+       if (!new_devdata)
+               return -ENOMEM;
+
+       new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
+                       GFP_NOFS);
+       if (!new_devdata->counters) {
+               kfree(new_devdata);
+               return -ENOMEM;
+       }
+
        spin_lock_irqsave(&devdata_mutex, flags);
        old_devdata = rcu_dereference_check(devdata,
                        lockdep_is_held(&devdata_mutex));
@@ -989,21 +1011,6 @@ static int __init nx842_probe(struct vio_dev *viodev,
 
        dev_set_drvdata(&viodev->dev, NULL);
 
-       new_devdata = kzalloc(sizeof(*new_devdata), GFP_NOFS);
-       if (!new_devdata) {
-               dev_err(&viodev->dev, "%s: Could not allocate memory for device data\n", __func__);
-               ret = -ENOMEM;
-               goto error_unlock;
-       }
-
-       new_devdata->counters = kzalloc(sizeof(*new_devdata->counters),
-                       GFP_NOFS);
-       if (!new_devdata->counters) {
-               dev_err(&viodev->dev, "%s: Could not allocate memory for performance counters\n", __func__);
-               ret = -ENOMEM;
-               goto error_unlock;
-       }
-
        new_devdata->vdev = viodev;
        new_devdata->dev = &viodev->dev;
        nx842_OF_set_defaults(new_devdata);
@@ -1016,9 +1023,12 @@ static int __init nx842_probe(struct vio_dev *viodev,
        of_reconfig_notifier_register(&nx842_of_nb);
 
        ret = nx842_OF_upd(NULL);
-       if (ret && ret != -ENODEV) {
-               dev_err(&viodev->dev, "could not parse device tree. %d\n", ret);
-               ret = -1;
+       if (ret)
+               goto error;
+
+       ret = crypto_register_alg(&nx842_pseries_alg);
+       if (ret) {
+               dev_err(&viodev->dev, "could not register comp alg: %d\n", ret);
                goto error;
        }
 
@@ -1043,7 +1053,7 @@ error:
        return ret;
 }
 
-static int __exit nx842_remove(struct vio_dev *viodev)
+static int nx842_remove(struct vio_dev *viodev)
 {
        struct nx842_devdata *old_devdata;
        unsigned long flags;
@@ -1051,6 +1061,8 @@ static int __exit nx842_remove(struct vio_dev *viodev)
        pr_info("Removing IBM Power 842 compression device\n");
        sysfs_remove_group(&viodev->dev.kobj, &nx842_attribute_group);
 
+       crypto_unregister_alg(&nx842_pseries_alg);
+
        spin_lock_irqsave(&devdata_mutex, flags);
        old_devdata = rcu_dereference_check(devdata,
                        lockdep_is_held(&devdata_mutex));
@@ -1074,18 +1086,16 @@ static struct vio_device_id nx842_vio_driver_ids[] = {
 static struct vio_driver nx842_vio_driver = {
        .name = KBUILD_MODNAME,
        .probe = nx842_probe,
-       .remove = __exit_p(nx842_remove),
+       .remove = nx842_remove,
        .get_desired_dma = nx842_get_desired_dma,
        .id_table = nx842_vio_driver_ids,
 };
 
-static int __init nx842_init(void)
+static int __init nx842_pseries_init(void)
 {
        struct nx842_devdata *new_devdata;
        int ret;
 
-       pr_info("Registering IBM Power 842 compression driver\n");
-
        if (!of_find_compatible_node(NULL, NULL, "ibm,compression"))
                return -ENODEV;
 
@@ -1095,7 +1105,6 @@ static int __init nx842_init(void)
                pr_err("Could not allocate memory for device data\n");
                return -ENOMEM;
        }
-       new_devdata->status = UNAVAILABLE;
        RCU_INIT_POINTER(devdata, new_devdata);
 
        ret = vio_register_driver(&nx842_vio_driver);
@@ -1106,24 +1115,18 @@ static int __init nx842_init(void)
                return ret;
        }
 
-       if (!nx842_platform_driver_set(&nx842_pseries_driver)) {
-               vio_unregister_driver(&nx842_vio_driver);
-               kfree(new_devdata);
-               return -EEXIST;
-       }
-
        return 0;
 }
 
-module_init(nx842_init);
+module_init(nx842_pseries_init);
 
-static void __exit nx842_exit(void)
+static void __exit nx842_pseries_exit(void)
 {
        struct nx842_devdata *old_devdata;
        unsigned long flags;
 
-       pr_info("Exiting IBM Power 842 compression driver\n");
-       nx842_platform_driver_unset(&nx842_pseries_driver);
+       crypto_unregister_alg(&nx842_pseries_alg);
+
        spin_lock_irqsave(&devdata_mutex, flags);
        old_devdata = rcu_dereference_check(devdata,
                        lockdep_is_held(&devdata_mutex));
@@ -1136,5 +1139,5 @@ static void __exit nx842_exit(void)
        vio_unregister_driver(&nx842_vio_driver);
 }
 
-module_exit(nx842_exit);
+module_exit(nx842_pseries_exit);
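
In the pSeries hunks above, nx842_OF_upd_status() now maps the device-tree status property straight to an errno instead of recording an AVAILABLE/UNAVAILABLE state. A stand-alone, user-space sketch of that mapping (of_status_to_errno() and the sample strings are hypothetical, only to show the okay/disabled/other split), runnable outside the kernel:

/* Sketch: "okay" -> 0 (usable), "disabled" -> -ENODEV, anything else -> -EINVAL,
 * matching the rework of nx842_OF_upd_status() above.  The length passed in
 * mimics prop->length, which includes the terminating NUL.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static int of_status_to_errno(const char *status, size_t len)
{
	if (!strncmp(status, "okay", len))
		return 0;
	if (!strncmp(status, "disabled", len))
		return -ENODEV;
	return -EINVAL;
}

int main(void)
{
	const char *samples[] = { "okay", "disabled", "fail-ses" };

	for (size_t i = 0; i < 3; i++)
		printf("%-10s -> %d\n", samples[i],
		       of_status_to_errno(samples[i], strlen(samples[i]) + 1));
	return 0;
}
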
 
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 6e5e0d60d0c8c9b887a19fe9e9f7a193ee298882..046c1c45411bbc7fe21b5207479644130183ac68 100644 (file)
@@ -1,10 +1,5 @@
 /*
- * Driver frontend for IBM Power 842 compression accelerator
- *
- * Copyright (C) 2015 Dan Streetman, IBM Corp
- *
- * Designer of the Power data compression engine:
- *   Bulent Abali <abali@us.ibm.com>
+ * Cryptographic API for the NX-842 hardware compression.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
+ *
+ * Copyright (C) IBM Corporation, 2011-2015
+ *
+ * Designer of the Power data compression engine:
+ *   Bulent Abali <abali@us.ibm.com>
+ *
+ * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
+ *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
+ *
+ * Rewrite: Dan Streetman <ddstreet@ieee.org>
+ *
+ * This is an interface to the NX-842 compression hardware in PowerPC
+ * processors.  Most of the complexity of this driver is due to the fact that
+ * the NX-842 compression hardware requires the input and output data buffers
+ * to be specifically aligned, to be a specific multiple in length, and within
+ * specific minimum and maximum lengths.  Those restrictions, provided by the
+ * nx-842 driver via nx842_constraints, mean this driver must use bounce
+ * buffers and headers to correct misaligned in or out buffers, and to split
+ * input buffers that are too large.
+ *
+ * This driver will fall back to software decompression if the hardware
+ * decompression fails, so this driver's decompression should never fail as
+ * long as the provided compressed buffer is valid.  Any compressed buffer
+ * created by this driver will have a header (except ones where the input
+ * perfectly matches the constraints); so users of this driver cannot simply
+ * pass a compressed buffer created by this driver over to the 842 software
+ * decompression library.  Instead, users must use this driver to decompress;
+ * if the hardware fails or is unavailable, the compressed buffer will be
+ * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
+ * software decompression library.
+ *
+ * This does not fall back to software compression, however, since the caller
+ * of this function is specifically requesting hardware compression; if the
+ * hardware compression fails, the caller can fall back to software
+ * compression, and the raw 842 compressed buffer that the software compressor
+ * creates can be passed to this driver for hardware decompression; any
+ * buffer without our specific header magic is assumed to be a raw 842 buffer
+ * and passed directly to the hardware.  Note that the software compression
+ * library will produce a compressed buffer that is incompatible with the
+ * hardware decompressor if the original input buffer length is not a multiple
+ * of 8; if such a compressed buffer is passed to this driver for
+ * decompression, the hardware will reject it and this driver will then pass
+ * it over to the software library for decompression.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include "nx-842.h"
+#include <linux/vmalloc.h>
+#include <linux/sw842.h>
+#include <linux/spinlock.h>
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
-MODULE_DESCRIPTION("842 H/W Compression driver for IBM Power processors");
+#include "nx-842.h"
 
-/**
- * nx842_constraints
- *
- * This provides the driver's constraints.  Different nx842 implementations
- * may have varying requirements.  The constraints are:
- *   @alignment:       All buffers should be aligned to this
- *   @multiple:                All buffer lengths should be a multiple of this
- *   @minimum:         Buffer lengths must not be less than this amount
- *   @maximum:         Buffer lengths must not be more than this amount
- *
- * The constraints apply to all buffers and lengths, both input and output,
- * for both compression and decompression, except for the minimum which
- * only applies to compression input and decompression output; the
- * compressed data can be less than the minimum constraint.  It can be
- * assumed that compressed data will always adhere to the multiple
- * constraint.
- *
- * The driver may succeed even if these constraints are violated;
- * however the driver can return failure or suffer reduced performance
- * if any constraint is not met.
+/* The first 5 bits of this magic are 0x1f, which is an invalid 842 5-bit
+ * template (see lib/842/842.h), so this magic number will never appear at
+ * the start of a raw 842 compressed buffer.  That is important, as any buffer
+ * passed to us without this magic is assumed to be a raw 842 compressed
+ * buffer, and passed directly to the hardware to decompress.
  */
-int nx842_constraints(struct nx842_constraints *c)
+#define NX842_CRYPTO_MAGIC     (0xf842)
+#define NX842_CRYPTO_HEADER_SIZE(g)                            \
+       (sizeof(struct nx842_crypto_header) +                   \
+        sizeof(struct nx842_crypto_header_group) * (g))
+#define NX842_CRYPTO_HEADER_MAX_SIZE                           \
+       NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)
+
+/* bounce buffer size */
+#define BOUNCE_BUFFER_ORDER    (2)
+#define BOUNCE_BUFFER_SIZE                                     \
+       ((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))
+
+/* try longer on comp because we can fallback to sw decomp if hw is busy */
+#define COMP_BUSY_TIMEOUT      (250) /* ms */
+#define DECOMP_BUSY_TIMEOUT    (50) /* ms */
+
+struct nx842_crypto_param {
+       u8 *in;
+       unsigned int iremain;
+       u8 *out;
+       unsigned int oremain;
+       unsigned int ototal;
+};
+
+static int update_param(struct nx842_crypto_param *p,
+                       unsigned int slen, unsigned int dlen)
 {
-       memcpy(c, nx842_platform_driver()->constraints, sizeof(*c));
+       if (p->iremain < slen)
+               return -EOVERFLOW;
+       if (p->oremain < dlen)
+               return -ENOSPC;
+
+       p->in += slen;
+       p->iremain -= slen;
+       p->out += dlen;
+       p->oremain -= dlen;
+       p->ototal += dlen;
+
        return 0;
 }
-EXPORT_SYMBOL_GPL(nx842_constraints);
 
-/**
- * nx842_workmem_size
- *
- * Get the amount of working memory the driver requires.
- */
-size_t nx842_workmem_size(void)
+int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
 {
-       return nx842_platform_driver()->workmem_size;
+       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       spin_lock_init(&ctx->lock);
+       ctx->driver = driver;
+       ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
+       ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+       ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
+       if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
+               kfree(ctx->wmem);
+               free_page((unsigned long)ctx->sbounce);
+               free_page((unsigned long)ctx->dbounce);
+               return -ENOMEM;
+       }
+
+       return 0;
 }
-EXPORT_SYMBOL_GPL(nx842_workmem_size);
+EXPORT_SYMBOL_GPL(nx842_crypto_init);
 
-int nx842_compress(const unsigned char *in, unsigned int ilen,
-                  unsigned char *out, unsigned int *olen, void *wmem)
+void nx842_crypto_exit(struct crypto_tfm *tfm)
 {
-       return nx842_platform_driver()->compress(in, ilen, out, olen, wmem);
+       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+
+       kfree(ctx->wmem);
+       free_page((unsigned long)ctx->sbounce);
+       free_page((unsigned long)ctx->dbounce);
 }
-EXPORT_SYMBOL_GPL(nx842_compress);
+EXPORT_SYMBOL_GPL(nx842_crypto_exit);
 
-int nx842_decompress(const unsigned char *in, unsigned int ilen,
-                    unsigned char *out, unsigned int *olen, void *wmem)
+static void check_constraints(struct nx842_constraints *c)
 {
-       return nx842_platform_driver()->decompress(in, ilen, out, olen, wmem);
+       /* limit maximum, to always have enough bounce buffer to decompress */
+       if (c->maximum > BOUNCE_BUFFER_SIZE)
+               c->maximum = BOUNCE_BUFFER_SIZE;
 }
-EXPORT_SYMBOL_GPL(nx842_decompress);
 
-static __init int nx842_init(void)
+static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
 {
-       request_module("nx-compress-powernv");
-       request_module("nx-compress-pseries");
+       int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
 
-       /* we prevent loading if there's no platform driver, and we get the
-        * module that set it so it won't unload, so we don't need to check
-        * if it's set in any of the above functions
-        */
-       if (!nx842_platform_driver_get()) {
-               pr_err("no nx842 driver found.\n");
-               return -ENODEV;
+       /* compress should have added space for header */
+       if (s > be16_to_cpu(hdr->group[0].padding)) {
+               pr_err("Internal error: no space for header\n");
+               return -EINVAL;
        }
 
+       memcpy(buf, hdr, s);
+
+       print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
+
        return 0;
 }
-module_init(nx842_init);
 
-static void __exit nx842_exit(void)
+static int compress(struct nx842_crypto_ctx *ctx,
+                   struct nx842_crypto_param *p,
+                   struct nx842_crypto_header_group *g,
+                   struct nx842_constraints *c,
+                   u16 *ignore,
+                   unsigned int hdrsize)
+{
+       unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
+       unsigned int adj_slen = slen;
+       u8 *src = p->in, *dst = p->out;
+       int ret, dskip = 0;
+       ktime_t timeout;
+
+       if (p->iremain == 0)
+               return -EOVERFLOW;
+
+       if (p->oremain == 0 || hdrsize + c->minimum > dlen)
+               return -ENOSPC;
+
+       if (slen % c->multiple)
+               adj_slen = round_up(slen, c->multiple);
+       if (slen < c->minimum)
+               adj_slen = c->minimum;
+       if (slen > c->maximum)
+               adj_slen = slen = c->maximum;
+       if (adj_slen > slen || (u64)src % c->alignment) {
+               adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
+               slen = min(slen, BOUNCE_BUFFER_SIZE);
+               if (adj_slen > slen)
+                       memset(ctx->sbounce + slen, 0, adj_slen - slen);
+               memcpy(ctx->sbounce, src, slen);
+               src = ctx->sbounce;
+               slen = adj_slen;
+               pr_debug("using comp sbounce buffer, len %x\n", slen);
+       }
+
+       dst += hdrsize;
+       dlen -= hdrsize;
+
+       if ((u64)dst % c->alignment) {
+               dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
+               dst += dskip;
+               dlen -= dskip;
+       }
+       if (dlen % c->multiple)
+               dlen = round_down(dlen, c->multiple);
+       if (dlen < c->minimum) {
+nospc:
+               dst = ctx->dbounce;
+               dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
+               dlen = round_down(dlen, c->multiple);
+               dskip = 0;
+               pr_debug("using comp dbounce buffer, len %x\n", dlen);
+       }
+       if (dlen > c->maximum)
+               dlen = c->maximum;
+
+       tmplen = dlen;
+       timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
+       do {
+               dlen = tmplen; /* reset dlen, if we're retrying */
+               ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
+               /* possibly we should reduce the slen here, instead of
+                * retrying with the dbounce buffer?
+                */
+               if (ret == -ENOSPC && dst != ctx->dbounce)
+                       goto nospc;
+       } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+       if (ret)
+               return ret;
+
+       dskip += hdrsize;
+
+       if (dst == ctx->dbounce)
+               memcpy(p->out + dskip, dst, dlen);
+
+       g->padding = cpu_to_be16(dskip);
+       g->compressed_length = cpu_to_be32(dlen);
+       g->uncompressed_length = cpu_to_be32(slen);
+
+       if (p->iremain < slen) {
+               *ignore = slen - p->iremain;
+               slen = p->iremain;
+       }
+
+       pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
+                slen, *ignore, dlen, dskip);
+
+       return update_param(p, slen, dskip + dlen);
+}
+
+int nx842_crypto_compress(struct crypto_tfm *tfm,
+                         const u8 *src, unsigned int slen,
+                         u8 *dst, unsigned int *dlen)
+{
+       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct nx842_crypto_header *hdr = &ctx->header;
+       struct nx842_crypto_param p;
+       struct nx842_constraints c = *ctx->driver->constraints;
+       unsigned int groups, hdrsize, h;
+       int ret, n;
+       bool add_header;
+       u16 ignore = 0;
+
+       check_constraints(&c);
+
+       p.in = (u8 *)src;
+       p.iremain = slen;
+       p.out = dst;
+       p.oremain = *dlen;
+       p.ototal = 0;
+
+       *dlen = 0;
+
+       groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
+                      DIV_ROUND_UP(p.iremain, c.maximum));
+       hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
+
+       spin_lock_bh(&ctx->lock);
+
+       /* skip adding header if the buffers meet all constraints */
+       add_header = (p.iremain % c.multiple    ||
+                     p.iremain < c.minimum     ||
+                     p.iremain > c.maximum     ||
+                     (u64)p.in % c.alignment   ||
+                     p.oremain % c.multiple    ||
+                     p.oremain < c.minimum     ||
+                     p.oremain > c.maximum     ||
+                     (u64)p.out % c.alignment);
+
+       hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
+       hdr->groups = 0;
+       hdr->ignore = 0;
+
+       while (p.iremain > 0) {
+               n = hdr->groups++;
+               ret = -ENOSPC;
+               if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
+                       goto unlock;
+
+               /* header goes before first group */
+               h = !n && add_header ? hdrsize : 0;
+
+               if (ignore)
+                       pr_warn("internal error, ignore is set %x\n", ignore);
+
+               ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
+               if (ret)
+                       goto unlock;
+       }
+
+       if (!add_header && hdr->groups > 1) {
+               pr_err("Internal error: No header but multiple groups\n");
+               ret = -EINVAL;
+               goto unlock;
+       }
+
+       /* ignore indicates the input stream needed to be padded */
+       hdr->ignore = cpu_to_be16(ignore);
+       if (ignore)
+               pr_debug("marked %d bytes as ignore\n", ignore);
+
+       if (add_header)
+               ret = nx842_crypto_add_header(hdr, dst);
+       if (ret)
+               goto unlock;
+
+       *dlen = p.ototal;
+
+       pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
+
+unlock:
+       spin_unlock_bh(&ctx->lock);
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_crypto_compress);
+
+static int decompress(struct nx842_crypto_ctx *ctx,
+                     struct nx842_crypto_param *p,
+                     struct nx842_crypto_header_group *g,
+                     struct nx842_constraints *c,
+                     u16 ignore)
 {
-       nx842_platform_driver_put();
+       unsigned int slen = be32_to_cpu(g->compressed_length);
+       unsigned int required_len = be32_to_cpu(g->uncompressed_length);
+       unsigned int dlen = p->oremain, tmplen;
+       unsigned int adj_slen = slen;
+       u8 *src = p->in, *dst = p->out;
+       u16 padding = be16_to_cpu(g->padding);
+       int ret, spadding = 0, dpadding = 0;
+       ktime_t timeout;
+
+       if (!slen || !required_len)
+               return -EINVAL;
+
+       if (p->iremain <= 0 || padding + slen > p->iremain)
+               return -EOVERFLOW;
+
+       if (p->oremain <= 0 || required_len - ignore > p->oremain)
+               return -ENOSPC;
+
+       src += padding;
+
+       if (slen % c->multiple)
+               adj_slen = round_up(slen, c->multiple);
+       if (slen < c->minimum)
+               adj_slen = c->minimum;
+       if (slen > c->maximum)
+               goto usesw;
+       if (slen < adj_slen || (u64)src % c->alignment) {
+               /* we can append padding bytes because the 842 format defines
+                * an "end" template (see lib/842/842_decompress.c) and will
+                * ignore any bytes following it.
+                */
+               if (slen < adj_slen)
+                       memset(ctx->sbounce + slen, 0, adj_slen - slen);
+               memcpy(ctx->sbounce, src, slen);
+               src = ctx->sbounce;
+               spadding = adj_slen - slen;
+               slen = adj_slen;
+               pr_debug("using decomp sbounce buffer, len %x\n", slen);
+       }
+
+       if (dlen % c->multiple)
+               dlen = round_down(dlen, c->multiple);
+       if (dlen < required_len || (u64)dst % c->alignment) {
+               dst = ctx->dbounce;
+               dlen = min(required_len, BOUNCE_BUFFER_SIZE);
+               pr_debug("using decomp dbounce buffer, len %x\n", dlen);
+       }
+       if (dlen < c->minimum)
+               goto usesw;
+       if (dlen > c->maximum)
+               dlen = c->maximum;
+
+       tmplen = dlen;
+       timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
+       do {
+               dlen = tmplen; /* reset dlen, if we're retrying */
+               ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
+       } while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
+       if (ret) {
+usesw:
+               /* reset everything, sw doesn't have constraints */
+               src = p->in + padding;
+               slen = be32_to_cpu(g->compressed_length);
+               spadding = 0;
+               dst = p->out;
+               dlen = p->oremain;
+               dpadding = 0;
+               if (dlen < required_len) { /* have ignore bytes */
+                       dst = ctx->dbounce;
+                       dlen = BOUNCE_BUFFER_SIZE;
+               }
+               pr_info_ratelimited("using software 842 decompression\n");
+               ret = sw842_decompress(src, slen, dst, &dlen);
+       }
+       if (ret)
+               return ret;
+
+       slen -= spadding;
+
+       dlen -= ignore;
+       if (ignore)
+               pr_debug("ignoring last %x bytes\n", ignore);
+
+       if (dst == ctx->dbounce)
+               memcpy(p->out, dst, dlen);
+
+       pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
+                slen, padding, dlen, ignore);
+
+       return update_param(p, slen + padding, dlen);
 }
-module_exit(nx842_exit);
+
+int nx842_crypto_decompress(struct crypto_tfm *tfm,
+                           const u8 *src, unsigned int slen,
+                           u8 *dst, unsigned int *dlen)
+{
+       struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
+       struct nx842_crypto_header *hdr;
+       struct nx842_crypto_param p;
+       struct nx842_constraints c = *ctx->driver->constraints;
+       int n, ret, hdr_len;
+       u16 ignore = 0;
+
+       check_constraints(&c);
+
+       p.in = (u8 *)src;
+       p.iremain = slen;
+       p.out = dst;
+       p.oremain = *dlen;
+       p.ototal = 0;
+
+       *dlen = 0;
+
+       hdr = (struct nx842_crypto_header *)src;
+
+       spin_lock_bh(&ctx->lock);
+
+       /* If it doesn't start with our header magic number, assume it's a raw
+        * 842 compressed buffer and pass it directly to the hardware driver
+        */
+       if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
+               struct nx842_crypto_header_group g = {
+                       .padding =              0,
+                       .compressed_length =    cpu_to_be32(p.iremain),
+                       .uncompressed_length =  cpu_to_be32(p.oremain),
+               };
+
+               ret = decompress(ctx, &p, &g, &c, 0);
+               if (ret)
+                       goto unlock;
+
+               goto success;
+       }
+
+       if (!hdr->groups) {
+               pr_err("header has no groups\n");
+               ret = -EINVAL;
+               goto unlock;
+       }
+       if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
+               pr_err("header has too many groups %x, max %x\n",
+                      hdr->groups, NX842_CRYPTO_GROUP_MAX);
+               ret = -EINVAL;
+               goto unlock;
+       }
+
+       hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
+       if (hdr_len > slen) {
+               ret = -EOVERFLOW;
+               goto unlock;
+       }
+
+       memcpy(&ctx->header, src, hdr_len);
+       hdr = &ctx->header;
+
+       for (n = 0; n < hdr->groups; n++) {
+               /* ignore applies to last group */
+               if (n + 1 == hdr->groups)
+                       ignore = be16_to_cpu(hdr->ignore);
+
+               ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
+               if (ret)
+                       goto unlock;
+       }
+
+success:
+       *dlen = p.ototal;
+
+       pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
+
+       ret = 0;
+
+unlock:
+       spin_unlock_bh(&ctx->lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
+MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
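
The comments in the file above describe the wrapping header: a 0xf842 magic that can never begin a raw 842 stream, a group count, and per-group padding/length fields. A stand-alone sketch of that layout; it mirrors nx842_crypto_header{,_group} with host-endian stand-ins for the __be16/__be32 fields and invented type names, purely to show the size arithmetic and the invalid-template argument for the magic:

/* Sketch only: approximate the on-wire header layout and show why a buffer
 * beginning with 0xf842 cannot be raw 842 data (its first 5-bit template,
 * 0x1f, is not a valid 842 template).
 */
#include <stdint.h>
#include <stdio.h>

struct group_hdr {
	uint16_t padding;		/* unused bytes before the group */
	uint32_t compressed_length;
	uint32_t uncompressed_length;
} __attribute__((packed));

struct wrap_hdr {
	uint16_t magic;			/* 0xf842, stored big-endian by the driver */
	uint16_t ignore;		/* trailing bytes to drop after decompress */
	uint8_t  groups;
	struct group_hdr group[];
} __attribute__((packed));

#define HEADER_SIZE(g) (sizeof(struct wrap_hdr) + sizeof(struct group_hdr) * (g))

int main(void)
{
	unsigned int magic = 0xf842;

	printf("header size for 1 group:   %zu bytes\n", HEADER_SIZE(1));
	printf("header size for 32 groups: %zu bytes\n", HEADER_SIZE(32));
	printf("first 5 bits of magic: 0x%02x (invalid 842 template)\n",
	       (magic >> 11) & 0x1f);
	return 0;
}
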
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index ac0ea79d0f8b98c1d8123f1017256eb145cfc8f4..a4eee3bba93736b601ca587fb702b4e88de1b9a9 100644 (file)
@@ -3,8 +3,9 @@
 #define __NX_842_H__
 
 #include <linux/kernel.h>
+#include <linux/init.h>
 #include <linux/module.h>
-#include <linux/sw842.h>
+#include <linux/crypto.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/io.h>
@@ -104,6 +105,25 @@ static inline unsigned long nx842_get_pa(void *addr)
 #define GET_FIELD(v, m)                (((v) & (m)) >> MASK_LSH(m))
 #define SET_FIELD(v, m, val)   (((v) & ~(m)) | (((val) << MASK_LSH(m)) & (m)))
 
+/**
+ * This provides the driver's constraints.  Different nx842 implementations
+ * may have varying requirements.  The constraints are:
+ *   @alignment:       All buffers should be aligned to this
+ *   @multiple:                All buffer lengths should be a multiple of this
+ *   @minimum:         Buffer lengths must not be less than this amount
+ *   @maximum:         Buffer lengths must not be more than this amount
+ *
+ * The constraints apply to all buffers and lengths, both input and output,
+ * for both compression and decompression, except for the minimum which
+ * only applies to compression input and decompression output; the
+ * compressed data can be less than the minimum constraint.  It can be
+ * assumed that compressed data will always adhere to the multiple
+ * constraint.
+ *
+ * The driver may succeed even if these constraints are violated;
+ * however the driver can return failure or suffer reduced performance
+ * if any constraint is not met.
+ */
 struct nx842_constraints {
        int alignment;
        int multiple;
@@ -126,19 +146,40 @@ struct nx842_driver {
                          void *wrkmem);
 };
 
-struct nx842_driver *nx842_platform_driver(void);
-bool nx842_platform_driver_set(struct nx842_driver *driver);
-void nx842_platform_driver_unset(struct nx842_driver *driver);
-bool nx842_platform_driver_get(void);
-void nx842_platform_driver_put(void);
+struct nx842_crypto_header_group {
+       __be16 padding;                 /* unused bytes at start of group */
+       __be32 compressed_length;       /* compressed bytes in group */
+       __be32 uncompressed_length;     /* bytes after decompression */
+} __packed;
+
+struct nx842_crypto_header {
+       __be16 magic;           /* NX842_CRYPTO_MAGIC */
+       __be16 ignore;          /* decompressed end bytes to ignore */
+       u8 groups;              /* total groups in this header */
+       struct nx842_crypto_header_group group[];
+} __packed;
 
-size_t nx842_workmem_size(void);
+#define NX842_CRYPTO_GROUP_MAX (0x20)
 
-int nx842_constraints(struct nx842_constraints *constraints);
+struct nx842_crypto_ctx {
+       spinlock_t lock;
+
+       u8 *wmem;
+       u8 *sbounce, *dbounce;
+
+       struct nx842_crypto_header header;
+       struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
+
+       struct nx842_driver *driver;
+};
 
-int nx842_compress(const unsigned char *in, unsigned int in_len,
-                  unsigned char *out, unsigned int *out_len, void *wrkmem);
-int nx842_decompress(const unsigned char *in, unsigned int in_len,
-                    unsigned char *out, unsigned int *out_len, void *wrkmem);
+int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver);
+void nx842_crypto_exit(struct crypto_tfm *tfm);
+int nx842_crypto_compress(struct crypto_tfm *tfm,
+                         const u8 *src, unsigned int slen,
+                         u8 *dst, unsigned int *dlen);
+int nx842_crypto_decompress(struct crypto_tfm *tfm,
+                           const u8 *src, unsigned int slen,
+                           u8 *dst, unsigned int *dlen);
 
 #endif /* __NX_842_H__ */
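
The kernel-doc moved into the header above defines the alignment/multiple/minimum/maximum constraints that the glue code compensates for with bounce buffers. A stand-alone sketch of the length adjustment that the compress path applies (round up to the multiple, pad up to the minimum, clamp at the maximum); the constraint values and helper name here are invented for illustration:

/* Sketch: adjust a caller-supplied length to hypothetical nx842-style
 * constraints, mirroring the adj_slen handling in the glue code above.
 */
#include <stdio.h>

struct constraints {
	unsigned int multiple;
	unsigned int minimum;
	unsigned int maximum;
};

static unsigned int adjust_len(unsigned int len, const struct constraints *c)
{
	if (len % c->multiple)				/* round up to multiple */
		len = (len + c->multiple - 1) / c->multiple * c->multiple;
	if (len < c->minimum)				/* pad up to minimum */
		len = c->minimum;
	if (len > c->maximum)				/* clamp; larger inputs split */
		len = c->maximum;
	return len;
}

int main(void)
{
	struct constraints c = { .multiple = 8, .minimum = 64, .maximum = 16384 };
	unsigned int lens[] = { 5, 100, 1000000 };

	for (int i = 0; i < 3; i++)
		printf("%7u -> %7u\n", lens[i], adjust_len(lens[i], &c));
	return 0;
}
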
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index e4311ce0cd78cfc93eea1cec30a37a9ec3b54d25..195c9207a98d885e21b9f0f8fc7bab517aa9ed22 100644 (file)
@@ -94,8 +94,6 @@ static int ccm_aes_nx_setauthsize(struct crypto_aead *tfm,
                return -EINVAL;
        }
 
-       crypto_aead_crt(tfm)->authsize = authsize;
-
        return 0;
 }
 
@@ -111,8 +109,6 @@ static int ccm4309_aes_nx_setauthsize(struct crypto_aead *tfm,
                return -EINVAL;
        }
 
-       crypto_aead_crt(tfm)->authsize = authsize;
-
        return 0;
 }
 
@@ -174,6 +170,7 @@ static int generate_pat(u8                   *iv,
                        struct nx_crypto_ctx *nx_ctx,
                        unsigned int          authsize,
                        unsigned int          nbytes,
+                       unsigned int          assoclen,
                        u8                   *out)
 {
        struct nx_sg *nx_insg = nx_ctx->in_sg;
@@ -200,16 +197,16 @@ static int generate_pat(u8                   *iv,
         * greater than 2^32.
         */
 
-       if (!req->assoclen) {
+       if (!assoclen) {
                b0 = nx_ctx->csbcpb->cpb.aes_ccm.in_pat_or_b0;
-       } else if (req->assoclen <= 14) {
+       } else if (assoclen <= 14) {
                /* if associated data is 14 bytes or less, we do 1 GCM
                 * operation on 2 AES blocks, B0 (stored in the csbcpb) and B1,
                 * which is fed in through the source buffers here */
                b0 = nx_ctx->csbcpb->cpb.aes_ccm.in_pat_or_b0;
                b1 = nx_ctx->priv.ccm.iauth_tag;
-               iauth_len = req->assoclen;
-       } else if (req->assoclen <= 65280) {
+               iauth_len = assoclen;
+       } else if (assoclen <= 65280) {
                /* if associated data is less than (2^16 - 2^8), we construct
                 * B1 differently and feed in the associated data to a CCA
                 * operation */
@@ -223,7 +220,7 @@ static int generate_pat(u8                   *iv,
        }
 
        /* generate B0 */
-       rc = generate_b0(iv, req->assoclen, authsize, nbytes, b0);
+       rc = generate_b0(iv, assoclen, authsize, nbytes, b0);
        if (rc)
                return rc;
 
@@ -233,22 +230,22 @@ static int generate_pat(u8                   *iv,
         */
        if (b1) {
                memset(b1, 0, 16);
-               if (req->assoclen <= 65280) {
-                       *(u16 *)b1 = (u16)req->assoclen;
-                       scatterwalk_map_and_copy(b1 + 2, req->assoc, 0,
+               if (assoclen <= 65280) {
+                       *(u16 *)b1 = assoclen;
+                       scatterwalk_map_and_copy(b1 + 2, req->src, 0,
                                         iauth_len, SCATTERWALK_FROM_SG);
                } else {
                        *(u16 *)b1 = (u16)(0xfffe);
-                       *(u32 *)&b1[2] = (u32)req->assoclen;
-                       scatterwalk_map_and_copy(b1 + 6, req->assoc, 0,
+                       *(u32 *)&b1[2] = assoclen;
+                       scatterwalk_map_and_copy(b1 + 6, req->src, 0,
                                         iauth_len, SCATTERWALK_FROM_SG);
                }
        }
 
        /* now copy any remaining AAD to scatterlist and call nx... */
-       if (!req->assoclen) {
+       if (!assoclen) {
                return rc;
-       } else if (req->assoclen <= 14) {
+       } else if (assoclen <= 14) {
                unsigned int len = 16;
 
                nx_insg = nx_build_sg_list(nx_insg, b1, &len, nx_ctx->ap->sglen);
@@ -280,7 +277,7 @@ static int generate_pat(u8                   *iv,
                        return rc;
 
                atomic_inc(&(nx_ctx->stats->aes_ops));
-               atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes));
+               atomic64_add(assoclen, &nx_ctx->stats->aes_bytes);
 
        } else {
                unsigned int processed = 0, to_process;
@@ -294,15 +291,15 @@ static int generate_pat(u8                   *iv,
                                nx_ctx->ap->databytelen/NX_PAGE_SIZE);
 
                do {
-                       to_process = min_t(u32, req->assoclen - processed,
+                       to_process = min_t(u32, assoclen - processed,
                                           nx_ctx->ap->databytelen);
 
                        nx_insg = nx_walk_and_build(nx_ctx->in_sg,
                                                    nx_ctx->ap->sglen,
-                                                   req->assoc, processed,
+                                                   req->src, processed,
                                                    &to_process);
 
-                       if ((to_process + processed) < req->assoclen) {
+                       if ((to_process + processed) < assoclen) {
                                NX_CPB_FDM(nx_ctx->csbcpb_aead) |=
                                        NX_FDM_INTERMEDIATE;
                        } else {
@@ -328,11 +325,10 @@ static int generate_pat(u8                   *iv,
                        NX_CPB_FDM(nx_ctx->csbcpb_aead) |= NX_FDM_CONTINUATION;
 
                        atomic_inc(&(nx_ctx->stats->aes_ops));
-                       atomic64_add(req->assoclen,
-                                       &(nx_ctx->stats->aes_bytes));
+                       atomic64_add(assoclen, &nx_ctx->stats->aes_bytes);
 
                        processed += to_process;
-               } while (processed < req->assoclen);
+               } while (processed < assoclen);
 
                result = nx_ctx->csbcpb_aead->cpb.aes_cca.out_pat_or_b0;
        }
@@ -343,7 +339,8 @@ static int generate_pat(u8                   *iv,
 }
 
 static int ccm_nx_decrypt(struct aead_request   *req,
-                         struct blkcipher_desc *desc)
+                         struct blkcipher_desc *desc,
+                         unsigned int assoclen)
 {
        struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
@@ -360,10 +357,10 @@ static int ccm_nx_decrypt(struct aead_request   *req,
 
        /* copy out the auth tag to compare with later */
        scatterwalk_map_and_copy(priv->oauth_tag,
-                                req->src, nbytes, authsize,
+                                req->src, nbytes + req->assoclen, authsize,
                                 SCATTERWALK_FROM_SG);
 
-       rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes,
+       rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes, assoclen,
                          csbcpb->cpb.aes_ccm.in_pat_or_b0);
        if (rc)
                goto out;
@@ -383,8 +380,8 @@ static int ccm_nx_decrypt(struct aead_request   *req,
                NX_CPB_FDM(nx_ctx->csbcpb) &= ~NX_FDM_ENDE_ENCRYPT;
 
                rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src,
-                                       &to_process, processed,
-                                       csbcpb->cpb.aes_ccm.iv_or_ctr);
+                                      &to_process, processed + req->assoclen,
+                                      csbcpb->cpb.aes_ccm.iv_or_ctr);
                if (rc)
                        goto out;
 
@@ -420,7 +417,8 @@ out:
 }
 
 static int ccm_nx_encrypt(struct aead_request   *req,
-                         struct blkcipher_desc *desc)
+                         struct blkcipher_desc *desc,
+                         unsigned int assoclen)
 {
        struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
@@ -432,7 +430,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 
        spin_lock_irqsave(&nx_ctx->lock, irq_flags);
 
-       rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes,
+       rc = generate_pat(desc->info, req, nx_ctx, authsize, nbytes, assoclen,
                          csbcpb->cpb.aes_ccm.in_pat_or_b0);
        if (rc)
                goto out;
@@ -451,7 +449,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
                NX_CPB_FDM(csbcpb) |= NX_FDM_ENDE_ENCRYPT;
 
                rc = nx_build_sg_lists(nx_ctx, desc, req->dst, req->src,
-                                       &to_process, processed,
+                                      &to_process, processed + req->assoclen,
                                       csbcpb->cpb.aes_ccm.iv_or_ctr);
                if (rc)
                        goto out;
@@ -483,7 +481,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 
        /* copy out the auth tag */
        scatterwalk_map_and_copy(csbcpb->cpb.aes_ccm.out_pat_or_mac,
-                                req->dst, nbytes, authsize,
+                                req->dst, nbytes + req->assoclen, authsize,
                                 SCATTERWALK_TO_SG);
 
 out:
@@ -503,9 +501,8 @@ static int ccm4309_aes_nx_encrypt(struct aead_request *req)
        memcpy(iv + 4, req->iv, 8);
 
        desc.info = iv;
-       desc.tfm = (struct crypto_blkcipher *)req->base.tfm;
 
-       return ccm_nx_encrypt(req, &desc);
+       return ccm_nx_encrypt(req, &desc, req->assoclen - 8);
 }
 
 static int ccm_aes_nx_encrypt(struct aead_request *req)
@@ -514,13 +511,12 @@ static int ccm_aes_nx_encrypt(struct aead_request *req)
        int rc;
 
        desc.info = req->iv;
-       desc.tfm = (struct crypto_blkcipher *)req->base.tfm;
 
        rc = crypto_ccm_check_iv(desc.info);
        if (rc)
                return rc;
 
-       return ccm_nx_encrypt(req, &desc);
+       return ccm_nx_encrypt(req, &desc, req->assoclen);
 }
 
 static int ccm4309_aes_nx_decrypt(struct aead_request *req)
@@ -535,9 +531,8 @@ static int ccm4309_aes_nx_decrypt(struct aead_request *req)
        memcpy(iv + 4, req->iv, 8);
 
        desc.info = iv;
-       desc.tfm = (struct crypto_blkcipher *)req->base.tfm;
 
-       return ccm_nx_decrypt(req, &desc);
+       return ccm_nx_decrypt(req, &desc, req->assoclen - 8);
 }
 
 static int ccm_aes_nx_decrypt(struct aead_request *req)
@@ -546,13 +541,12 @@ static int ccm_aes_nx_decrypt(struct aead_request *req)
        int rc;
 
        desc.info = req->iv;
-       desc.tfm = (struct crypto_blkcipher *)req->base.tfm;
 
        rc = crypto_ccm_check_iv(desc.info);
        if (rc)
                return rc;
 
-       return ccm_nx_decrypt(req, &desc);
+       return ccm_nx_decrypt(req, &desc, req->assoclen);
 }
 
 /* tell the block cipher walk routines that this is a stream cipher by
@@ -560,47 +554,44 @@ static int ccm_aes_nx_decrypt(struct aead_request *req)
  * during encrypt/decrypt doesn't solve this problem, because it calls
  * blkcipher_walk_done under the covers, which doesn't use walk->blocksize,
  * but instead uses this tfm->blocksize. */
-struct crypto_alg nx_ccm_aes_alg = {
-       .cra_name        = "ccm(aes)",
-       .cra_driver_name = "ccm-aes-nx",
-       .cra_priority    = 300,
-       .cra_flags       = CRYPTO_ALG_TYPE_AEAD |
-                          CRYPTO_ALG_NEED_FALLBACK,
-       .cra_blocksize   = 1,
-       .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
-       .cra_type        = &crypto_aead_type,
-       .cra_module      = THIS_MODULE,
-       .cra_init        = nx_crypto_ctx_aes_ccm_init,
-       .cra_exit        = nx_crypto_ctx_exit,
-       .cra_aead = {
-               .ivsize      = AES_BLOCK_SIZE,
-               .maxauthsize = AES_BLOCK_SIZE,
-               .setkey      = ccm_aes_nx_set_key,
-               .setauthsize = ccm_aes_nx_setauthsize,
-               .encrypt     = ccm_aes_nx_encrypt,
-               .decrypt     = ccm_aes_nx_decrypt,
-       }
+struct aead_alg nx_ccm_aes_alg = {
+       .base = {
+               .cra_name        = "ccm(aes)",
+               .cra_driver_name = "ccm-aes-nx",
+               .cra_priority    = 300,
+               .cra_flags       = CRYPTO_ALG_NEED_FALLBACK |
+                                  CRYPTO_ALG_AEAD_NEW,
+               .cra_blocksize   = 1,
+               .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
+               .cra_module      = THIS_MODULE,
+       },
+       .init        = nx_crypto_ctx_aes_ccm_init,
+       .exit        = nx_crypto_ctx_aead_exit,
+       .ivsize      = AES_BLOCK_SIZE,
+       .maxauthsize = AES_BLOCK_SIZE,
+       .setkey      = ccm_aes_nx_set_key,
+       .setauthsize = ccm_aes_nx_setauthsize,
+       .encrypt     = ccm_aes_nx_encrypt,
+       .decrypt     = ccm_aes_nx_decrypt,
 };
 
-struct crypto_alg nx_ccm4309_aes_alg = {
-       .cra_name        = "rfc4309(ccm(aes))",
-       .cra_driver_name = "rfc4309-ccm-aes-nx",
-       .cra_priority    = 300,
-       .cra_flags       = CRYPTO_ALG_TYPE_AEAD |
-                          CRYPTO_ALG_NEED_FALLBACK,
-       .cra_blocksize   = 1,
-       .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
-       .cra_type        = &crypto_nivaead_type,
-       .cra_module      = THIS_MODULE,
-       .cra_init        = nx_crypto_ctx_aes_ccm_init,
-       .cra_exit        = nx_crypto_ctx_exit,
-       .cra_aead = {
-               .ivsize      = 8,
-               .maxauthsize = AES_BLOCK_SIZE,
-               .setkey      = ccm4309_aes_nx_set_key,
-               .setauthsize = ccm4309_aes_nx_setauthsize,
-               .encrypt     = ccm4309_aes_nx_encrypt,
-               .decrypt     = ccm4309_aes_nx_decrypt,
-               .geniv       = "seqiv",
-       }
+struct aead_alg nx_ccm4309_aes_alg = {
+       .base = {
+               .cra_name        = "rfc4309(ccm(aes))",
+               .cra_driver_name = "rfc4309-ccm-aes-nx",
+               .cra_priority    = 300,
+               .cra_flags       = CRYPTO_ALG_NEED_FALLBACK |
+                                  CRYPTO_ALG_AEAD_NEW,
+               .cra_blocksize   = 1,
+               .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
+               .cra_module      = THIS_MODULE,
+       },
+       .init        = nx_crypto_ctx_aes_ccm_init,
+       .exit        = nx_crypto_ctx_aead_exit,
+       .ivsize      = 8,
+       .maxauthsize = AES_BLOCK_SIZE,
+       .setkey      = ccm4309_aes_nx_set_key,
+       .setauthsize = ccm4309_aes_nx_setauthsize,
+       .encrypt     = ccm4309_aes_nx_encrypt,
+       .decrypt     = ccm4309_aes_nx_decrypt,
 };
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index 92c993f08213fbd767c897eae178a5129c5af899..5719638b8642dfabcf4499491c47b9ce1b06ff62 100644 (file)
 
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
-#include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/crypto.h>
 #include <asm/vio.h>
 
 #include "nx_csbcpb.h"
@@ -36,7 +34,7 @@ static int gcm_aes_nx_set_key(struct crypto_aead *tfm,
                              const u8           *in_key,
                              unsigned int        key_len)
 {
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&tfm->base);
+       struct nx_crypto_ctx *nx_ctx = crypto_aead_ctx(tfm);
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
        struct nx_csbcpb *csbcpb_aead = nx_ctx->csbcpb_aead;
 
@@ -75,7 +73,7 @@ static int gcm4106_aes_nx_set_key(struct crypto_aead *tfm,
                                  const u8           *in_key,
                                  unsigned int        key_len)
 {
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&tfm->base);
+       struct nx_crypto_ctx *nx_ctx = crypto_aead_ctx(tfm);
        char *nonce = nx_ctx->priv.gcm.nonce;
        int rc;
 
@@ -110,13 +108,14 @@ static int gcm4106_aes_nx_setauthsize(struct crypto_aead *tfm,
 
 static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
                  struct aead_request   *req,
-                 u8                    *out)
+                 u8                    *out,
+                 unsigned int assoclen)
 {
        int rc;
        struct nx_csbcpb *csbcpb_aead = nx_ctx->csbcpb_aead;
        struct scatter_walk walk;
        struct nx_sg *nx_sg = nx_ctx->in_sg;
-       unsigned int nbytes = req->assoclen;
+       unsigned int nbytes = assoclen;
        unsigned int processed = 0, to_process;
        unsigned int max_sg_len;
 
@@ -167,7 +166,7 @@ static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
                NX_CPB_FDM(csbcpb_aead) |= NX_FDM_CONTINUATION;
 
                atomic_inc(&(nx_ctx->stats->aes_ops));
-               atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes));
+               atomic64_add(assoclen, &(nx_ctx->stats->aes_bytes));
 
                processed += to_process;
        } while (processed < nbytes);
@@ -177,13 +176,15 @@ static int nx_gca(struct nx_crypto_ctx  *nx_ctx,
        return rc;
 }
 
-static int gmac(struct aead_request *req, struct blkcipher_desc *desc)
+static int gmac(struct aead_request *req, struct blkcipher_desc *desc,
+               unsigned int assoclen)
 {
        int rc;
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct nx_crypto_ctx *nx_ctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
        struct nx_sg *nx_sg;
-       unsigned int nbytes = req->assoclen;
+       unsigned int nbytes = assoclen;
        unsigned int processed = 0, to_process;
        unsigned int max_sg_len;
 
@@ -238,7 +239,7 @@ static int gmac(struct aead_request *req, struct blkcipher_desc *desc)
                NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
                atomic_inc(&(nx_ctx->stats->aes_ops));
-               atomic64_add(req->assoclen, &(nx_ctx->stats->aes_bytes));
+               atomic64_add(assoclen, &(nx_ctx->stats->aes_bytes));
 
                processed += to_process;
        } while (processed < nbytes);
@@ -253,7 +254,8 @@ static int gcm_empty(struct aead_request *req, struct blkcipher_desc *desc,
                     int enc)
 {
        int rc;
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct nx_crypto_ctx *nx_ctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
        char out[AES_BLOCK_SIZE];
        struct nx_sg *in_sg, *out_sg;
@@ -314,9 +316,11 @@ out:
        return rc;
 }
 
-static int gcm_aes_nx_crypt(struct aead_request *req, int enc)
+static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
+                           unsigned int assoclen)
 {
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct nx_crypto_ctx *nx_ctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct nx_gcm_rctx *rctx = aead_request_ctx(req);
        struct nx_csbcpb *csbcpb = nx_ctx->csbcpb;
        struct blkcipher_desc desc;
@@ -332,10 +336,10 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc)
        *(u32 *)(desc.info + NX_GCM_CTR_OFFSET) = 1;
 
        if (nbytes == 0) {
-               if (req->assoclen == 0)
+               if (assoclen == 0)
                        rc = gcm_empty(req, &desc, enc);
                else
-                       rc = gmac(req, &desc);
+                       rc = gmac(req, &desc, assoclen);
                if (rc)
                        goto out;
                else
@@ -343,9 +347,10 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc)
        }
 
        /* Process associated data */
-       csbcpb->cpb.aes_gcm.bit_length_aad = req->assoclen * 8;
-       if (req->assoclen) {
-               rc = nx_gca(nx_ctx, req, csbcpb->cpb.aes_gcm.in_pat_or_aad);
+       csbcpb->cpb.aes_gcm.bit_length_aad = assoclen * 8;
+       if (assoclen) {
+               rc = nx_gca(nx_ctx, req, csbcpb->cpb.aes_gcm.in_pat_or_aad,
+                           assoclen);
                if (rc)
                        goto out;
        }
@@ -363,7 +368,6 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc)
                to_process = nbytes - processed;
 
                csbcpb->cpb.aes_gcm.bit_length_data = nbytes * 8;
-               desc.tfm = (struct crypto_blkcipher *) req->base.tfm;
                rc = nx_build_sg_lists(nx_ctx, &desc, req->dst,
                                       req->src, &to_process,
                                       processed + req->assoclen,
@@ -430,7 +434,7 @@ static int gcm_aes_nx_encrypt(struct aead_request *req)
 
        memcpy(iv, req->iv, 12);
 
-       return gcm_aes_nx_crypt(req, 1);
+       return gcm_aes_nx_crypt(req, 1, req->assoclen);
 }
 
 static int gcm_aes_nx_decrypt(struct aead_request *req)
@@ -440,12 +444,13 @@ static int gcm_aes_nx_decrypt(struct aead_request *req)
 
        memcpy(iv, req->iv, 12);
 
-       return gcm_aes_nx_crypt(req, 0);
+       return gcm_aes_nx_crypt(req, 0, req->assoclen);
 }
 
 static int gcm4106_aes_nx_encrypt(struct aead_request *req)
 {
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct nx_crypto_ctx *nx_ctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct nx_gcm_rctx *rctx = aead_request_ctx(req);
        char *iv = rctx->iv;
        char *nonce = nx_ctx->priv.gcm.nonce;
@@ -453,12 +458,16 @@ static int gcm4106_aes_nx_encrypt(struct aead_request *req)
        memcpy(iv, nonce, NX_GCM4106_NONCE_LEN);
        memcpy(iv + NX_GCM4106_NONCE_LEN, req->iv, 8);
 
-       return gcm_aes_nx_crypt(req, 1);
+       if (req->assoclen < 8)
+               return -EINVAL;
+
+       return gcm_aes_nx_crypt(req, 1, req->assoclen - 8);
 }
 
 static int gcm4106_aes_nx_decrypt(struct aead_request *req)
 {
-       struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(req->base.tfm);
+       struct nx_crypto_ctx *nx_ctx =
+               crypto_aead_ctx(crypto_aead_reqtfm(req));
        struct nx_gcm_rctx *rctx = aead_request_ctx(req);
        char *iv = rctx->iv;
        char *nonce = nx_ctx->priv.gcm.nonce;
@@ -466,7 +475,10 @@ static int gcm4106_aes_nx_decrypt(struct aead_request *req)
        memcpy(iv, nonce, NX_GCM4106_NONCE_LEN);
        memcpy(iv + NX_GCM4106_NONCE_LEN, req->iv, 8);
 
-       return gcm_aes_nx_crypt(req, 0);
+       if (req->assoclen < 8)
+               return -EINVAL;
+
+       return gcm_aes_nx_crypt(req, 0, req->assoclen - 8);
 }
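
In the new AEAD request layout the 8 RFC 4106 IV bytes travel as part of the associated data in the source scatterlist, so req->assoclen now includes them; the checks added above reject undersized requests and pass assoclen - 8 down as the real AAD length. A rough caller-side sketch (variable names hypothetical, error handling omitted):

/* Sketch: submitting to rfc4106(gcm(aes)) under the new AEAD API.
 * src_sg is assumed to hold the AAD, then the 8 IV bytes, then the
 * plaintext, and assoclen counts the AAD plus those 8 bytes. */
struct aead_request *req = aead_request_alloc(tfm, GFP_KERNEL);

aead_request_set_callback(req, 0, done_cb, NULL);
aead_request_set_ad(req, aad_len + 8);
aead_request_set_crypt(req, src_sg, dst_sg, pt_len, iv);  /* iv: 8 bytes */
ret = crypto_aead_encrypt(req);
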
 
 /* tell the block cipher walk routines that this is a stream cipher by
@@ -478,6 +490,7 @@ struct aead_alg nx_gcm_aes_alg = {
        .base = {
                .cra_name        = "gcm(aes)",
                .cra_driver_name = "gcm-aes-nx",
+               .cra_flags       = CRYPTO_ALG_AEAD_NEW,
                .cra_priority    = 300,
                .cra_blocksize   = 1,
                .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
@@ -496,6 +509,7 @@ struct aead_alg nx_gcm4106_aes_alg = {
        .base = {
                .cra_name        = "rfc4106(gcm(aes))",
                .cra_driver_name = "rfc4106-gcm-aes-nx",
+               .cra_flags       = CRYPTO_ALG_AEAD_NEW,
                .cra_priority    = 300,
                .cra_blocksize   = 1,
                .cra_ctxsize     = sizeof(struct nx_crypto_ctx),
index 436971343ff7732b3be14af822e29fcc922c1e0f..9a5e643af5bc3e40c9732c863abd2562622e13a6 100644 (file)
@@ -612,11 +612,11 @@ static int nx_register_algs(void)
        if (rc)
                goto out_unreg_gcm;
 
-       rc = nx_register_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
+       rc = nx_register_aead(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
        if (rc)
                goto out_unreg_gcm4106;
 
-       rc = nx_register_alg(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
+       rc = nx_register_aead(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
        if (rc)
                goto out_unreg_ccm;
 
@@ -644,9 +644,9 @@ out_unreg_s256:
        nx_unregister_shash(&nx_shash_sha256_alg, NX_FC_SHA, NX_MODE_SHA,
                            NX_PROPS_SHA256);
 out_unreg_ccm4309:
-       nx_unregister_alg(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
+       nx_unregister_aead(&nx_ccm4309_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
 out_unreg_ccm:
-       nx_unregister_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
+       nx_unregister_aead(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
 out_unreg_gcm4106:
        nx_unregister_aead(&nx_gcm4106_aes_alg, NX_FC_AES, NX_MODE_AES_GCM);
 out_unreg_gcm:
@@ -711,11 +711,10 @@ static int nx_crypto_ctx_init(struct nx_crypto_ctx *nx_ctx, u32 fc, u32 mode)
 }
 
 /* entry points from the crypto tfm initializers */
-int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm)
+int nx_crypto_ctx_aes_ccm_init(struct crypto_aead *tfm)
 {
-       crypto_aead_set_reqsize(__crypto_aead_cast(tfm),
-                               sizeof(struct nx_ccm_rctx));
-       return nx_crypto_ctx_init(crypto_tfm_ctx(tfm), NX_FC_AES,
+       crypto_aead_set_reqsize(tfm, sizeof(struct nx_ccm_rctx));
+       return nx_crypto_ctx_init(crypto_aead_ctx(tfm), NX_FC_AES,
                                  NX_MODE_AES_CCM);
 }
 
@@ -813,9 +812,9 @@ static int nx_remove(struct vio_dev *viodev)
                                    NX_FC_SHA, NX_MODE_SHA, NX_PROPS_SHA256);
                nx_unregister_shash(&nx_shash_sha256_alg,
                                    NX_FC_SHA, NX_MODE_SHA, NX_PROPS_SHA512);
-               nx_unregister_alg(&nx_ccm4309_aes_alg,
-                                 NX_FC_AES, NX_MODE_AES_CCM);
-               nx_unregister_alg(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
+               nx_unregister_aead(&nx_ccm4309_aes_alg,
+                                  NX_FC_AES, NX_MODE_AES_CCM);
+               nx_unregister_aead(&nx_ccm_aes_alg, NX_FC_AES, NX_MODE_AES_CCM);
                nx_unregister_aead(&nx_gcm4106_aes_alg,
                                   NX_FC_AES, NX_MODE_AES_GCM);
                nx_unregister_aead(&nx_gcm_aes_alg,
index cdff03a42ae7e883648a981779bed47bb9909c92..8a4d1fd752d6380e389000800c77120191fd6049 100644 (file)
@@ -150,7 +150,7 @@ struct nx_crypto_ctx {
 };
 
 /* prototypes */
-int nx_crypto_ctx_aes_ccm_init(struct crypto_tfm *tfm);
+int nx_crypto_ctx_aes_ccm_init(struct crypto_aead *tfm);
 int nx_crypto_ctx_aes_gcm_init(struct crypto_aead *tfm);
 int nx_crypto_ctx_aes_xcbc_init(struct crypto_tfm *tfm);
 int nx_crypto_ctx_aes_ctr_init(struct crypto_tfm *tfm);
@@ -189,8 +189,8 @@ extern struct aead_alg nx_gcm_aes_alg;
 extern struct aead_alg nx_gcm4106_aes_alg;
 extern struct crypto_alg nx_ctr_aes_alg;
 extern struct crypto_alg nx_ctr3686_aes_alg;
-extern struct crypto_alg nx_ccm_aes_alg;
-extern struct crypto_alg nx_ccm4309_aes_alg;
+extern struct aead_alg nx_ccm_aes_alg;
+extern struct aead_alg nx_ccm4309_aes_alg;
 extern struct shash_alg nx_shash_aes_xcbc_alg;
 extern struct shash_alg nx_shash_sha512_alg;
 extern struct shash_alg nx_shash_sha256_alg;
index 9a28b7e07c71a88c2d4e6e89d3a0c01f323f3efa..eba23147c0ee1de8552aabe4b5d0aeeffa2e6aba 100644 (file)
 #define AES_REG_IV(dd, x)              ((dd)->pdata->iv_ofs + ((x) * 0x04))
 
 #define AES_REG_CTRL(dd)               ((dd)->pdata->ctrl_ofs)
-#define AES_REG_CTRL_CTR_WIDTH_MASK    (3 << 7)
-#define AES_REG_CTRL_CTR_WIDTH_32              (0 << 7)
-#define AES_REG_CTRL_CTR_WIDTH_64              (1 << 7)
-#define AES_REG_CTRL_CTR_WIDTH_96              (2 << 7)
-#define AES_REG_CTRL_CTR_WIDTH_128             (3 << 7)
-#define AES_REG_CTRL_CTR               (1 << 6)
-#define AES_REG_CTRL_CBC               (1 << 5)
-#define AES_REG_CTRL_KEY_SIZE          (3 << 3)
-#define AES_REG_CTRL_DIRECTION         (1 << 2)
-#define AES_REG_CTRL_INPUT_READY       (1 << 1)
-#define AES_REG_CTRL_OUTPUT_READY      (1 << 0)
+#define AES_REG_CTRL_CTR_WIDTH_MASK    GENMASK(8, 7)
+#define AES_REG_CTRL_CTR_WIDTH_32      0
+#define AES_REG_CTRL_CTR_WIDTH_64      BIT(7)
+#define AES_REG_CTRL_CTR_WIDTH_96      BIT(8)
+#define AES_REG_CTRL_CTR_WIDTH_128     GENMASK(8, 7)
+#define AES_REG_CTRL_CTR               BIT(6)
+#define AES_REG_CTRL_CBC               BIT(5)
+#define AES_REG_CTRL_KEY_SIZE          GENMASK(4, 3)
+#define AES_REG_CTRL_DIRECTION         BIT(2)
+#define AES_REG_CTRL_INPUT_READY       BIT(1)
+#define AES_REG_CTRL_OUTPUT_READY      BIT(0)
+#define AES_REG_CTRL_MASK              GENMASK(24, 2)
 
 #define AES_REG_DATA_N(dd, x)          ((dd)->pdata->data_ofs + ((x) * 0x04))
 
 #define AES_REG_REV(dd)                        ((dd)->pdata->rev_ofs)
 
 #define AES_REG_MASK(dd)               ((dd)->pdata->mask_ofs)
-#define AES_REG_MASK_SIDLE             (1 << 6)
-#define AES_REG_MASK_START             (1 << 5)
-#define AES_REG_MASK_DMA_OUT_EN                (1 << 3)
-#define AES_REG_MASK_DMA_IN_EN         (1 << 2)
-#define AES_REG_MASK_SOFTRESET         (1 << 1)
-#define AES_REG_AUTOIDLE               (1 << 0)
+#define AES_REG_MASK_SIDLE             BIT(6)
+#define AES_REG_MASK_START             BIT(5)
+#define AES_REG_MASK_DMA_OUT_EN                BIT(3)
+#define AES_REG_MASK_DMA_IN_EN         BIT(2)
+#define AES_REG_MASK_SOFTRESET         BIT(1)
+#define AES_REG_AUTOIDLE               BIT(0)
 
 #define AES_REG_LENGTH_N(x)            (0x54 + ((x) * 0x04))
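
The OMAP control and mask register bits above are now expressed with BIT()/GENMASK() from <linux/bitops.h>; the numeric values are unchanged, for example:

/* BIT(n) is (1UL << n); GENMASK(h, l) sets bits l..h inclusive, so:    */
/* GENMASK(8, 7)  == 0x180      == (3 << 7)   CTR width field           */
/* BIT(7)         == 0x080      == (1 << 7)   64-bit CTR width          */
/* GENMASK(4, 3)  == 0x018      == (3 << 3)   key size field            */
/* GENMASK(24, 2) == 0x01fffffc               new whole-field CTRL mask */
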
 
@@ -254,7 +255,7 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 {
        unsigned int key32;
        int i, err;
-       u32 val, mask = 0;
+       u32 val;
 
        err = omap_aes_hw_init(dd);
        if (err)
@@ -274,17 +275,13 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
        val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
        if (dd->flags & FLAGS_CBC)
                val |= AES_REG_CTRL_CBC;
-       if (dd->flags & FLAGS_CTR) {
+       if (dd->flags & FLAGS_CTR)
                val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_128;
-               mask = AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_MASK;
-       }
+
        if (dd->flags & FLAGS_ENCRYPT)
                val |= AES_REG_CTRL_DIRECTION;
 
-       mask |= AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
-                       AES_REG_CTRL_KEY_SIZE;
-
-       omap_aes_write_mask(dd, AES_REG_CTRL(dd), val, mask);
+       omap_aes_write_mask(dd, AES_REG_CTRL(dd), val, AES_REG_CTRL_MASK);
 
        return 0;
 }
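
omap_aes_write_ctrl() now always updates the full AES_REG_CTRL_MASK range instead of building a per-request mask, so mode bits left behind by a previous CBC/CTR operation are cleared as well. Assuming omap_aes_write_mask() is the usual read-modify-write helper, its effect is roughly:

/* Illustrative read-modify-write update; not the driver's actual
 * omap_aes_write_mask() implementation. */
#include <linux/io.h>

static void write_masked(void __iomem *reg, u32 val, u32 mask)
{
        u32 cur = readl(reg);

        cur &= ~mask;           /* clear every bit covered by the mask */
        cur |= val & mask;      /* then set only the requested bits    */
        writel(cur, reg);
}
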
@@ -558,6 +555,9 @@ static int omap_aes_check_aligned(struct scatterlist *sg, int total)
 {
        int len = 0;
 
+       if (!IS_ALIGNED(total, AES_BLOCK_SIZE))
+               return -EINVAL;
+
        while (sg) {
                if (!IS_ALIGNED(sg->offset, 4))
                        return -1;
@@ -577,9 +577,10 @@ static int omap_aes_check_aligned(struct scatterlist *sg, int total)
 static int omap_aes_copy_sgs(struct omap_aes_dev *dd)
 {
        void *buf_in, *buf_out;
-       int pages;
+       int pages, total;
 
-       pages = get_order(dd->total);
+       total = ALIGN(dd->total, AES_BLOCK_SIZE);
+       pages = get_order(total);
 
        buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages);
        buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages);
@@ -594,11 +595,11 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd)
        sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0);
 
        sg_init_table(&dd->in_sgl, 1);
-       sg_set_buf(&dd->in_sgl, buf_in, dd->total);
+       sg_set_buf(&dd->in_sgl, buf_in, total);
        dd->in_sg = &dd->in_sgl;
 
        sg_init_table(&dd->out_sgl, 1);
-       sg_set_buf(&dd->out_sgl, buf_out, dd->total);
+       sg_set_buf(&dd->out_sgl, buf_out, total);
        dd->out_sg = &dd->out_sgl;
 
        return 0;
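
A request whose length is not a whole number of AES blocks now takes the copy path above, and the bounce buffers (and, further down, the scatterlist lengths and the free_pages() order) are sized with ALIGN(), which rounds up to the next multiple of its power-of-two argument:

/* Illustrative values (AES_BLOCK_SIZE == 16): */
/* ALIGN(16, AES_BLOCK_SIZE) == 16    already block aligned             */
/* ALIGN(20, AES_BLOCK_SIZE) == 32    trailing partial block padded up  */
/* get_order(32) == 0                 still fits in a single page       */
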
@@ -611,7 +612,7 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
        struct omap_aes_ctx *ctx;
        struct omap_aes_reqctx *rctx;
        unsigned long flags;
-       int err, ret = 0;
+       int err, ret = 0, len;
 
        spin_lock_irqsave(&dd->lock, flags);
        if (req)
@@ -650,8 +651,9 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd,
                dd->sgs_copied = 0;
        }
 
-       dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total);
-       dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total);
+       len = ALIGN(dd->total, AES_BLOCK_SIZE);
+       dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len);
+       dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len);
        BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0);
 
        rctx = ablkcipher_request_ctx(req);
@@ -678,7 +680,7 @@ static void omap_aes_done_task(unsigned long data)
 {
        struct omap_aes_dev *dd = (struct omap_aes_dev *)data;
        void *buf_in, *buf_out;
-       int pages;
+       int pages, len;
 
        pr_debug("enter done_task\n");
 
@@ -697,7 +699,8 @@ static void omap_aes_done_task(unsigned long data)
 
                sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1);
 
-               pages = get_order(dd->total_save);
+               len = ALIGN(dd->total_save, AES_BLOCK_SIZE);
+               pages = get_order(len);
                free_pages((unsigned long)buf_in, pages);
                free_pages((unsigned long)buf_out, pages);
        }
@@ -726,11 +729,6 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode)
                  !!(mode & FLAGS_ENCRYPT),
                  !!(mode & FLAGS_CBC));
 
-       if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) {
-               pr_err("request size is not exact amount of AES blocks\n");
-               return -EINVAL;
-       }
-
        dd = omap_aes_find_dev(ctx);
        if (!dd)
                return -ENODEV;
@@ -833,7 +831,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
 {
        .cra_name               = "ecb(aes)",
        .cra_driver_name        = "ecb-aes-omap",
-       .cra_priority           = 100,
+       .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_ASYNC,
@@ -855,7 +853,7 @@ static struct crypto_alg algs_ecb_cbc[] = {
 {
        .cra_name               = "cbc(aes)",
        .cra_driver_name        = "cbc-aes-omap",
-       .cra_priority           = 100,
+       .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_ASYNC,
@@ -881,7 +879,7 @@ static struct crypto_alg algs_ctr[] = {
 {
        .cra_name               = "ctr(aes)",
        .cra_driver_name        = "ctr-aes-omap",
-       .cra_priority           = 100,
+       .cra_priority           = 300,
        .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
                                  CRYPTO_ALG_KERN_DRIVER_ONLY |
                                  CRYPTO_ALG_ASYNC,
@@ -1046,9 +1044,7 @@ static irqreturn_t omap_aes_irq(int irq, void *dev_id)
                        }
                }
 
-               dd->total -= AES_BLOCK_SIZE;
-
-               BUG_ON(dd->total < 0);
+               dd->total -= min_t(size_t, AES_BLOCK_SIZE, dd->total);
 
                /* Clear IRQ status */
                status &= ~AES_REG_IRQ_DATA_OUT;
index 6fdb9e8b22a75247971fca0b500414b8028bc7dd..d8c3d595d98b814ade785dfe885b2b6c7a13436f 100644 (file)
@@ -3,11 +3,13 @@ config CRYPTO_DEV_QAT
        select CRYPTO_AEAD
        select CRYPTO_AUTHENC
        select CRYPTO_BLKCIPHER
+       select CRYPTO_AKCIPHER
        select CRYPTO_HMAC
        select CRYPTO_SHA1
        select CRYPTO_SHA256
        select CRYPTO_SHA512
        select FW_LOADER
+       select ASN1
 
 config CRYPTO_DEV_QAT_DH895xCC
        tristate "Support for Intel(R) DH895xCC"
diff --git a/drivers/crypto/qat/qat_common/.gitignore b/drivers/crypto/qat/qat_common/.gitignore
new file mode 100644 (file)
index 0000000..ee32837
--- /dev/null
@@ -0,0 +1 @@
+*-asn1.[ch]
index e0424dc382feb1543784b704b4c33984927739a1..184605f76bea1519fcfbe7bdfe79beef9ce4c31b 100644 (file)
@@ -1,3 +1,6 @@
+$(obj)/qat_rsakey-asn1.o: $(obj)/qat_rsakey-asn1.c $(obj)/qat_rsakey-asn1.h
+clean-files += qat_rsakey-asn1.c qat_rsakey-asn1.h
+
 obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o
 intel_qat-objs := adf_cfg.o \
        adf_ctl_drv.o \
@@ -8,6 +11,8 @@ intel_qat-objs := adf_cfg.o \
        adf_transport.o \
        qat_crypto.o \
        qat_algs.o \
+       qat_rsakey-asn1.o \
+       qat_asym_algs.o \
        qat_uclo.o \
        qat_hal.o
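
The two lines added at the top of this Makefile hook the new RSA key parser into kbuild's ASN.1 support: qat_rsakey.asn1 is compiled into qat_rsakey-asn1.c/.h at build time (and removed again via clean-files), and the generated table is driven from C through the generic BER decoder. A hedged sketch of the call site, assuming the usual <name>_decoder naming for the generated symbol:

/* Fragment only: inside a setkey-style function.  The grammar's action
 * callbacks (qat_rsa_get_n/e/d below) capture each INTEGER as it is
 * decoded. */
#include <linux/asn1_decoder.h>
#include "qat_rsakey-asn1.h"    /* generated header */

ret = asn1_ber_decoder(&qat_rsakey_decoder, ctx, key, keylen);
if (ret < 0)
        goto free;
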
 
index 5fe90296762083e12690e70ace018b273d9e5569..99cbce6b3cbc9815a662ad8d7e6708554d29baf0 100644 (file)
@@ -135,6 +135,7 @@ struct adf_hw_device_data {
        struct adf_hw_device_class *dev_class;
        uint32_t (*get_accel_mask)(uint32_t fuse);
        uint32_t (*get_ae_mask)(uint32_t fuse);
+       uint32_t (*get_sram_bar_id)(struct adf_hw_device_data *self);
        uint32_t (*get_misc_bar_id)(struct adf_hw_device_data *self);
        uint32_t (*get_etr_bar_id)(struct adf_hw_device_data *self);
        uint32_t (*get_num_aes)(struct adf_hw_device_data *self);
@@ -151,7 +152,7 @@ struct adf_hw_device_data {
        void (*exit_arb)(struct adf_accel_dev *accel_dev);
        void (*enable_ints)(struct adf_accel_dev *accel_dev);
        const char *fw_name;
-       uint32_t pci_dev_id;
+       const char *fw_mmp_name;
        uint32_t fuses;
        uint32_t accel_capabilities_mask;
        uint16_t accel_mask;
@@ -184,6 +185,7 @@ struct icp_qat_fw_loader_handle;
 struct adf_fw_loader_data {
        struct icp_qat_fw_loader_handle *fw_loader;
        const struct firmware *uof_fw;
+       const struct firmware *mmp_fw;
 };
 
 struct adf_accel_dev {
index fdda8e7ae302511bec5c0e1c18d2c5c4b2b3d351..20b08bdcb1466f4ba8d15ccd933abd7ea4663a29 100644 (file)
@@ -55,24 +55,36 @@ int adf_ae_fw_load(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
        struct adf_hw_device_data *hw_device = accel_dev->hw_device;
-       void *uof_addr;
-       uint32_t uof_size;
+       void *uof_addr, *mmp_addr;
+       u32 uof_size, mmp_size;
 
+       if (!hw_device->fw_name)
+               return 0;
+
+       if (request_firmware(&loader_data->mmp_fw, hw_device->fw_mmp_name,
+                            &accel_dev->accel_pci_dev.pci_dev->dev)) {
+               dev_err(&GET_DEV(accel_dev), "Failed to load MMP firmware %s\n",
+                       hw_device->fw_mmp_name);
+               return -EFAULT;
+       }
        if (request_firmware(&loader_data->uof_fw, hw_device->fw_name,
                             &accel_dev->accel_pci_dev.pci_dev->dev)) {
-               dev_err(&GET_DEV(accel_dev), "Failed to load firmware %s\n",
+               dev_err(&GET_DEV(accel_dev), "Failed to load UOF firmware %s\n",
                        hw_device->fw_name);
-               return -EFAULT;
+               goto out_err;
        }
 
        uof_size = loader_data->uof_fw->size;
        uof_addr = (void *)loader_data->uof_fw->data;
+       mmp_size = loader_data->mmp_fw->size;
+       mmp_addr = (void *)loader_data->mmp_fw->data;
+       qat_uclo_wr_mimage(loader_data->fw_loader, mmp_addr, mmp_size);
        if (qat_uclo_map_uof_obj(loader_data->fw_loader, uof_addr, uof_size)) {
                dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
                goto out_err;
        }
        if (qat_uclo_wr_all_uimage(loader_data->fw_loader)) {
-               dev_err(&GET_DEV(accel_dev), "Failed to map UOF\n");
+               dev_err(&GET_DEV(accel_dev), "Failed to load UOF\n");
                goto out_err;
        }
        return 0;
@@ -85,11 +97,17 @@ out_err:
 void adf_ae_fw_release(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+       if (!hw_device->fw_name)
+               return;
 
        qat_uclo_del_uof_obj(loader_data->fw_loader);
        qat_hal_deinit(loader_data->fw_loader);
        release_firmware(loader_data->uof_fw);
+       release_firmware(loader_data->mmp_fw);
        loader_data->uof_fw = NULL;
+       loader_data->mmp_fw = NULL;
        loader_data->fw_loader = NULL;
 }
 
@@ -99,6 +117,9 @@ int adf_ae_start(struct adf_accel_dev *accel_dev)
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        uint32_t ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
 
+       if (!hw_data->fw_name)
+               return 0;
+
        for (ae = 0, ae_ctr = 0; ae < max_aes; ae++) {
                if (hw_data->ae_mask & (1 << ae)) {
                        qat_hal_start(loader_data->fw_loader, ae, 0xFF);
@@ -117,6 +138,9 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev)
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
        uint32_t ae_ctr, ae, max_aes = GET_MAX_ACCELENGINES(accel_dev);
 
+       if (!hw_data->fw_name)
+               return 0;
+
        for (ae = 0, ae_ctr = 0; ae < max_aes; ae++) {
                if (hw_data->ae_mask & (1 << ae)) {
                        qat_hal_stop(loader_data->fw_loader, ae, 0xFF);
@@ -143,6 +167,10 @@ static int adf_ae_reset(struct adf_accel_dev *accel_dev, int ae)
 int adf_ae_init(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+       if (!hw_device->fw_name)
+               return 0;
 
        loader_data = kzalloc(sizeof(*loader_data), GFP_KERNEL);
        if (!loader_data)
@@ -166,6 +194,10 @@ int adf_ae_init(struct adf_accel_dev *accel_dev)
 int adf_ae_shutdown(struct adf_accel_dev *accel_dev)
 {
        struct adf_fw_loader_data *loader_data = accel_dev->fw_loader;
+       struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+       if (!hw_device->fw_name)
+               return 0;
 
        qat_hal_deinit(loader_data->fw_loader);
        kfree(accel_dev->fw_loader);
index 2dbc733b8ab2483e8790d26a2f509239c681573d..8f34a5fce72a56ca4e41c0002f53a17aa983bf8d 100644 (file)
@@ -206,7 +206,7 @@ static struct pci_error_handlers adf_err_handler = {
  * QAT acceleration device accel_dev.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf)
 {
index ab65bc274561dc58000f644d68bef6bf3b2b7926..76eccb8d96a87080b30e2c0ddd2294c8b3cd325b 100644 (file)
@@ -123,7 +123,7 @@ static const struct file_operations qat_dev_cfg_fops = {
  * The table stores device specific config values.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_cfg_dev_add(struct adf_accel_dev *accel_dev)
 {
@@ -276,7 +276,7 @@ static int adf_cfg_key_val_get(struct adf_accel_dev *accel_dev,
  * in the given acceleration device
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev,
                                const char *section_name,
@@ -327,7 +327,7 @@ EXPORT_SYMBOL_GPL(adf_cfg_add_key_value_param);
  * will be stored.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_cfg_section_add(struct adf_accel_dev *accel_dev, const char *name)
 {
index 27e16c09230bffa24e505a03c8848fc7ab2bb40d..3c33feefee6717b6cfc3ede5994bd2ba84cd981c 100644 (file)
@@ -55,7 +55,7 @@
 
 #define ADF_MAJOR_VERSION      0
 #define ADF_MINOR_VERSION      1
-#define ADF_BUILD_VERSION      3
+#define ADF_BUILD_VERSION      4
 #define ADF_DRV_VERSION                __stringify(ADF_MAJOR_VERSION) "." \
                                __stringify(ADF_MINOR_VERSION) "." \
                                __stringify(ADF_BUILD_VERSION)
@@ -94,6 +94,11 @@ struct service_hndl {
        int admin;
 };
 
+static inline int get_current_node(void)
+{
+       return cpu_data(current_thread_info()->cpu).phys_proc_id;
+}
+
 int adf_service_register(struct service_hndl *service);
 int adf_service_unregister(struct service_hndl *service);
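
The get_current_node() helper above reports the physical package (socket) of the CPU the caller runs on; moving it into this shared header lets both the symmetric code and the new asymmetric code pick a QAT instance near the submitting thread, along the lines of:

/* Sketch of the intended use (instance lookup per package/node): */
struct qat_crypto_instance *inst =
        qat_crypto_get_instance_node(get_current_node());

if (!inst)
        return -EINVAL;
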
 
@@ -141,10 +146,13 @@ int qat_crypto_unregister(void);
 struct qat_crypto_instance *qat_crypto_get_instance_node(int node);
 void qat_crypto_put_instance(struct qat_crypto_instance *inst);
 void qat_alg_callback(void *resp);
+void qat_alg_asym_callback(void *resp);
 int qat_algs_init(void);
 void qat_algs_exit(void);
 int qat_algs_register(void);
 int qat_algs_unregister(void);
+int qat_asym_algs_register(void);
+void qat_asym_algs_unregister(void);
 
 int qat_hal_init(struct adf_accel_dev *accel_dev);
 void qat_hal_deinit(struct icp_qat_fw_loader_handle *handle);
@@ -196,4 +204,6 @@ int qat_uclo_wr_all_uimage(struct icp_qat_fw_loader_handle *handle);
 void qat_uclo_del_uof_obj(struct icp_qat_fw_loader_handle *handle);
 int qat_uclo_map_uof_obj(struct icp_qat_fw_loader_handle *handle,
                         void *addr_ptr, int mem_size);
+void qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
+                       void *addr_ptr, int mem_size);
 #endif
index 3f0ff9e7d84060918b55321b09be752533e166e2..b574a82368a40bc9e7714ca17503e5c8077c77ed 100644 (file)
@@ -60,7 +60,7 @@ static uint32_t num_devices;
  * Function adds acceleration device to the acceleration framework.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev)
 {
@@ -182,12 +182,7 @@ int adf_devmgr_verify_id(uint32_t id)
 
 void adf_devmgr_get_num_dev(uint32_t *num)
 {
-       struct list_head *itr;
-
-       *num = 0;
-       list_for_each(itr, &accel_table) {
-               (*num)++;
-       }
+       *num = num_devices;
 }
 
 int adf_dev_in_use(struct adf_accel_dev *accel_dev)
index 245f43237a2d8b168b034f8a5568753819f027dc..9a90b287ad68f901de3ed87efc33702aebd1d481 100644 (file)
@@ -69,7 +69,7 @@ static void adf_service_add(struct service_hndl *service)
  * Function adds the acceleration service to the acceleration framework.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_service_register(struct service_hndl *service)
 {
@@ -94,7 +94,7 @@ static void adf_service_remove(struct service_hndl *service)
  * Function removes the acceleration service from the acceleration framework.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_service_unregister(struct service_hndl *service)
 {
@@ -114,7 +114,7 @@ EXPORT_SYMBOL_GPL(adf_service_unregister);
  * Initialize the ring data structures and the admin comms and arbitration
  * services.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_dev_init(struct adf_accel_dev *accel_dev)
 {
@@ -214,7 +214,7 @@ EXPORT_SYMBOL_GPL(adf_dev_init);
  * is ready to be used.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_dev_start(struct adf_accel_dev *accel_dev)
 {
@@ -257,7 +257,7 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
        clear_bit(ADF_STATUS_STARTING, &accel_dev->status);
        set_bit(ADF_STATUS_STARTED, &accel_dev->status);
 
-       if (qat_algs_register()) {
+       if (qat_algs_register() || qat_asym_algs_register()) {
                dev_err(&GET_DEV(accel_dev),
                        "Failed to register crypto algs\n");
                set_bit(ADF_STATUS_STARTING, &accel_dev->status);
@@ -276,7 +276,7 @@ EXPORT_SYMBOL_GPL(adf_dev_start);
  * is shutting down.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_dev_stop(struct adf_accel_dev *accel_dev)
 {
@@ -296,6 +296,8 @@ int adf_dev_stop(struct adf_accel_dev *accel_dev)
                dev_err(&GET_DEV(accel_dev),
                        "Failed to unregister crypto algs\n");
 
+       qat_asym_algs_unregister();
+
        list_for_each(list_itr, &service_table) {
                service = list_entry(list_itr, struct service_hndl, list);
                if (service->admin)
index db2926bff8a5bc843741b7cf56facb114ab853b7..df0331d26838f33721c3b42019210edfb8e58f77 100644 (file)
@@ -463,7 +463,7 @@ err:
  * acceleration device accel_dev.
  * To be used by QAT device specific drivers.
  *
- * Return: 0 on success, error code othewise.
+ * Return: 0 on success, error code otherwise.
  */
 int adf_init_etr_data(struct adf_accel_dev *accel_dev)
 {
index 160c9a36c9198689d957c010e14930e3b3e8ffae..6ad7e4e1edcadd2a839ed866791b0dfb0d60cb47 100644 (file)
@@ -97,8 +97,9 @@
 #define ADF_RING_SIZE_IN_BYTES_TO_SIZE(SIZE) ((1 << (SIZE - 1)) >> 7)
 
 /* Minimum ring buffer size for memory allocation */
-#define ADF_RING_SIZE_BYTES_MIN(SIZE) ((SIZE < ADF_RING_SIZE_4K) ? \
-                               ADF_RING_SIZE_4K : SIZE)
+#define ADF_RING_SIZE_BYTES_MIN(SIZE) \
+       ((SIZE < ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K)) ? \
+               ADF_SIZE_TO_RING_SIZE_IN_BYTES(ADF_RING_SIZE_4K) : SIZE)
 #define ADF_RING_SIZE_MODULO(SIZE) (SIZE + 0x6)
 #define ADF_SIZE_TO_POW(SIZE) ((((SIZE & 0x4) >> 1) | ((SIZE & 0x4) >> 2) | \
                                SIZE) & ~0x4)
index f1e30e24a4191b13f284d863ea096b1b0f5e6c02..46747f01b1d1fe18f3935a444ee76fc75d4349f4 100644 (file)
@@ -249,6 +249,8 @@ struct icp_qat_fw_comn_resp {
 
 #define QAT_COMN_RESP_CRYPTO_STATUS_BITPOS 7
 #define QAT_COMN_RESP_CRYPTO_STATUS_MASK 0x1
+#define QAT_COMN_RESP_PKE_STATUS_BITPOS 6
+#define QAT_COMN_RESP_PKE_STATUS_MASK 0x1
 #define QAT_COMN_RESP_CMP_STATUS_BITPOS 5
 #define QAT_COMN_RESP_CMP_STATUS_MASK 0x1
 #define QAT_COMN_RESP_XLAT_STATUS_BITPOS 4
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_pke.h b/drivers/crypto/qat/qat_common/icp_qat_fw_pke.h
new file mode 100644 (file)
index 0000000..0d7a9b5
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+  Copyright(c) 2014 Intel Corporation.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  Contact Information:
+  qat-linux@intel.com
+
+  BSD LICENSE
+  Copyright(c) 2014 Intel Corporation.
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in
+         the documentation and/or other materials provided with the
+         distribution.
+       * Neither the name of Intel Corporation nor the names of its
+         contributors may be used to endorse or promote products derived
+         from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+#ifndef _ICP_QAT_FW_PKE_
+#define _ICP_QAT_FW_PKE_
+
+#include "icp_qat_fw.h"
+
+struct icp_qat_fw_req_hdr_pke_cd_pars {
+       u64 content_desc_addr;
+       u32 content_desc_resrvd;
+       u32 func_id;
+};
+
+struct icp_qat_fw_req_pke_mid {
+       u64 opaque;
+       u64 src_data_addr;
+       u64 dest_data_addr;
+};
+
+struct icp_qat_fw_req_pke_hdr {
+       u8 resrvd1;
+       u8 resrvd2;
+       u8 service_type;
+       u8 hdr_flags;
+       u16 comn_req_flags;
+       u16 resrvd4;
+       struct icp_qat_fw_req_hdr_pke_cd_pars cd_pars;
+};
+
+struct icp_qat_fw_pke_request {
+       struct icp_qat_fw_req_pke_hdr pke_hdr;
+       struct icp_qat_fw_req_pke_mid pke_mid;
+       u8 output_param_count;
+       u8 input_param_count;
+       u16 resrvd1;
+       u32 resrvd2;
+       u64 next_req_adr;
+};
+
+struct icp_qat_fw_resp_pke_hdr {
+       u8 resrvd1;
+       u8 resrvd2;
+       u8 response_type;
+       u8 hdr_flags;
+       u16 comn_resp_flags;
+       u16 resrvd4;
+};
+
+struct icp_qat_fw_pke_resp {
+       struct icp_qat_fw_resp_pke_hdr pke_resp_hdr;
+       u64 opaque;
+       u64 src_data_addr;
+       u64 dest_data_addr;
+};
+
+#define ICP_QAT_FW_PKE_HDR_VALID_FLAG_BITPOS              7
+#define ICP_QAT_FW_PKE_HDR_VALID_FLAG_MASK                0x1
+#define ICP_QAT_FW_PKE_RESP_PKE_STAT_GET(status_word) \
+       QAT_FIELD_GET(((status_word >> ICP_QAT_FW_COMN_ONE_BYTE_SHIFT) & \
+               ICP_QAT_FW_COMN_SINGLE_BYTE_MASK), \
+               QAT_COMN_RESP_PKE_STATUS_BITPOS, \
+               QAT_COMN_RESP_PKE_STATUS_MASK)
+
+#define ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(hdr_t, val) \
+       QAT_FIELD_SET((hdr_t.hdr_flags), (val), \
+               ICP_QAT_FW_PKE_HDR_VALID_FLAG_BITPOS, \
+               ICP_QAT_FW_PKE_HDR_VALID_FLAG_MASK)
+#endif
index df427c0e9e7b2c99c8ee6cbe0c91b98c1ff47c43..9b25ab1d30a46570b9d2efe2672eef6236188477 100644 (file)
@@ -130,11 +130,6 @@ struct qat_alg_ablkcipher_ctx {
        spinlock_t lock;        /* protects qat_alg_ablkcipher_ctx struct */
 };
 
-static int get_current_node(void)
-{
-       return cpu_data(current_thread_info()->cpu).phys_proc_id;
-}
-
 static int qat_get_inter_state_size(enum icp_qat_hw_auth_algo qat_hash_alg)
 {
        switch (qat_hash_alg) {
diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
new file mode 100644 (file)
index 0000000..fe352a6
--- /dev/null
@@ -0,0 +1,652 @@
+/*
+  This file is provided under a dual BSD/GPLv2 license.  When using or
+  redistributing this file, you may do so under either license.
+
+  GPL LICENSE SUMMARY
+  Copyright(c) 2014 Intel Corporation.
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of version 2 of the GNU General Public License as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+
+  Contact Information:
+  qat-linux@intel.com
+
+  BSD LICENSE
+  Copyright(c) 2014 Intel Corporation.
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in
+         the documentation and/or other materials provided with the
+         distribution.
+       * Neither the name of Intel Corporation nor the names of its
+         contributors may be used to endorse or promote products derived
+         from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <linux/module.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <linux/dma-mapping.h>
+#include <linux/fips.h>
+#include "qat_rsakey-asn1.h"
+#include "icp_qat_fw_pke.h"
+#include "adf_accel_devices.h"
+#include "adf_transport.h"
+#include "adf_common_drv.h"
+#include "qat_crypto.h"
+
+static DEFINE_MUTEX(algs_lock);
+static unsigned int active_devs;
+
+struct qat_rsa_input_params {
+       union {
+               struct {
+                       dma_addr_t m;
+                       dma_addr_t e;
+                       dma_addr_t n;
+               } enc;
+               struct {
+                       dma_addr_t c;
+                       dma_addr_t d;
+                       dma_addr_t n;
+               } dec;
+               u64 in_tab[8];
+       };
+} __packed __aligned(64);
+
+struct qat_rsa_output_params {
+       union {
+               struct {
+                       dma_addr_t c;
+               } enc;
+               struct {
+                       dma_addr_t m;
+               } dec;
+               u64 out_tab[8];
+       };
+} __packed __aligned(64);
+
+struct qat_rsa_ctx {
+       char *n;
+       char *e;
+       char *d;
+       dma_addr_t dma_n;
+       dma_addr_t dma_e;
+       dma_addr_t dma_d;
+       unsigned int key_sz;
+       struct qat_crypto_instance *inst;
+} __packed __aligned(64);
+
+struct qat_rsa_request {
+       struct qat_rsa_input_params in;
+       struct qat_rsa_output_params out;
+       dma_addr_t phy_in;
+       dma_addr_t phy_out;
+       char *src_align;
+       struct icp_qat_fw_pke_request req;
+       struct qat_rsa_ctx *ctx;
+       int err;
+} __aligned(64);
+
+static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp)
+{
+       struct akcipher_request *areq = (void *)(__force long)resp->opaque;
+       struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64);
+       struct device *dev = &GET_DEV(req->ctx->inst->accel_dev);
+       int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET(
+                               resp->pke_resp_hdr.comn_resp_flags);
+       char *ptr = areq->dst;
+
+       err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL;
+
+       if (req->src_align)
+               dma_free_coherent(dev, req->ctx->key_sz, req->src_align,
+                                 req->in.enc.m);
+       else
+               dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz,
+                                DMA_TO_DEVICE);
+
+       dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz,
+                        DMA_FROM_DEVICE);
+       dma_unmap_single(dev, req->phy_in, sizeof(struct qat_rsa_input_params),
+                        DMA_TO_DEVICE);
+       dma_unmap_single(dev, req->phy_out,
+                        sizeof(struct qat_rsa_output_params),
+                        DMA_TO_DEVICE);
+
+       areq->dst_len = req->ctx->key_sz;
+       /* Need to set the correct length of the output */
+       while (!(*ptr) && areq->dst_len) {
+               areq->dst_len--;
+               ptr++;
+       }
+
+       if (areq->dst_len != req->ctx->key_sz)
+               memcpy(areq->dst, ptr, areq->dst_len);
+
+       akcipher_request_complete(areq, err);
+}
+
+void qat_alg_asym_callback(void *_resp)
+{
+       struct icp_qat_fw_pke_resp *resp = _resp;
+
+       qat_rsa_cb(resp);
+}
+
+#define PKE_RSA_EP_512 0x1c161b21
+#define PKE_RSA_EP_1024 0x35111bf7
+#define PKE_RSA_EP_1536 0x4d111cdc
+#define PKE_RSA_EP_2048 0x6e111dba
+#define PKE_RSA_EP_3072 0x7d111ea3
+#define PKE_RSA_EP_4096 0xa5101f7e
+
+static unsigned long qat_rsa_enc_fn_id(unsigned int len)
+{
+       unsigned int bitslen = len << 3;
+
+       switch (bitslen) {
+       case 512:
+               return PKE_RSA_EP_512;
+       case 1024:
+               return PKE_RSA_EP_1024;
+       case 1536:
+               return PKE_RSA_EP_1536;
+       case 2048:
+               return PKE_RSA_EP_2048;
+       case 3072:
+               return PKE_RSA_EP_3072;
+       case 4096:
+               return PKE_RSA_EP_4096;
+       default:
+               return 0;
+       }
+}
+
+#define PKE_RSA_DP1_512 0x1c161b3c
+#define PKE_RSA_DP1_1024 0x35111c12
+#define PKE_RSA_DP1_1536 0x4d111cf7
+#define PKE_RSA_DP1_2048 0x6e111dda
+#define PKE_RSA_DP1_3072 0x7d111ebe
+#define PKE_RSA_DP1_4096 0xa5101f98
+
+static unsigned long qat_rsa_dec_fn_id(unsigned int len)
+{
+       unsigned int bitslen = len << 3;
+
+       switch (bitslen) {
+       case 512:
+               return PKE_RSA_DP1_512;
+       case 1024:
+               return PKE_RSA_DP1_1024;
+       case 1536:
+               return PKE_RSA_DP1_1536;
+       case 2048:
+               return PKE_RSA_DP1_2048;
+       case 3072:
+               return PKE_RSA_DP1_3072;
+       case 4096:
+               return PKE_RSA_DP1_4096;
+       default:
+               return 0;
+       }
+}
+
+static int qat_rsa_enc(struct akcipher_request *req)
+{
+       struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+       struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+       struct qat_crypto_instance *inst = ctx->inst;
+       struct device *dev = &GET_DEV(inst->accel_dev);
+       struct qat_rsa_request *qat_req =
+                       PTR_ALIGN(akcipher_request_ctx(req), 64);
+       struct icp_qat_fw_pke_request *msg = &qat_req->req;
+       int ret, ctr = 0;
+
+       if (unlikely(!ctx->n || !ctx->e))
+               return -EINVAL;
+
+       if (req->dst_len < ctx->key_sz) {
+               req->dst_len = ctx->key_sz;
+               return -EOVERFLOW;
+       }
+       memset(msg, '\0', sizeof(*msg));
+       ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
+                                         ICP_QAT_FW_COMN_REQ_FLAG_SET);
+       msg->pke_hdr.cd_pars.func_id = qat_rsa_enc_fn_id(ctx->key_sz);
+       if (unlikely(!msg->pke_hdr.cd_pars.func_id))
+               return -EINVAL;
+
+       qat_req->ctx = ctx;
+       msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE;
+       msg->pke_hdr.comn_req_flags =
+               ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT,
+                                           QAT_COMN_CD_FLD_TYPE_64BIT_ADR);
+
+       qat_req->in.enc.e = ctx->dma_e;
+       qat_req->in.enc.n = ctx->dma_n;
+       ret = -ENOMEM;
+
+       /*
+        * src can be of any size in the valid range, but the HW expects it to
+        * be the same size as the modulus n, so if it is shorter we need to
+        * allocate a new buffer and copy the src data into it.
+        * Otherwise we just map the user-provided buffer.
+        */
+       if (req->src_len < ctx->key_sz) {
+               int shift = ctx->key_sz - req->src_len;
+
+               qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz,
+                                                        &qat_req->in.enc.m,
+                                                        GFP_KERNEL);
+               if (unlikely(!qat_req->src_align))
+                       return ret;
+
+               memcpy(qat_req->src_align + shift, req->src, req->src_len);
+       } else {
+               qat_req->src_align = NULL;
+               qat_req->in.enc.m = dma_map_single(dev, req->src, req->src_len,
+                                          DMA_TO_DEVICE);
+       }
+       qat_req->in.in_tab[3] = 0;
+       qat_req->out.enc.c = dma_map_single(dev, req->dst, req->dst_len,
+                                           DMA_FROM_DEVICE);
+       qat_req->out.out_tab[1] = 0;
+       qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m,
+                                        sizeof(struct qat_rsa_input_params),
+                                        DMA_TO_DEVICE);
+       qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c,
+                                         sizeof(struct qat_rsa_output_params),
+                                           DMA_TO_DEVICE);
+
+       if (unlikely((!qat_req->src_align &&
+                     dma_mapping_error(dev, qat_req->in.enc.m)) ||
+                    dma_mapping_error(dev, qat_req->out.enc.c) ||
+                    dma_mapping_error(dev, qat_req->phy_in) ||
+                    dma_mapping_error(dev, qat_req->phy_out)))
+               goto unmap;
+
+       msg->pke_mid.src_data_addr = qat_req->phy_in;
+       msg->pke_mid.dest_data_addr = qat_req->phy_out;
+       msg->pke_mid.opaque = (uint64_t)(__force long)req;
+       msg->input_param_count = 3;
+       msg->output_param_count = 1;
+       do {
+               ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg);
+       } while (ret == -EBUSY && ctr++ < 100);
+
+       if (!ret)
+               return -EINPROGRESS;
+unmap:
+       if (qat_req->src_align)
+               dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
+                                 qat_req->in.enc.m);
+       else
+               if (!dma_mapping_error(dev, qat_req->in.enc.m))
+                       dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz,
+                                        DMA_TO_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->out.enc.c))
+               dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz,
+                                DMA_FROM_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->phy_in))
+               dma_unmap_single(dev, qat_req->phy_in,
+                                sizeof(struct qat_rsa_input_params),
+                                DMA_TO_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->phy_out))
+               dma_unmap_single(dev, qat_req->phy_out,
+                                sizeof(struct qat_rsa_output_params),
+                                DMA_TO_DEVICE);
+       return ret;
+}
+
+static int qat_rsa_dec(struct akcipher_request *req)
+{
+       struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+       struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+       struct qat_crypto_instance *inst = ctx->inst;
+       struct device *dev = &GET_DEV(inst->accel_dev);
+       struct qat_rsa_request *qat_req =
+                       PTR_ALIGN(akcipher_request_ctx(req), 64);
+       struct icp_qat_fw_pke_request *msg = &qat_req->req;
+       int ret, ctr = 0;
+
+       if (unlikely(!ctx->n || !ctx->d))
+               return -EINVAL;
+
+       if (req->dst_len < ctx->key_sz) {
+               req->dst_len = ctx->key_sz;
+               return -EOVERFLOW;
+       }
+       memset(msg, '\0', sizeof(*msg));
+       ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr,
+                                         ICP_QAT_FW_COMN_REQ_FLAG_SET);
+       msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz);
+       if (unlikely(!msg->pke_hdr.cd_pars.func_id))
+               return -EINVAL;
+
+       qat_req->ctx = ctx;
+       msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE;
+       msg->pke_hdr.comn_req_flags =
+               ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT,
+                                           QAT_COMN_CD_FLD_TYPE_64BIT_ADR);
+
+       qat_req->in.dec.d = ctx->dma_d;
+       qat_req->in.dec.n = ctx->dma_n;
+       ret = -ENOMEM;
+
+       /*
+        * src can be of any size in the valid range, but the HW expects it to
+        * be the same size as the modulus n, so if it is shorter we need to
+        * allocate a new buffer and copy the src data into it.
+        * Otherwise we just map the user-provided buffer.
+        */
+       if (req->src_len < ctx->key_sz) {
+               int shift = ctx->key_sz - req->src_len;
+
+               qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz,
+                                                        &qat_req->in.dec.c,
+                                                        GFP_KERNEL);
+               if (unlikely(!qat_req->src_align))
+                       return ret;
+
+               memcpy(qat_req->src_align + shift, req->src, req->src_len);
+       } else {
+               qat_req->src_align = NULL;
+               qat_req->in.dec.c = dma_map_single(dev, req->src, req->src_len,
+                                                  DMA_TO_DEVICE);
+       }
+       qat_req->in.in_tab[3] = 0;
+       qat_req->out.dec.m = dma_map_single(dev, req->dst, req->dst_len,
+                                           DMA_FROM_DEVICE);
+       qat_req->out.out_tab[1] = 0;
+       qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c,
+                                        sizeof(struct qat_rsa_input_params),
+                                        DMA_TO_DEVICE);
+       qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m,
+                                         sizeof(struct qat_rsa_output_params),
+                                           DMA_TO_DEVICE);
+
+       if (unlikely((!qat_req->src_align &&
+                     dma_mapping_error(dev, qat_req->in.dec.c)) ||
+                    dma_mapping_error(dev, qat_req->out.dec.m) ||
+                    dma_mapping_error(dev, qat_req->phy_in) ||
+                    dma_mapping_error(dev, qat_req->phy_out)))
+               goto unmap;
+
+       msg->pke_mid.src_data_addr = qat_req->phy_in;
+       msg->pke_mid.dest_data_addr = qat_req->phy_out;
+       msg->pke_mid.opaque = (uint64_t)(__force long)req;
+       msg->input_param_count = 3;
+       msg->output_param_count = 1;
+       do {
+               ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg);
+       } while (ret == -EBUSY && ctr++ < 100);
+
+       if (!ret)
+               return -EINPROGRESS;
+unmap:
+       if (qat_req->src_align)
+               dma_free_coherent(dev, ctx->key_sz, qat_req->src_align,
+                                 qat_req->in.dec.c);
+       else
+               if (!dma_mapping_error(dev, qat_req->in.dec.c))
+                       dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz,
+                                        DMA_TO_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->out.dec.m))
+               dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz,
+                                DMA_FROM_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->phy_in))
+               dma_unmap_single(dev, qat_req->phy_in,
+                                sizeof(struct qat_rsa_input_params),
+                                DMA_TO_DEVICE);
+       if (!dma_mapping_error(dev, qat_req->phy_out))
+               dma_unmap_single(dev, qat_req->phy_out,
+                                sizeof(struct qat_rsa_output_params),
+                                DMA_TO_DEVICE);
+       return ret;
+}
+
+int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag,
+                 const void *value, size_t vlen)
+{
+       struct qat_rsa_ctx *ctx = context;
+       struct qat_crypto_instance *inst = ctx->inst;
+       struct device *dev = &GET_DEV(inst->accel_dev);
+       const char *ptr = value;
+       int ret;
+
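+       /* skip any leading zero bytes of the integer value */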
+       while (!*ptr && vlen) {
+               ptr++;
+               vlen--;
+       }
+
+       ctx->key_sz = vlen;
+       ret = -EINVAL;
+       /* In FIPS mode only 2K (2048-bit) and 3K (3072-bit) keys are allowed */
+       if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) {
+               pr_err("QAT: RSA: key size not allowed in FIPS mode\n");
+               goto err;
+       }
+       /* invalid key size provided */
+       if (!qat_rsa_enc_fn_id(ctx->key_sz))
+               goto err;
+
+       ret = -ENOMEM;
+       ctx->n = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_n, GFP_KERNEL);
+       if (!ctx->n)
+               goto err;
+
+       memcpy(ctx->n, ptr, ctx->key_sz);
+       return 0;
+err:
+       ctx->key_sz = 0;
+       ctx->n = NULL;
+       return ret;
+}
+
+int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag,
+                 const void *value, size_t vlen)
+{
+       struct qat_rsa_ctx *ctx = context;
+       struct qat_crypto_instance *inst = ctx->inst;
+       struct device *dev = &GET_DEV(inst->accel_dev);
+       const char *ptr = value;
+
+       while (!*ptr && vlen) {
+               ptr++;
+               vlen--;
+       }
+
+       if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) {
+               ctx->e = NULL;
+               return -EINVAL;
+       }
+
+       ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL);
+       if (!ctx->e)
+               return -ENOMEM;
+       memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen);
+       return 0;
+}
+
+int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag,
+                 const void *value, size_t vlen)
+{
+       struct qat_rsa_ctx *ctx = context;
+       struct qat_crypto_instance *inst = ctx->inst;
+       struct device *dev = &GET_DEV(inst->accel_dev);
+       const char *ptr = value;
+       int ret;
+
+       while (!*ptr && vlen) {
+               ptr++;
+               vlen--;
+       }
+
+       ret = -EINVAL;
+       if (!ctx->key_sz || !vlen || vlen > ctx->key_sz)
+               goto err;
+
+       /* In FIPS mode only 2K (2048-bit) and 3K (3072-bit) keys are allowed */
+       if (fips_enabled && (vlen != 256 && vlen != 384)) {
+               pr_err("QAT: RSA: key size not allowed in FIPS mode\n");
+               goto err;
+       }
+
+       ret = -ENOMEM;
+       ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL);
+       if (!ctx->d)
+               goto err;
+
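+       /* d is copied right-aligned into the zeroed key_sz buffer */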
+       memcpy(ctx->d + (ctx->key_sz - vlen), ptr, vlen);
+       return 0;
+err:
+       ctx->d = NULL;
+       return ret;
+}
+
+static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+                         unsigned int keylen)
+{
+       struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+       struct device *dev = &GET_DEV(ctx->inst->accel_dev);
+       int ret;
+
+       /* Free the old key if any */
+       if (ctx->n)
+               dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n);
+       if (ctx->e)
+               dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e);
+       if (ctx->d) {
+               memset(ctx->d, '\0', ctx->key_sz);
+               dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d);
+       }
+
+       ctx->n = NULL;
+       ctx->e = NULL;
+       ctx->d = NULL;
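+       /*
+        * Decode the BER-encoded RsaKey; the qat_rsa_get_n/e/d callbacks
+        * fill in the context.
+        */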
+       ret = asn1_ber_decoder(&qat_rsakey_decoder, ctx, key, keylen);
+       if (ret < 0)
+               goto free;
+
+       if (!ctx->n || !ctx->e) {
+               /* invalid key provided */
+               ret = -EINVAL;
+               goto free;
+       }
+
+       return 0;
+free:
+       if (ctx->d) {
+               memset(ctx->d, '\0', ctx->key_sz);
+               dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d);
+               ctx->d = NULL;
+       }
+       if (ctx->e) {
+               dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e);
+               ctx->e = NULL;
+       }
+       if (ctx->n) {
+               dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n);
+               ctx->n = NULL;
+               ctx->key_sz = 0;
+       }
+       return ret;
+}
+
+static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+       struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+       struct qat_crypto_instance *inst =
+                       qat_crypto_get_instance_node(get_current_node());
+
+       if (!inst)
+               return -EINVAL;
+
+       ctx->key_sz = 0;
+       ctx->inst = inst;
+       return 0;
+}
+
+static void qat_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+       struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+       struct device *dev = &GET_DEV(ctx->inst->accel_dev);
+
+       if (ctx->n)
+               dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n);
+       if (ctx->e)
+               dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e);
+       if (ctx->d) {
+               memset(ctx->d, '\0', ctx->key_sz);
+               dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d);
+       }
+       qat_crypto_put_instance(ctx->inst);
+       ctx->n = NULL;
+       ctx->e = NULL;
+       ctx->d = NULL;
+}
+
+static struct akcipher_alg rsa = {
+       .encrypt = qat_rsa_enc,
+       .decrypt = qat_rsa_dec,
+       .sign = qat_rsa_dec,
+       .verify = qat_rsa_enc,
+       .setkey = qat_rsa_setkey,
+       .init = qat_rsa_init_tfm,
+       .exit = qat_rsa_exit_tfm,
+       .reqsize = sizeof(struct qat_rsa_request) + 64,
+       .base = {
+               .cra_name = "rsa",
+               .cra_driver_name = "qat-rsa",
+               .cra_priority = 1000,
+               .cra_module = THIS_MODULE,
+               .cra_ctxsize = sizeof(struct qat_rsa_ctx),
+       },
+};
+
+int qat_asym_algs_register(void)
+{
+       int ret = 0;
+
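+       /* register the akcipher only when the first accel device becomes active */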
+       mutex_lock(&algs_lock);
+       if (++active_devs == 1) {
+               rsa.base.cra_flags = 0;
+               ret = crypto_register_akcipher(&rsa);
+       }
+       mutex_unlock(&algs_lock);
+       return ret;
+}
+
+void qat_asym_algs_unregister(void)
+{
+       mutex_lock(&algs_lock);
+       if (--active_devs == 0)
+               crypto_unregister_akcipher(&rsa);
+       mutex_unlock(&algs_lock);
+}
index 3bd705ca5973c1d02598731d387afb9c12da5caf..e23ce6fe074b93f7ff77b4d953fe578282a0a01d 100644 (file)
@@ -88,12 +88,6 @@ static int qat_crypto_free_instances(struct adf_accel_dev *accel_dev)
                if (inst->pke_rx)
                        adf_remove_ring(inst->pke_rx);
 
-               if (inst->rnd_tx)
-                       adf_remove_ring(inst->rnd_tx);
-
-               if (inst->rnd_rx)
-                       adf_remove_ring(inst->rnd_rx);
-
                list_del(list_ptr);
                kfree(inst);
        }
@@ -202,11 +196,6 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
                                    msg_size, key, NULL, 0, &inst->sym_tx))
                        goto err;
 
-               snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_RND_TX, i);
-               if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym,
-                                   msg_size, key, NULL, 0, &inst->rnd_tx))
-                       goto err;
-
                msg_size = msg_size >> 1;
                snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i);
                if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym,
@@ -220,15 +209,9 @@ static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
                                    &inst->sym_rx))
                        goto err;
 
-               snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_RND_RX, i);
-               if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym,
-                                   msg_size, key, qat_alg_callback, 0,
-                                   &inst->rnd_rx))
-                       goto err;
-
                snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i);
                if (adf_create_ring(accel_dev, SEC, bank, num_msg_asym,
-                                   msg_size, key, qat_alg_callback, 0,
+                                   msg_size, key, qat_alg_asym_callback, 0,
                                    &inst->pke_rx))
                        goto err;
        }
index d503007b49e6ef3f7499a19836e5f09c1b7de9ae..dc0273fe36206e7faaa9cb0f901d580a4b5f037f 100644 (file)
@@ -57,8 +57,6 @@ struct qat_crypto_instance {
        struct adf_etr_ring_data *sym_rx;
        struct adf_etr_ring_data *pke_tx;
        struct adf_etr_ring_data *pke_rx;
-       struct adf_etr_ring_data *rnd_tx;
-       struct adf_etr_ring_data *rnd_rx;
        struct adf_accel_dev *accel_dev;
        struct list_head list;
        unsigned long state;
index 274ff7e9de6e9cfa165a8c99297cfada512bc794..8e711d1c308402ad35ce87822ed16f572d07fe5b 100644 (file)
@@ -671,7 +671,6 @@ static int qat_hal_clear_gpr(struct icp_qat_fw_loader_handle *handle)
 #define ICP_DH895XCC_CAP_OFFSET     (ICP_DH895XCC_AE_OFFSET + 0x10000)
 #define LOCAL_TO_XFER_REG_OFFSET    0x800
 #define ICP_DH895XCC_EP_OFFSET      0x3a000
-#define ICP_DH895XCC_PMISC_BAR 1
 int qat_hal_init(struct adf_accel_dev *accel_dev)
 {
        unsigned char ae;
@@ -679,21 +678,24 @@ int qat_hal_init(struct adf_accel_dev *accel_dev)
        struct icp_qat_fw_loader_handle *handle;
        struct adf_accel_pci *pci_info = &accel_dev->accel_pci_dev;
        struct adf_hw_device_data *hw_data = accel_dev->hw_device;
-       struct adf_bar *bar =
+       struct adf_bar *misc_bar =
                        &pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
+       struct adf_bar *sram_bar =
+                       &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
 
        handle = kzalloc(sizeof(*handle), GFP_KERNEL);
        if (!handle)
                return -ENOMEM;
 
-       handle->hal_cap_g_ctl_csr_addr_v = bar->virt_addr +
+       handle->hal_cap_g_ctl_csr_addr_v = misc_bar->virt_addr +
                                                ICP_DH895XCC_CAP_OFFSET;
-       handle->hal_cap_ae_xfer_csr_addr_v = bar->virt_addr +
+       handle->hal_cap_ae_xfer_csr_addr_v = misc_bar->virt_addr +
                                                ICP_DH895XCC_AE_OFFSET;
-       handle->hal_ep_csr_addr_v = bar->virt_addr + ICP_DH895XCC_EP_OFFSET;
+       handle->hal_ep_csr_addr_v = misc_bar->virt_addr +
+                                   ICP_DH895XCC_EP_OFFSET;
        handle->hal_cap_ae_local_csr_addr_v =
                handle->hal_cap_ae_xfer_csr_addr_v + LOCAL_TO_XFER_REG_OFFSET;
-
+       handle->hal_sram_addr_v = sram_bar->virt_addr;
        handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
        if (!handle->hal_handle)
                goto out_hal_handle;
diff --git a/drivers/crypto/qat/qat_common/qat_rsakey.asn1 b/drivers/crypto/qat/qat_common/qat_rsakey.asn1
new file mode 100644 (file)
index 0000000..97b0e02
--- /dev/null
@@ -0,0 +1,5 @@
+RsaKey ::= SEQUENCE {
+       n INTEGER ({ qat_rsa_get_n }),
+       e INTEGER ({ qat_rsa_get_e }),
+       d INTEGER ({ qat_rsa_get_d })
+}
index 1e27f9f7fddf64dbb5ed959305dc93aa7c894fda..c48f181e894157a1c8767d64d4ecf00975bd1ef6 100644 (file)
@@ -359,28 +359,7 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle,
 static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle,
                                   struct icp_qat_uof_initmem *init_mem)
 {
-       unsigned int i;
-       struct icp_qat_uof_memvar_attr *mem_val_attr;
-
-       mem_val_attr =
-               (struct icp_qat_uof_memvar_attr *)((unsigned long)init_mem +
-               sizeof(struct icp_qat_uof_initmem));
-
        switch (init_mem->region) {
-       case ICP_QAT_UOF_SRAM_REGION:
-               if ((init_mem->addr + init_mem->num_in_bytes) >
-                   ICP_DH895XCC_PESRAM_BAR_SIZE) {
-                       pr_err("QAT: initmem on SRAM is out of range");
-                       return -EINVAL;
-               }
-               for (i = 0; i < init_mem->val_attr_num; i++) {
-                       qat_uclo_wr_sram_by_words(handle,
-                                                 init_mem->addr +
-                                                 mem_val_attr->offset_in_byte,
-                                                 &mem_val_attr->value, 4);
-                       mem_val_attr++;
-               }
-               break;
        case ICP_QAT_UOF_LMEM_REGION:
                if (qat_uclo_init_lmem_seg(handle, init_mem))
                        return -EINVAL;
@@ -990,6 +969,12 @@ out_err:
        return -EFAULT;
 }
 
+void qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
+                       void *addr_ptr, int mem_size)
+{
+       qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, ALIGN(mem_size, 4));
+}
+
 int qat_uclo_map_uof_obj(struct icp_qat_fw_loader_handle *handle,
                         void *addr_ptr, int mem_size)
 {
index b1386922d7a2a7c0aaf6bf06c52c1a860ffd12bf..155ace9d4a439b470f1aff180d16a0b72e2abe6d 100644 (file)
@@ -117,6 +117,11 @@ static uint32_t get_etr_bar_id(struct adf_hw_device_data *self)
        return ADF_DH895XCC_ETR_BAR;
 }
 
+static uint32_t get_sram_bar_id(struct adf_hw_device_data *self)
+{
+       return ADF_DH895XCC_SRAM_BAR;
+}
+
 static enum dev_sku_info get_sku(struct adf_hw_device_data *self)
 {
        int sku = (self->fuses & ADF_DH895XCC_FUSECTL_SKU_MASK)
@@ -203,7 +208,6 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
        hw_data->instance_id = dh895xcc_class.instances++;
        hw_data->num_banks = ADF_DH895XCC_ETR_MAX_BANKS;
        hw_data->num_accel = ADF_DH895XCC_MAX_ACCELERATORS;
-       hw_data->pci_dev_id = ADF_DH895XCC_PCI_DEVICE_ID;
        hw_data->num_logical_accel = 1;
        hw_data->num_engines = ADF_DH895XCC_MAX_ACCELENGINES;
        hw_data->tx_rx_gap = ADF_DH895XCC_RX_RINGS_OFFSET;
@@ -219,8 +223,10 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
        hw_data->get_num_aes = get_num_aes;
        hw_data->get_etr_bar_id = get_etr_bar_id;
        hw_data->get_misc_bar_id = get_misc_bar_id;
+       hw_data->get_sram_bar_id = get_sram_bar_id;
        hw_data->get_sku = get_sku;
        hw_data->fw_name = ADF_DH895XCC_FW;
+       hw_data->fw_mmp_name = ADF_DH895XCC_MMP;
        hw_data->init_admin_comms = adf_init_admin_comms;
        hw_data->exit_admin_comms = adf_exit_admin_comms;
        hw_data->init_arb = adf_init_arb;
index 25269a9f24a2bca0cb41e095668589b609a462ff..a4963a9b687bf27318278c692d077f40bf3dc7f7 100644 (file)
@@ -48,6 +48,7 @@
 #define ADF_DH895x_HW_DATA_H_
 
 /* PCIe configuration space */
+#define ADF_DH895XCC_SRAM_BAR 0
 #define ADF_DH895XCC_PMISC_BAR 1
 #define ADF_DH895XCC_ETR_BAR 2
 #define ADF_DH895XCC_RX_RINGS_OFFSET 8
@@ -84,5 +85,7 @@
 #define ADF_DH895XCC_ADMINMSGLR_OFFSET (0x3A000 + 0x578)
 #define ADF_DH895XCC_MAILBOX_BASE_OFFSET 0x20970
 #define ADF_DH895XCC_MAILBOX_STRIDE 0x1000
+/* FW names */
 #define ADF_DH895XCC_FW "qat_895xcc.bin"
+#define ADF_DH895XCC_MMP "qat_mmp.bin"
 #endif
index 1bde45b7a3c560f5e409c17f45c4bf6bb9eb730e..4abeca1b1c33dc58f73bce3580e2a2d16eed4424 100644 (file)
@@ -100,7 +100,7 @@ static void adf_cleanup_accel(struct adf_accel_dev *accel_dev)
        }
 
        if (accel_dev->hw_device) {
-               switch (accel_dev->hw_device->pci_dev_id) {
+               switch (accel_pci_dev->pci_dev->device) {
                case ADF_DH895XCC_PCI_DEVICE_ID:
                        adf_clean_hw_data_dh895xcc(accel_dev->hw_device);
                        break;
@@ -167,12 +167,6 @@ static int adf_dev_configure(struct adf_accel_dev *accel_dev)
                                                key, (void *)&val, ADF_DEC))
                        goto err;
 
-               val = 4;
-               snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_RND_TX, i);
-               if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-                                               key, (void *)&val, ADF_DEC))
-                       goto err;
-
                val = 8;
                snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i);
                if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
@@ -185,12 +179,6 @@ static int adf_dev_configure(struct adf_accel_dev *accel_dev)
                                                key, (void *)&val, ADF_DEC))
                        goto err;
 
-               val = 12;
-               snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_RND_RX, i);
-               if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-                                               key, (void *)&val, ADF_DEC))
-                       goto err;
-
                val = ADF_COALESCING_DEF_TIME;
                snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i);
                if (adf_cfg_add_key_value_param(accel_dev, "Accelerator0",
diff --git a/drivers/crypto/sunxi-ss/Makefile b/drivers/crypto/sunxi-ss/Makefile
new file mode 100644 (file)
index 0000000..8f4c7a2
--- /dev/null
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sun4i-ss.o
+sun4i-ss-y += sun4i-ss-core.o sun4i-ss-hash.o sun4i-ss-cipher.o
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
new file mode 100644 (file)
index 0000000..e070c31
--- /dev/null
@@ -0,0 +1,542 @@
+/*
+ * sun4i-ss-cipher.c - hardware cryptographic accelerator for Allwinner A20 SoC
+ *
+ * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This file adds support for the AES cipher with 128, 192 and 256 bit
+ * keysizes in CBC and ECB mode.
+ * It also adds support for DES and 3DES in CBC and ECB mode.
+ *
+ * You can find the datasheet in Documentation/arm/sunxi/README
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include "sun4i-ss.h"
+
+static int sun4i_ss_opti_poll(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_ss_ctx *ss = op->ss;
+       unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
+       struct sun4i_cipher_req_ctx *ctx = ablkcipher_request_ctx(areq);
+       u32 mode = ctx->mode;
+       /* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
+       u32 rx_cnt = SS_RX_DEFAULT;
+       u32 tx_cnt = 0;
+       u32 spaces;
+       u32 v;
+       int i, err = 0;
+       unsigned int ileft = areq->nbytes;
+       unsigned int oleft = areq->nbytes;
+       unsigned int todo;
+       struct sg_mapping_iter mi, mo;
+       unsigned int oi, oo; /* offset for in and out */
+
+       if (areq->nbytes == 0)
+               return 0;
+
+       if (!areq->info) {
+               dev_err_ratelimited(ss->dev, "ERROR: Empty IV\n");
+               return -EINVAL;
+       }
+
+       if (!areq->src || !areq->dst) {
+               dev_err_ratelimited(ss->dev, "ERROR: Some SGs are NULL\n");
+               return -EINVAL;
+       }
+
+       spin_lock_bh(&ss->slock);
+
+       for (i = 0; i < op->keylen; i += 4)
+               writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
+
+       if (areq->info) {
+               for (i = 0; i < 4 && i < ivsize / 4; i++) {
+                       v = *(u32 *)(areq->info + i * 4);
+                       writel(v, ss->base + SS_IV0 + i * 4);
+               }
+       }
+       writel(mode, ss->base + SS_CTL);
+
+       sg_miter_start(&mi, areq->src, sg_nents(areq->src),
+                      SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+       sg_miter_start(&mo, areq->dst, sg_nents(areq->dst),
+                      SG_MITER_TO_SG | SG_MITER_ATOMIC);
+       sg_miter_next(&mi);
+       sg_miter_next(&mo);
+       if (!mi.addr || !mo.addr) {
+               dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n");
+               err = -EINVAL;
+               goto release_ss;
+       }
+
+       ileft = areq->nbytes / 4;
+       oleft = areq->nbytes / 4;
+       oi = 0;
+       oo = 0;
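+       /*
+        * ileft and oleft are counted in 32-bit words here; feed the RX FIFO
+        * with input words and drain the TX FIFO into the output SGs.
+        */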
+       do {
+               todo = min3(rx_cnt, ileft, (mi.length - oi) / 4);
+               if (todo > 0) {
+                       ileft -= todo;
+                       writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo);
+                       oi += todo * 4;
+               }
+               if (oi == mi.length) {
+                       sg_miter_next(&mi);
+                       oi = 0;
+               }
+
+               spaces = readl(ss->base + SS_FCSR);
+               rx_cnt = SS_RXFIFO_SPACES(spaces);
+               tx_cnt = SS_TXFIFO_SPACES(spaces);
+
+               todo = min3(tx_cnt, oleft, (mo.length - oo) / 4);
+               if (todo > 0) {
+                       oleft -= todo;
+                       readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
+                       oo += todo * 4;
+               }
+               if (oo == mo.length) {
+                       sg_miter_next(&mo);
+                       oo = 0;
+               }
+       } while (mo.length > 0);
+
+       if (areq->info) {
+               for (i = 0; i < 4 && i < ivsize / 4; i++) {
+                       v = readl(ss->base + SS_IV0 + i * 4);
+                       *(u32 *)(areq->info + i * 4) = v;
+               }
+       }
+
+release_ss:
+       sg_miter_stop(&mi);
+       sg_miter_stop(&mo);
+       writel(0, ss->base + SS_CTL);
+       spin_unlock_bh(&ss->slock);
+       return err;
+}
+
+/* Generic function that supports SGs whose sizes are not multiples of 4 */
+static int sun4i_ss_cipher_poll(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_ss_ctx *ss = op->ss;
+       int no_chunk = 1;
+       struct scatterlist *in_sg = areq->src;
+       struct scatterlist *out_sg = areq->dst;
+       unsigned int ivsize = crypto_ablkcipher_ivsize(tfm);
+       struct sun4i_cipher_req_ctx *ctx = ablkcipher_request_ctx(areq);
+       u32 mode = ctx->mode;
+       /* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */
+       u32 rx_cnt = SS_RX_DEFAULT;
+       u32 tx_cnt = 0;
+       u32 v;
+       u32 spaces;
+       int i, err = 0;
+       unsigned int ileft = areq->nbytes;
+       unsigned int oleft = areq->nbytes;
+       unsigned int todo;
+       struct sg_mapping_iter mi, mo;
+       unsigned int oi, oo;    /* offset for in and out */
+       char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */
+       char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */
+       unsigned int ob = 0;    /* offset in buf */
+       unsigned int obo = 0;   /* offset in bufo*/
+       unsigned int obl = 0;   /* length of data in bufo */
+
+       if (areq->nbytes == 0)
+               return 0;
+
+       if (!areq->info) {
+               dev_err_ratelimited(ss->dev, "ERROR: Empty IV\n");
+               return -EINVAL;
+       }
+
+       if (!areq->src || !areq->dst) {
+               dev_err_ratelimited(ss->dev, "ERROR: Some SGs are NULL\n");
+               return -EINVAL;
+       }
+
+       /*
+        * If we have only SGs with sizes that are multiples of 4,
+        * we can use the SS optimized function.
+        */
+       while (in_sg && no_chunk == 1) {
+               if ((in_sg->length % 4) != 0)
+                       no_chunk = 0;
+               in_sg = sg_next(in_sg);
+       }
+       while (out_sg && no_chunk == 1) {
+               if ((out_sg->length % 4) != 0)
+                       no_chunk = 0;
+               out_sg = sg_next(out_sg);
+       }
+
+       if (no_chunk == 1)
+               return sun4i_ss_opti_poll(areq);
+
+       spin_lock_bh(&ss->slock);
+
+       for (i = 0; i < op->keylen; i += 4)
+               writel(*(op->key + i / 4), ss->base + SS_KEY0 + i);
+
+       if (areq->info) {
+               for (i = 0; i < 4 && i < ivsize / 4; i++) {
+                       v = *(u32 *)(areq->info + i * 4);
+                       writel(v, ss->base + SS_IV0 + i * 4);
+               }
+       }
+       writel(mode, ss->base + SS_CTL);
+
+       sg_miter_start(&mi, areq->src, sg_nents(areq->src),
+                      SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+       sg_miter_start(&mo, areq->dst, sg_nents(areq->dst),
+                      SG_MITER_TO_SG | SG_MITER_ATOMIC);
+       sg_miter_next(&mi);
+       sg_miter_next(&mo);
+       if (!mi.addr || !mo.addr) {
+               dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n");
+               err = -EINVAL;
+               goto release_ss;
+       }
+       ileft = areq->nbytes;
+       oleft = areq->nbytes;
+       oi = 0;
+       oo = 0;
+
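+       /*
+        * Main loop: push input to the RX FIFO and pull output from the TX
+        * FIFO, buffering partial words in buf/bufo when SG chunks are not
+        * 4-byte aligned.
+        */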
+       while (oleft > 0) {
+               if (ileft > 0) {
+                       /*
+                        * todo is the number of consecutive 4-byte words that
+                        * we can read from the current SG
+                        */
+                       todo = min3(rx_cnt, ileft / 4, (mi.length - oi) / 4);
+                       if (todo > 0 && ob == 0) {
+                               writesl(ss->base + SS_RXFIFO, mi.addr + oi,
+                                       todo);
+                               ileft -= todo * 4;
+                               oi += todo * 4;
+                       } else {
+                               /*
+                                * Not enough consecutive bytes, so we
+                                * linearize them in buf. Here todo is in bytes.
+                                * After the copy, if we end up with a multiple
+                                * of 4, we must be able to write all of buf in
+                                * one pass, which is why we min() with rx_cnt.
+                                */
+                               todo = min3(rx_cnt * 4 - ob, ileft,
+                                           mi.length - oi);
+                               memcpy(buf + ob, mi.addr + oi, todo);
+                               ileft -= todo;
+                               oi += todo;
+                               ob += todo;
+                               if (ob % 4 == 0) {
+                                       writesl(ss->base + SS_RXFIFO, buf,
+                                               ob / 4);
+                                       ob = 0;
+                               }
+                       }
+                       if (oi == mi.length) {
+                               sg_miter_next(&mi);
+                               oi = 0;
+                       }
+               }
+
+               spaces = readl(ss->base + SS_FCSR);
+               rx_cnt = SS_RXFIFO_SPACES(spaces);
+               tx_cnt = SS_TXFIFO_SPACES(spaces);
+               dev_dbg(ss->dev, "%x %u/%u %u/%u cnt=%u %u/%u %u/%u cnt=%u %u %u\n",
+                       mode,
+                       oi, mi.length, ileft, areq->nbytes, rx_cnt,
+                       oo, mo.length, oleft, areq->nbytes, tx_cnt,
+                       todo, ob);
+
+               if (tx_cnt == 0)
+                       continue;
+               /* todo is in 4-byte words */
+               todo = min3(tx_cnt, oleft / 4, (mo.length - oo) / 4);
+               if (todo > 0) {
+                       readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo);
+                       oleft -= todo * 4;
+                       oo += todo * 4;
+                       if (oo == mo.length) {
+                               sg_miter_next(&mo);
+                               oo = 0;
+                       }
+               } else {
+                       /*
+                        * Read obl bytes into bufo; read as much as possible
+                        * in order to empty the device FIFO.
+                        */
+                       readsl(ss->base + SS_TXFIFO, bufo, tx_cnt);
+                       obl = tx_cnt * 4;
+                       obo = 0;
+                       do {
+                               /*
+                                * How many bytes can we copy? No more than
+                                * what remains in the current SG and no more
+                                * than what remains in bufo; there is no need
+                                * to test against oleft.
+                                */
+                               todo = min(mo.length - oo, obl - obo);
+                               memcpy(mo.addr + oo, bufo + obo, todo);
+                               oleft -= todo;
+                               obo += todo;
+                               oo += todo;
+                               if (oo == mo.length) {
+                                       sg_miter_next(&mo);
+                                       oo = 0;
+                               }
+                       } while (obo < obl);
+                       /* bufo must be fully used here */
+               }
+       }
+       if (areq->info) {
+               for (i = 0; i < 4 && i < ivsize / 4; i++) {
+                       v = readl(ss->base + SS_IV0 + i * 4);
+                       *(u32 *)(areq->info + i * 4) = v;
+               }
+       }
+
+release_ss:
+       sg_miter_stop(&mi);
+       sg_miter_stop(&mo);
+       writel(0, ss->base + SS_CTL);
+       spin_unlock_bh(&ss->slock);
+
+       return err;
+}
+
+/* CBC AES */
+int sun4i_ss_cbc_aes_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_AES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_cbc_aes_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_AES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+/* ECB AES */
+int sun4i_ss_ecb_aes_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_AES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_ecb_aes_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_AES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+/* CBC DES */
+int sun4i_ss_cbc_des_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_DES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_cbc_des_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_DES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+/* ECB DES */
+int sun4i_ss_ecb_des_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_DES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_ecb_des_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_DES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+/* CBC 3DES */
+int sun4i_ss_cbc_des3_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_3DES | SS_CBC | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_cbc_des3_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_3DES | SS_CBC | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+/* ECB 3DES */
+int sun4i_ss_ecb_des3_encrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_3DES | SS_ECB | SS_ENABLED | SS_ENCRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_ecb_des3_decrypt(struct ablkcipher_request *areq)
+{
+       struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(areq);
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_cipher_req_ctx *rctx = ablkcipher_request_ctx(areq);
+
+       rctx->mode = SS_OP_3DES | SS_ECB | SS_ENABLED | SS_DECRYPTION |
+               op->keymode;
+       return sun4i_ss_cipher_poll(areq);
+}
+
+int sun4i_ss_cipher_init(struct crypto_tfm *tfm)
+{
+       struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
+       struct crypto_alg *alg = tfm->__crt_alg;
+       struct sun4i_ss_alg_template *algt;
+
+       memset(op, 0, sizeof(struct sun4i_tfm_ctx));
+
+       algt = container_of(alg, struct sun4i_ss_alg_template, alg.crypto);
+       op->ss = algt->ss;
+
+       tfm->crt_ablkcipher.reqsize = sizeof(struct sun4i_cipher_req_ctx);
+
+       return 0;
+}
+
+/* check and set the AES key, prepare the mode to be used */
+int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                       unsigned int keylen)
+{
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_ss_ctx *ss = op->ss;
+
+       switch (keylen) {
+       case 128 / 8:
+               op->keymode = SS_AES_128BITS;
+               break;
+       case 192 / 8:
+               op->keymode = SS_AES_192BITS;
+               break;
+       case 256 / 8:
+               op->keymode = SS_AES_256BITS;
+               break;
+       default:
+               dev_err(ss->dev, "ERROR: Invalid keylen %u\n", keylen);
+               crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       op->keylen = keylen;
+       memcpy(op->key, key, keylen);
+       return 0;
+}
+
+/* check and set the DES key, prepare the mode to be used */
+int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                       unsigned int keylen)
+{
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_ss_ctx *ss = op->ss;
+       u32 flags;
+       u32 tmp[DES_EXPKEY_WORDS];
+       int ret;
+
+       if (unlikely(keylen != DES_KEY_SIZE)) {
+               dev_err(ss->dev, "Invalid keylen %u\n", keylen);
+               crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+
+       flags = crypto_ablkcipher_get_flags(tfm);
+
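+       /* des_ekey() returns 0 for weak keys; reject them if the tfm asks for it */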
+       ret = des_ekey(tmp, key);
+       if (unlikely(ret == 0) && (flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+               crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+               dev_dbg(ss->dev, "Weak key %u\n", keylen);
+               return -EINVAL;
+       }
+
+       op->keylen = keylen;
+       memcpy(op->key, key, keylen);
+       return 0;
+}
+
+/* check and set the 3DES key, prepare the mode to be used */
+int sun4i_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                        unsigned int keylen)
+{
+       struct sun4i_tfm_ctx *op = crypto_ablkcipher_ctx(tfm);
+       struct sun4i_ss_ctx *ss = op->ss;
+
+       if (unlikely(keylen != 3 * DES_KEY_SIZE)) {
+               dev_err(ss->dev, "Invalid keylen %u\n", keylen);
+               crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       op->keylen = keylen;
+       memcpy(op->key, key, keylen);
+       return 0;
+}
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
new file mode 100644 (file)
index 0000000..0b79b58
--- /dev/null
@@ -0,0 +1,403 @@
+/*
+ * sun4i-ss-core.c - hardware cryptographic accelerator for Allwinner A20 SoC
+ *
+ * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * Core file which registers crypto algorithms supported by the SS.
+ *
+ * You can find a link to the datasheet in Documentation/arm/sunxi/README
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+
+#include "sun4i-ss.h"
+
+static struct sun4i_ss_alg_template ss_algs[] = {
+{       .type = CRYPTO_ALG_TYPE_AHASH,
+       .mode = SS_OP_MD5,
+       .alg.hash = {
+               .init = sun4i_hash_init,
+               .update = sun4i_hash_update,
+               .final = sun4i_hash_final,
+               .finup = sun4i_hash_finup,
+               .digest = sun4i_hash_digest,
+               .export = sun4i_hash_export_md5,
+               .import = sun4i_hash_import_md5,
+               .halg = {
+                       .digestsize = MD5_DIGEST_SIZE,
+                       .base = {
+                               .cra_name = "md5",
+                               .cra_driver_name = "md5-sun4i-ss",
+                               .cra_priority = 300,
+                               .cra_alignmask = 3,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+                               .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+                               .cra_ctxsize = sizeof(struct sun4i_req_ctx),
+                               .cra_module = THIS_MODULE,
+                               .cra_type = &crypto_ahash_type,
+                               .cra_init = sun4i_hash_crainit
+                       }
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_AHASH,
+       .mode = SS_OP_SHA1,
+       .alg.hash = {
+               .init = sun4i_hash_init,
+               .update = sun4i_hash_update,
+               .final = sun4i_hash_final,
+               .finup = sun4i_hash_finup,
+               .digest = sun4i_hash_digest,
+               .export = sun4i_hash_export_sha1,
+               .import = sun4i_hash_import_sha1,
+               .halg = {
+                       .digestsize = SHA1_DIGEST_SIZE,
+                       .base = {
+                               .cra_name = "sha1",
+                               .cra_driver_name = "sha1-sun4i-ss",
+                               .cra_priority = 300,
+                               .cra_alignmask = 3,
+                               .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+                               .cra_blocksize = SHA1_BLOCK_SIZE,
+                               .cra_ctxsize = sizeof(struct sun4i_req_ctx),
+                               .cra_module = THIS_MODULE,
+                               .cra_type = &crypto_ahash_type,
+                               .cra_init = sun4i_hash_crainit
+                       }
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+               .cra_name = "cbc(aes)",
+               .cra_driver_name = "cbc-aes-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = AES_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_ablkcipher = {
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .setkey         = sun4i_ss_aes_setkey,
+                       .encrypt        = sun4i_ss_cbc_aes_encrypt,
+                       .decrypt        = sun4i_ss_cbc_aes_decrypt,
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+               .cra_name = "ecb(aes)",
+               .cra_driver_name = "ecb-aes-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = AES_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_ablkcipher = {
+                       .min_keysize    = AES_MIN_KEY_SIZE,
+                       .max_keysize    = AES_MAX_KEY_SIZE,
+                       .ivsize         = AES_BLOCK_SIZE,
+                       .setkey         = sun4i_ss_aes_setkey,
+                       .encrypt        = sun4i_ss_ecb_aes_encrypt,
+                       .decrypt        = sun4i_ss_ecb_aes_decrypt,
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+               .cra_name = "cbc(des)",
+               .cra_driver_name = "cbc-des-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = DES_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_u.ablkcipher = {
+                       .min_keysize    = DES_KEY_SIZE,
+                       .max_keysize    = DES_KEY_SIZE,
+                       .ivsize         = DES_BLOCK_SIZE,
+                       .setkey         = sun4i_ss_des_setkey,
+                       .encrypt        = sun4i_ss_cbc_des_encrypt,
+                       .decrypt        = sun4i_ss_cbc_des_decrypt,
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+               .cra_name = "ecb(des)",
+               .cra_driver_name = "ecb-des-sun4i-ss",
+               .cra_priority = 300,
+               .cra_blocksize = DES_BLOCK_SIZE,
+               .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+               .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+               .cra_module = THIS_MODULE,
+               .cra_alignmask = 3,
+               .cra_type = &crypto_ablkcipher_type,
+               .cra_init = sun4i_ss_cipher_init,
+               .cra_u.ablkcipher = {
+                       .min_keysize    = DES_KEY_SIZE,
+                       .max_keysize    = DES_KEY_SIZE,
+                       .setkey         = sun4i_ss_des_setkey,
+                       .encrypt        = sun4i_ss_ecb_des_encrypt,
+                       .decrypt        = sun4i_ss_ecb_des_decrypt,
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+                       .cra_name = "cbc(des3_ede)",
+                       .cra_driver_name = "cbc-des3-sun4i-ss",
+                       .cra_priority = 300,
+                       .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+                       .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+                       .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+                       .cra_module = THIS_MODULE,
+                       .cra_alignmask = 3,
+                       .cra_type = &crypto_ablkcipher_type,
+                       .cra_init = sun4i_ss_cipher_init,
+                       .cra_u.ablkcipher = {
+                               .min_keysize    = DES3_EDE_KEY_SIZE,
+                               .max_keysize    = DES3_EDE_KEY_SIZE,
+                               .ivsize         = DES3_EDE_BLOCK_SIZE,
+                               .setkey         = sun4i_ss_des3_setkey,
+                               .encrypt        = sun4i_ss_cbc_des3_encrypt,
+                               .decrypt        = sun4i_ss_cbc_des3_decrypt,
+               }
+       }
+},
+{       .type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+       .alg.crypto = {
+                       .cra_name = "ecb(des3_ede)",
+                       .cra_driver_name = "ecb-des3-sun4i-ss",
+                       .cra_priority = 300,
+                       .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+                       .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER,
+                       .cra_ctxsize = sizeof(struct sun4i_tfm_ctx),
+                       .cra_module = THIS_MODULE,
+                       .cra_alignmask = 3,
+                       .cra_type = &crypto_ablkcipher_type,
+                       .cra_init = sun4i_ss_cipher_init,
+                       .cra_u.ablkcipher = {
+                               .min_keysize    = DES3_EDE_KEY_SIZE,
+                               .max_keysize    = DES3_EDE_KEY_SIZE,
+                               .ivsize         = DES3_EDE_BLOCK_SIZE,
+                               .setkey         = sun4i_ss_des3_setkey,
+                               .encrypt        = sun4i_ss_ecb_des3_encrypt,
+                               .decrypt        = sun4i_ss_ecb_des3_decrypt,
+               }
+       }
+},
+};
+
+static int sun4i_ss_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+       u32 v;
+       int err, i;
+       unsigned long cr;
+       const unsigned long cr_ahb = 24 * 1000 * 1000;
+       const unsigned long cr_mod = 150 * 1000 * 1000;
+       struct sun4i_ss_ctx *ss;
+
+       if (!pdev->dev.of_node)
+               return -ENODEV;
+
+       ss = devm_kzalloc(&pdev->dev, sizeof(*ss), GFP_KERNEL);
+       if (!ss)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       ss->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ss->base)) {
+               dev_err(&pdev->dev, "Cannot request MMIO\n");
+               return PTR_ERR(ss->base);
+       }
+
+       ss->ssclk = devm_clk_get(&pdev->dev, "mod");
+       if (IS_ERR(ss->ssclk)) {
+               err = PTR_ERR(ss->ssclk);
+               dev_err(&pdev->dev, "Cannot get SS clock err=%d\n", err);
+               return err;
+       }
+       dev_dbg(&pdev->dev, "clock ss acquired\n");
+
+       ss->busclk = devm_clk_get(&pdev->dev, "ahb");
+       if (IS_ERR(ss->busclk)) {
+               err = PTR_ERR(ss->busclk);
+               dev_err(&pdev->dev, "Cannot get AHB SS clock err=%d\n", err);
+               return err;
+       }
+       dev_dbg(&pdev->dev, "clock ahb_ss acquired\n");
+
+       /* Enable both clocks */
+       err = clk_prepare_enable(ss->busclk);
+       if (err != 0) {
+               dev_err(&pdev->dev, "Cannot prepare_enable busclk\n");
+               return err;
+       }
+       err = clk_prepare_enable(ss->ssclk);
+       if (err != 0) {
+               dev_err(&pdev->dev, "Cannot prepare_enable ssclk\n");
+               goto error_ssclk;
+       }
+
+       /*
+        * Check that the clocks have the rates given in the datasheet and
+        * try to set the SS clock to the maximum rate allowed.
+        */
+       err = clk_set_rate(ss->ssclk, cr_mod);
+       if (err != 0) {
+               dev_err(&pdev->dev, "Cannot set clock rate to ssclk\n");
+               goto error_clk;
+       }
+
+       /*
+        * The only impact of a clock below the requirement is reduced
+        * performance, so do not print "errors"; only warn about
+        * overclocked clocks.
+        */
+       cr = clk_get_rate(ss->busclk);
+       if (cr >= cr_ahb)
+               dev_dbg(&pdev->dev, "Clock bus %lu (%lu MHz) (must be >= %lu)\n",
+                       cr, cr / 1000000, cr_ahb);
+       else
+               dev_warn(&pdev->dev, "Clock bus %lu (%lu MHz) (must be >= %lu)\n",
+                        cr, cr / 1000000, cr_ahb);
+
+       cr = clk_get_rate(ss->ssclk);
+       if (cr <= cr_mod) {
+               if (cr < cr_mod)
+                       dev_warn(&pdev->dev, "Clock ss %lu (%lu MHz) (must be <= %lu)\n",
+                                cr, cr / 1000000, cr_mod);
+               else
+                       dev_dbg(&pdev->dev, "Clock ss %lu (%lu MHz) (must be <= %lu)\n",
+                               cr, cr / 1000000, cr_mod);
+       } else {
+               dev_warn(&pdev->dev, "Clock ss is at %lu (%lu MHz) (must be <= %lu)\n",
+                        cr, cr / 1000000, cr_mod);
+       }
+
+       /*
+        * The datasheet names it "Die Bonding ID"; it appears to be a sort
+        * of Security System revision number.
+        * Since the A80 seems to have another version of the SS,
+        * this info could be useful.
+        */
+       writel(SS_ENABLED, ss->base + SS_CTL);
+       v = readl(ss->base + SS_CTL);
+       v >>= 16;
+       v &= 0x07;
+       dev_info(&pdev->dev, "Die ID %d\n", v);
+       writel(0, ss->base + SS_CTL);
+
+       ss->dev = &pdev->dev;
+
+       spin_lock_init(&ss->slock);
+
+       for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
+               ss_algs[i].ss = ss;
+               switch (ss_algs[i].type) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       err = crypto_register_alg(&ss_algs[i].alg.crypto);
+                       if (err != 0) {
+                               dev_err(ss->dev, "Fail to register %s\n",
+                                       ss_algs[i].alg.crypto.cra_name);
+                               goto error_alg;
+                       }
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       err = crypto_register_ahash(&ss_algs[i].alg.hash);
+                       if (err != 0) {
+                               dev_err(ss->dev, "Fail to register %s\n",
+                                       ss_algs[i].alg.hash.halg.base.cra_name);
+                               goto error_alg;
+                       }
+                       break;
+               }
+       }
+       platform_set_drvdata(pdev, ss);
+       return 0;
+error_alg:
+       i--;
+       for (; i >= 0; i--) {
+               switch (ss_algs[i].type) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       crypto_unregister_alg(&ss_algs[i].alg.crypto);
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       crypto_unregister_ahash(&ss_algs[i].alg.hash);
+                       break;
+               }
+       }
+error_clk:
+       clk_disable_unprepare(ss->ssclk);
+error_ssclk:
+       clk_disable_unprepare(ss->busclk);
+       return err;
+}
+
+static int sun4i_ss_remove(struct platform_device *pdev)
+{
+       int i;
+       struct sun4i_ss_ctx *ss = platform_get_drvdata(pdev);
+
+       for (i = 0; i < ARRAY_SIZE(ss_algs); i++) {
+               switch (ss_algs[i].type) {
+               case CRYPTO_ALG_TYPE_ABLKCIPHER:
+                       crypto_unregister_alg(&ss_algs[i].alg.crypto);
+                       break;
+               case CRYPTO_ALG_TYPE_AHASH:
+                       crypto_unregister_ahash(&ss_algs[i].alg.hash);
+                       break;
+               }
+       }
+
+       writel(0, ss->base + SS_CTL);
+       clk_disable_unprepare(ss->busclk);
+       clk_disable_unprepare(ss->ssclk);
+       return 0;
+}
+
+static const struct of_device_id a20ss_crypto_of_match_table[] = {
+       { .compatible = "allwinner,sun4i-a10-crypto" },
+       {}
+};
+MODULE_DEVICE_TABLE(of, a20ss_crypto_of_match_table);
+
+static struct platform_driver sun4i_ss_driver = {
+       .probe          = sun4i_ss_probe,
+       .remove         = sun4i_ss_remove,
+       .driver         = {
+               .name           = "sun4i-ss",
+               .of_match_table = a20ss_crypto_of_match_table,
+       },
+};
+
+module_platform_driver(sun4i_ss_driver);
+
+MODULE_DESCRIPTION("Allwinner Security System cryptographic accelerator");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin LABBE <clabbe.montjoie@gmail.com>");
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
new file mode 100644 (file)
index 0000000..ff80314
--- /dev/null
@@ -0,0 +1,492 @@
+/*
+ * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
+ *
+ * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * This file adds support for MD5 and SHA1.
+ *
+ * You can find the datasheet in Documentation/arm/sunxi/README
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include "sun4i-ss.h"
+#include <linux/scatterlist.h>
+
+/* This is a totally arbitrary value */
+#define SS_TIMEOUT 100
+
+int sun4i_hash_crainit(struct crypto_tfm *tfm)
+{
+       crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+                                sizeof(struct sun4i_req_ctx));
+       return 0;
+}
+
+/* sun4i_hash_init: initialize request context */
+int sun4i_hash_init(struct ahash_request *areq)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+       struct sun4i_ss_alg_template *algt;
+       struct sun4i_ss_ctx *ss;
+
+       memset(op, 0, sizeof(struct sun4i_req_ctx));
+
+       algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
+       ss = algt->ss;
+       op->ss = algt->ss;
+       op->mode = algt->mode;
+
+       return 0;
+}
+
+int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       struct md5_state *octx = out;
+       int i;
+
+       octx->byte_count = op->byte_count + op->len;
+
+       memcpy(octx->block, op->buf, op->len);
+
+       if (op->byte_count > 0) {
+               for (i = 0; i < 4; i++)
+                       octx->hash[i] = op->hash[i];
+       } else {
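+               /*
+                * Hash not started yet: export the MD5 initial values, which
+                * are numerically identical to SHA1_H0..SHA1_H3.
+                */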
+               octx->hash[0] = SHA1_H0;
+               octx->hash[1] = SHA1_H1;
+               octx->hash[2] = SHA1_H2;
+               octx->hash[3] = SHA1_H3;
+       }
+
+       return 0;
+}
+
+int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       const struct md5_state *ictx = in;
+       int i;
+
+       sun4i_hash_init(areq);
+
+       op->byte_count = ictx->byte_count & ~0x3F;
+       op->len = ictx->byte_count & 0x3F;
+
+       memcpy(op->buf, ictx->block, op->len);
+
+       for (i = 0; i < 4; i++)
+               op->hash[i] = ictx->hash[i];
+
+       return 0;
+}
+
+int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       struct sha1_state *octx = out;
+       int i;
+
+       octx->count = op->byte_count + op->len;
+
+       memcpy(octx->buffer, op->buf, op->len);
+
+       if (op->byte_count > 0) {
+               for (i = 0; i < 5; i++)
+                       octx->state[i] = op->hash[i];
+       } else {
+               octx->state[0] = SHA1_H0;
+               octx->state[1] = SHA1_H1;
+               octx->state[2] = SHA1_H2;
+               octx->state[3] = SHA1_H3;
+               octx->state[4] = SHA1_H4;
+       }
+
+       return 0;
+}
+
+int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
+{
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       const struct sha1_state *ictx = in;
+       int i;
+
+       sun4i_hash_init(areq);
+
+       op->byte_count = ictx->count & ~0x3F;
+       op->len = ictx->count & 0x3F;
+
+       memcpy(op->buf, ictx->buffer, op->len);
+
+       for (i = 0; i < 5; i++)
+               op->hash[i] = ictx->state[i];
+
+       return 0;
+}
+
+/*
+ * sun4i_hash_update: update hash engine
+ *
+ * Could be used for both SHA1 and MD5
+ * Data is written to the SS as 32-bit words.
+ *
+ * Since we cannot leave partial data and hash state in the engine,
+ * we need to get the hash state at the end of this function.
+ * We can get the hash state every 64 bytes.
+ *
+ * So the first task is to compute the number of bytes to write to the SS
+ * modulo 64.
+ * The extra bytes go to a temporary buffer op->buf holding op->len bytes.
+ *
+ * So at the beginning of update():
+ * if op->len + areq->nbytes < 64
+ * => all data is written to the wait buffer (op->buf) and end=0
+ * otherwise, write all data from op->buf to the device and set end so that
+ * what is written completes a multiple of 64 bytes
+ *
+ * example 1:
+ * update1 60 bytes => op->len=60
+ * update2 60 bytes => need one more word to reach 64 bytes
+ * end=4
+ * so write all data from op->buf plus one word from the SGs,
+ * then store the remaining data in op->buf
+ * final state op->len=56
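+ *
+ * example 2:
+ * update1 4 bytes   => op->len=4
+ * update2 128 bytes => end = ((128 + 4) / 64) * 64 - 4 = 124
+ * so the 4 buffered bytes plus 124 bytes from the SGs (two full 64-byte
+ * blocks) go to the device, op->byte_count grows by 128 and the last
+ * 4 bytes are kept in op->buf (op->len=4)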
+ */
+int sun4i_hash_update(struct ahash_request *areq)
+{
+       u32 v, ivmode = 0;
+       unsigned int i = 0;
+       /*
+        * i is the total bytes read from SGs, to be compared to areq->nbytes
+        * i is important because we cannot rely on SG length since the sum of
+        * SG->length could be greater than areq->nbytes
+        */
+
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       struct sun4i_ss_ctx *ss = op->ss;
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       unsigned int in_i = 0; /* advancement in the current SG */
+       unsigned int end;
+       /*
+        * end is the position when we need to stop writing to the device,
+        * to be compared to i
+        */
+       int in_r, err = 0;
+       unsigned int todo;
+       u32 spaces, rx_cnt = SS_RX_DEFAULT;
+       size_t copied = 0;
+       struct sg_mapping_iter mi;
+
+       dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
+               __func__, crypto_tfm_alg_name(areq->base.tfm),
+               op->byte_count, areq->nbytes, op->mode,
+               op->len, op->hash[0]);
+
+       if (areq->nbytes == 0)
+               return 0;
+
+       /* protect against overflow */
+       if (areq->nbytes > UINT_MAX - op->len) {
+               dev_err(ss->dev, "Cannot process too large request\n");
+               return -EINVAL;
+       }
+
+       if (op->len + areq->nbytes < 64) {
+               /* linearize data to op->buf */
+               copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
+                                           op->buf + op->len, areq->nbytes, 0);
+               op->len += copied;
+               return 0;
+       }
+
+       end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
+
+       if (end > areq->nbytes || areq->nbytes - end > 63) {
+               dev_err(ss->dev, "ERROR: Bound error %u %u\n",
+                       end, areq->nbytes);
+               return -EINVAL;
+       }
+
+       spin_lock_bh(&ss->slock);
+
+       /*
+        * if some data have been processed before,
+        * we need to restore the partial hash state
+        */
+       if (op->byte_count > 0) {
+               ivmode = SS_IV_ARBITRARY;
+               for (i = 0; i < 5; i++)
+                       writel(op->hash[i], ss->base + SS_IV0 + i * 4);
+       }
+       /* Enable the device */
+       writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
+
+       i = 0;
+       sg_miter_start(&mi, areq->src, sg_nents(areq->src),
+                      SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+       sg_miter_next(&mi);
+       in_i = 0;
+
+       do {
+               /*
+                * we need to linearize in two cases:
+                * - the buffer already holds data
+                * - the SG does not have enough bytes remaining (< 4)
+                */
+               if (op->len > 0 || (mi.length - in_i) < 4) {
+                       /*
+                        * if we enter here, there are two reasons to stop:
+                        * - the buffer is full
+                        * - we have reached the end
+                        */
+                       while (op->len < 64 && i < end) {
+                               /* how many bytes we can read from current SG */
+                               in_r = min3(mi.length - in_i, end - i,
+                                           64 - op->len);
+                               memcpy(op->buf + op->len, mi.addr + in_i, in_r);
+                               op->len += in_r;
+                               i += in_r;
+                               in_i += in_r;
+                               if (in_i == mi.length) {
+                                       sg_miter_next(&mi);
+                                       in_i = 0;
+                               }
+                       }
+                       if (op->len > 3 && (op->len % 4) == 0) {
+                               /* write buf to the device */
+                               writesl(ss->base + SS_RXFIFO, op->buf,
+                                       op->len / 4);
+                               op->byte_count += op->len;
+                               op->len = 0;
+                       }
+               }
+               if (mi.length - in_i > 3 && i < end) {
+                       /* how many bytes we can read from current SG */
+                       in_r = min3(mi.length - in_i, areq->nbytes - i,
+                                   ((mi.length - in_i) / 4) * 4);
+                       /* how many bytes we can write to the device */
+                       todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
+                       writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
+                       op->byte_count += todo * 4;
+                       i += todo * 4;
+                       in_i += todo * 4;
+                       rx_cnt -= todo;
+                       if (rx_cnt == 0) {
+                               spaces = readl(ss->base + SS_FCSR);
+                               rx_cnt = SS_RXFIFO_SPACES(spaces);
+                       }
+                       if (in_i == mi.length) {
+                               sg_miter_next(&mi);
+                               in_i = 0;
+                       }
+               }
+       } while (i < end);
+       /* final linearization: keep the remaining tail (< 64 bytes) in op->buf */
+       if ((areq->nbytes - i) < 64) {
+               while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
+                       /* how many bytes we can read from current SG */
+                       in_r = min3(mi.length - in_i, areq->nbytes - i,
+                                   64 - op->len);
+                       memcpy(op->buf + op->len, mi.addr + in_i, in_r);
+                       op->len += in_r;
+                       i += in_r;
+                       in_i += in_r;
+                       if (in_i == mi.length) {
+                               sg_miter_next(&mi);
+                               in_i = 0;
+                       }
+               }
+       }
+
+       sg_miter_stop(&mi);
+
+       writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
+       i = 0;
+       do {
+               v = readl(ss->base + SS_CTL);
+               i++;
+       } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
+       if (i >= SS_TIMEOUT) {
+               dev_err_ratelimited(ss->dev,
+                                   "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
+                                   i, SS_TIMEOUT, v, areq->nbytes);
+               err = -EIO;
+               goto release_ss;
+       }
+
+       /* get the partial hash only if something was written */
+       for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
+               op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
+
+release_ss:
+       writel(0, ss->base + SS_CTL);
+       spin_unlock_bh(&ss->slock);
+       return err;
+}
+
+/*
+ * sun4i_hash_final: finalize hashing operation
+ *
+ * If we have some remaining bytes, we write them.
+ * Then ask the SS to finalize the hashing operation.
+ *
+ * The RX FIFO size is not checked here since the FIFO holds 32 words after
+ * each enable and this function never writes more than 32 words.
+ */
+int sun4i_hash_final(struct ahash_request *areq)
+{
+       u32 v, ivmode = 0;
+       unsigned int i;
+       unsigned int j = 0;
+       int zeros, err = 0;
+       unsigned int index, padlen;
+       __be64 bits;
+       struct sun4i_req_ctx *op = ahash_request_ctx(areq);
+       struct sun4i_ss_ctx *ss = op->ss;
+       struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+       u32 bf[32];
+       u32 wb = 0;
+       unsigned int nwait, nbw = 0;
+
+       dev_dbg(ss->dev, "%s: byte=%llu len=%u mode=%x wl=%u h=%x",
+               __func__, op->byte_count, areq->nbytes, op->mode,
+               op->len, op->hash[0]);
+
+       spin_lock_bh(&ss->slock);
+
+       /*
+        * if we have already written something,
+        * restore the partial hash state
+        */
+       if (op->byte_count > 0) {
+               ivmode = SS_IV_ARBITRARY;
+               for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
+                       writel(op->hash[i], ss->base + SS_IV0 + i * 4);
+       }
+       writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
+
+       /* write the remaining words of the wait buffer */
+       if (op->len > 0) {
+               nwait = op->len / 4;
+               if (nwait > 0) {
+                       writesl(ss->base + SS_RXFIFO, op->buf, nwait);
+                       op->byte_count += 4 * nwait;
+               }
+               nbw = op->len - 4 * nwait;
+               wb = *(u32 *)(op->buf + nwait * 4);
+               wb &= (0xFFFFFFFF >> (4 - nbw) * 8);
+       }
+
+       /* append the 0x80 padding byte after the nbw remaining data bytes */
+       if (nbw > 0) {
+               wb |= ((1 << 7) << (nbw * 8));
+               bf[j++] = wb;
+       } else {
+               bf[j++] = 1 << 7;
+       }
+
+       /*
+        * number of zero words needed to pad to 64 bytes, minus 8 (the length
+        * field) and minus 4 (the word carrying the final 1 bit).
+        * The computation mirrors other MD5/SHA1 implementations.
+        */
+
+       /* we have already queued 4 more bytes, of which nbw are data bytes */
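+       /*
+        * Both digests pad the message with a 0x80 byte and zeros up to
+        * 56 mod 64 bytes, then append the 64-bit message length in bits:
+        * little-endian for MD5, big-endian for SHA1 (handled below).
+        */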
+       if (op->mode == SS_OP_MD5) {
+               index = (op->byte_count + 4) & 0x3f;
+               op->byte_count += nbw;
+               if (index > 56)
+                       zeros = (120 - index) / 4;
+               else
+                       zeros = (56 - index) / 4;
+       } else {
+               op->byte_count += nbw;
+               index = op->byte_count & 0x3f;
+               padlen = (index < 56) ? (56 - index) : ((64 + 56) - index);
+               zeros = (padlen - 1) / 4;
+       }
+
+       memset(bf + j, 0, 4 * zeros);
+       j += zeros;
+
+       /* write the total message length in bits */
+       if (op->mode == SS_OP_SHA1) {
+               bits = cpu_to_be64(op->byte_count << 3);
+               bf[j++] = bits & 0xffffffff;
+               bf[j++] = (bits >> 32) & 0xffffffff;
+       } else {
+               bf[j++] = (op->byte_count << 3) & 0xffffffff;
+               bf[j++] = (op->byte_count >> 29) & 0xffffffff;
+       }
+       writesl(ss->base + SS_RXFIFO, bf, j);
+
+       /* Tell the SS to stop the hashing */
+       writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
+
+       /*
+        * Wait for SS to finish the hash.
+        * The timeout should only happen in case of bad overclocking
+        * or a driver bug.
+        */
+       i = 0;
+       do {
+               v = readl(ss->base + SS_CTL);
+               i++;
+       } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
+       if (i >= SS_TIMEOUT) {
+               dev_err_ratelimited(ss->dev,
+                                   "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
+                                   i, SS_TIMEOUT, v, areq->nbytes);
+               err = -EIO;
+               goto release_ss;
+       }
+
+       /* Get the hash from the device */
+       if (op->mode == SS_OP_SHA1) {
+               for (i = 0; i < 5; i++) {
+                       v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
+                       memcpy(areq->result + i * 4, &v, 4);
+               }
+       } else {
+               for (i = 0; i < 4; i++) {
+                       v = readl(ss->base + SS_MD0 + i * 4);
+                       memcpy(areq->result + i * 4, &v, 4);
+               }
+       }
+
+release_ss:
+       writel(0, ss->base + SS_CTL);
+       spin_unlock_bh(&ss->slock);
+       return err;
+}
+
+/* sun4i_hash_finup: finalize hashing operation after an update */
+int sun4i_hash_finup(struct ahash_request *areq)
+{
+       int err;
+
+       err = sun4i_hash_update(areq);
+       if (err != 0)
+               return err;
+
+       return sun4i_hash_final(areq);
+}
+
+/* combo of init/update/final functions */
+int sun4i_hash_digest(struct ahash_request *areq)
+{
+       int err;
+
+       err = sun4i_hash_init(areq);
+       if (err != 0)
+               return err;
+
+       err = sun4i_hash_update(areq);
+       if (err != 0)
+               return err;
+
+       return sun4i_hash_final(areq);
+}
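
For reference, a minimal caller-side sketch of driving the digest path above
through the kernel ahash API; the helper name example_sha1_digest is
illustrative, and asynchronous completion handling (-EINPROGRESS/-EBUSY) is
omitted for brevity:

	#include <crypto/hash.h>
	#include <linux/err.h>
	#include <linux/scatterlist.h>

	static int example_sha1_digest(const u8 *data, unsigned int len, u8 *out)
	{
		struct crypto_ahash *tfm;
		struct ahash_request *req;
		struct scatterlist sg;
		int ret;

		/* pick whichever "sha1" implementation has the highest priority */
		tfm = crypto_alloc_ahash("sha1", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		req = ahash_request_alloc(tfm, GFP_KERNEL);
		if (!req) {
			crypto_free_ahash(tfm);
			return -ENOMEM;
		}

		sg_init_one(&sg, data, len);
		ahash_request_set_callback(req, 0, NULL, NULL);
		ahash_request_set_crypt(req, &sg, out, len);

		/* a real caller must also handle asynchronous completion */
		ret = crypto_ahash_digest(req);

		ahash_request_free(req);
		crypto_free_ahash(tfm);
		return ret;
	}
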
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h
new file mode 100644 (file)
index 0000000..db18b25
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * sun4i-ss.h - hardware cryptographic accelerator for Allwinner A20 SoC
+ *
+ * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
+ *
+ * Supports the AES cipher with 128-, 192- and 256-bit key sizes.
+ * Supports the MD5 and SHA1 hash algorithms.
+ * Supports DES and 3DES.
+ *
+ * You can find the datasheet in Documentation/arm/sunxi/README
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <crypto/scatterwalk.h>
+#include <linux/scatterlist.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <crypto/md5.h>
+#include <crypto/sha.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/aes.h>
+#include <crypto/des.h>
+#include <crypto/internal/rng.h>
+
+#define SS_CTL            0x00
+#define SS_KEY0           0x04
+#define SS_KEY1           0x08
+#define SS_KEY2           0x0C
+#define SS_KEY3           0x10
+#define SS_KEY4           0x14
+#define SS_KEY5           0x18
+#define SS_KEY6           0x1C
+#define SS_KEY7           0x20
+
+#define SS_IV0            0x24
+#define SS_IV1            0x28
+#define SS_IV2            0x2C
+#define SS_IV3            0x30
+
+#define SS_FCSR           0x44
+
+#define SS_MD0            0x4C
+#define SS_MD1            0x50
+#define SS_MD2            0x54
+#define SS_MD3            0x58
+#define SS_MD4            0x5C
+
+#define SS_RXFIFO         0x200
+#define SS_TXFIFO         0x204
+
+/* SS_CTL configuration values */
+
+/* PRNG generator mode - bit 15 */
+#define SS_PRNG_ONESHOT                (0 << 15)
+#define SS_PRNG_CONTINUE       (1 << 15)
+
+/* IV mode for hash */
+#define SS_IV_ARBITRARY                (1 << 14)
+
+/* SS operation mode - bits 12-13 */
+#define SS_ECB                 (0 << 12)
+#define SS_CBC                 (1 << 12)
+#define SS_CTS                 (3 << 12)
+
+/* Counter width for CNT mode - bits 10-11 */
+#define SS_CNT_16BITS          (0 << 10)
+#define SS_CNT_32BITS          (1 << 10)
+#define SS_CNT_64BITS          (2 << 10)
+
+/* Key size for AES - bits 8-9 */
+#define SS_AES_128BITS         (0 << 8)
+#define SS_AES_192BITS         (1 << 8)
+#define SS_AES_256BITS         (2 << 8)
+
+/* Operation direction - bit 7 */
+#define SS_ENCRYPTION          (0 << 7)
+#define SS_DECRYPTION          (1 << 7)
+
+/* SS Method - bits 4-6 */
+#define SS_OP_AES              (0 << 4)
+#define SS_OP_DES              (1 << 4)
+#define SS_OP_3DES             (2 << 4)
+#define SS_OP_SHA1             (3 << 4)
+#define SS_OP_MD5              (4 << 4)
+#define SS_OP_PRNG             (5 << 4)
+
+/* Data end bit - bit 2 */
+#define SS_DATA_END            (1 << 2)
+
+/* PRNG start bit - bit 1 */
+#define SS_PRNG_START          (1 << 1)
+
+/* SS Enable bit - bit 0 */
+#define SS_DISABLED            (0 << 0)
+#define SS_ENABLED             (1 << 0)
+
+/* SS_FCSR configuration values */
+/* RX FIFO status - bit 30 */
+#define SS_RXFIFO_FREE         (1 << 30)
+
+/* RX FIFO empty spaces - bits 24-29 */
+#define SS_RXFIFO_SPACES(val)  (((val) >> 24) & 0x3f)
+
+/* TX FIFO status - bit 22 */
+#define SS_TXFIFO_AVAILABLE    (1 << 22)
+
+/* TX FIFO available spaces - bits 16-21 */
+#define SS_TXFIFO_SPACES(val)  (((val) >> 16) & 0x3f)
+
+#define SS_RX_MAX      32
+#define SS_RX_DEFAULT  SS_RX_MAX
+#define SS_TX_MAX      33
+
+#define SS_RXFIFO_EMP_INT_PENDING      (1 << 10)
+#define SS_TXFIFO_AVA_INT_PENDING      (1 << 8)
+#define SS_RXFIFO_EMP_INT_ENABLE       (1 << 2)
+#define SS_TXFIFO_AVA_INT_ENABLE       (1 << 0)
+
+struct sun4i_ss_ctx {
+       void __iomem *base;
+       int irq;
+       struct clk *busclk;
+       struct clk *ssclk;
+       struct device *dev;
+       struct resource *res;
+       spinlock_t slock; /* control the use of the device */
+};
+
+struct sun4i_ss_alg_template {
+       u32 type;
+       u32 mode;
+       union {
+               struct crypto_alg crypto;
+               struct ahash_alg hash;
+       } alg;
+       struct sun4i_ss_ctx *ss;
+};
+
+struct sun4i_tfm_ctx {
+       u32 key[AES_MAX_KEY_SIZE / 4]; /* divided by sizeof(u32) */
+       u32 keylen;
+       u32 keymode;
+       struct sun4i_ss_ctx *ss;
+};
+
+struct sun4i_cipher_req_ctx {
+       u32 mode;
+};
+
+struct sun4i_req_ctx {
+       u32 mode;
+       u64 byte_count; /* number of bytes "uploaded" to the device */
+       u32 hash[5]; /* for storing SS_IVx register */
+       char buf[64];
+       unsigned int len;
+       struct sun4i_ss_ctx *ss;
+};
+
+int sun4i_hash_crainit(struct crypto_tfm *tfm);
+int sun4i_hash_init(struct ahash_request *areq);
+int sun4i_hash_update(struct ahash_request *areq);
+int sun4i_hash_final(struct ahash_request *areq);
+int sun4i_hash_finup(struct ahash_request *areq);
+int sun4i_hash_digest(struct ahash_request *areq);
+int sun4i_hash_export_md5(struct ahash_request *areq, void *out);
+int sun4i_hash_import_md5(struct ahash_request *areq, const void *in);
+int sun4i_hash_export_sha1(struct ahash_request *areq, void *out);
+int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in);
+
+int sun4i_ss_cbc_aes_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_aes_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_aes_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_aes_decrypt(struct ablkcipher_request *areq);
+
+int sun4i_ss_cbc_des_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_des_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_des_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_des_decrypt(struct ablkcipher_request *areq);
+
+int sun4i_ss_cbc_des3_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_cbc_des3_decrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_des3_encrypt(struct ablkcipher_request *areq);
+int sun4i_ss_ecb_des3_decrypt(struct ablkcipher_request *areq);
+
+int sun4i_ss_cipher_init(struct crypto_tfm *tfm);
+int sun4i_ss_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                       unsigned int keylen);
+int sun4i_ss_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                       unsigned int keylen);
+int sun4i_ss_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+                        unsigned int keylen);
index 314daf55e7f77791d075065a427b531bb361cce1..163cfe733bf055d36e5bbf21591d8571e3a0da96 100644 (file)
@@ -52,12 +52,7 @@ struct talitos_ptr {
        __be32 ptr;     /* address */
 };
 
-static const struct talitos_ptr zero_entry = {
-       .len = 0,
-       .j_extent = 0,
-       .eptr = 0,
-       .ptr = 0
-};
+static const struct talitos_ptr zero_entry;
 
 /* descriptor */
 struct talitos_desc {
index e79e567e43aacae4584b32c2d7fc9ae1e6c1e300..263af709e53604ee5a049f707d2e9f5795031d1d 100644 (file)
@@ -84,6 +84,7 @@ static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
        preempt_disable();
        pagefault_disable();
        enable_kernel_altivec();
+       enable_kernel_vsx();
        ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
        ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
        pagefault_enable();
@@ -103,6 +104,7 @@ static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
                preempt_disable();
                pagefault_disable();
                enable_kernel_altivec();
+               enable_kernel_vsx();
                aes_p8_encrypt(src, dst, &ctx->enc_key);
                pagefault_enable();
                preempt_enable();
@@ -119,6 +121,7 @@ static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
                preempt_disable();
                pagefault_disable();
                enable_kernel_altivec();
+               enable_kernel_vsx();
                aes_p8_decrypt(src, dst, &ctx->dec_key);
                pagefault_enable();
                preempt_enable();
index 7299995c78ec3b34ea76e289cf84dc877f1175ef..0b8fe2ec5315fc8253431ca533d953b9c72d7243 100644 (file)
@@ -85,6 +85,7 @@ static int p8_aes_cbc_setkey(struct crypto_tfm *tfm, const u8 *key,
        preempt_disable();
        pagefault_disable();
        enable_kernel_altivec();
+       enable_kernel_vsx();
        ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
        ret += aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
        pagefault_enable();
@@ -115,6 +116,7 @@ static int p8_aes_cbc_encrypt(struct blkcipher_desc *desc,
                preempt_disable();
                pagefault_disable();
                enable_kernel_altivec();
+               enable_kernel_vsx();
 
                blkcipher_walk_init(&walk, dst, src, nbytes);
                ret = blkcipher_walk_virt(desc, &walk);
@@ -155,6 +157,7 @@ static int p8_aes_cbc_decrypt(struct blkcipher_desc *desc,
                preempt_disable();
                pagefault_disable();
                enable_kernel_altivec();
+               enable_kernel_vsx();
 
                blkcipher_walk_init(&walk, dst, src, nbytes);
                ret = blkcipher_walk_virt(desc, &walk);
index 7adae42a7b79ea81a5bc35ae2db9db9b6a2437e2..1e754ae4e8509ee490af0d392dafb9f70a97e313 100644 (file)
@@ -82,6 +82,7 @@ static int p8_aes_ctr_setkey(struct crypto_tfm *tfm, const u8 *key,
 
        pagefault_disable();
        enable_kernel_altivec();
+       enable_kernel_vsx();
        ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
        pagefault_enable();
 
@@ -100,6 +101,7 @@ static void p8_aes_ctr_final(struct p8_aes_ctr_ctx *ctx,
 
        pagefault_disable();
        enable_kernel_altivec();
+       enable_kernel_vsx();
        aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
        pagefault_enable();
 
@@ -131,6 +133,7 @@ static int p8_aes_ctr_crypt(struct blkcipher_desc *desc,
                while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
                        pagefault_disable();
                        enable_kernel_altivec();
+                       enable_kernel_vsx();
                        aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
                                                    walk.dst.virt.addr,
                                                    (nbytes &
index b5e29002b66678337c54ec7858634d43285c4213..2183a2e77641e0682ca113951430765bcbcca4fc 100644 (file)
@@ -119,6 +119,7 @@ static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
        preempt_disable();
        pagefault_disable();
        enable_kernel_altivec();
+       enable_kernel_vsx();
        enable_kernel_fp();
        gcm_init_p8(ctx->htable, (const u64 *) key);
        pagefault_enable();
@@ -149,6 +150,7 @@ static int p8_ghash_update(struct shash_desc *desc,
                        preempt_disable();
                        pagefault_disable();
                        enable_kernel_altivec();
+                       enable_kernel_vsx();
                        enable_kernel_fp();
                        gcm_ghash_p8(dctx->shash, ctx->htable,
                                     dctx->buffer, GHASH_DIGEST_SIZE);
@@ -163,6 +165,7 @@ static int p8_ghash_update(struct shash_desc *desc,
                        preempt_disable();
                        pagefault_disable();
                        enable_kernel_altivec();
+                       enable_kernel_vsx();
                        enable_kernel_fp();
                        gcm_ghash_p8(dctx->shash, ctx->htable, src, len);
                        pagefault_enable();
@@ -193,6 +196,7 @@ static int p8_ghash_final(struct shash_desc *desc, u8 *out)
                        preempt_disable();
                        pagefault_disable();
                        enable_kernel_altivec();
+                       enable_kernel_vsx();
                        enable_kernel_fp();
                        gcm_ghash_p8(dctx->shash, ctx->htable,
                                     dctx->buffer, GHASH_DIGEST_SIZE);
index 7169ad04acc06602aa10ffaeeedcf26c061a2da2..14e35364cdfa6e6695fcba2afdc78bed2e2fa8dd 100644 (file)
  * a breach in the integrity of the message. In essence, that -EBADMSG error
  * code is the key bonus an AEAD cipher has over "standard" block chaining
  * modes.
+ *
+ * Memory Structure:
+ *
+ * To support the needs of the most prominent user of AEAD ciphers, namely
+ * IPSEC, the AEAD ciphers have a special memory layout the caller must adhere
+ * to.
+ *
+ * The scatter list pointing to the input data must contain:
+ *
+ * * for RFC4106 ciphers, the concatenation of
+ * associated authentication data || IV || plaintext or ciphertext. Note that
+ * the same IV (buffer) is also set with the aead_request_set_crypt call. Also
+ * note that aead_request_set_ad must provide the combined length of the AAD
+ * and the IV, whereas aead_request_set_crypt only covers the size of the
+ * input plaintext or ciphertext.
+ *
+ * * for "normal" AEAD ciphers, the concatenation of
+ * associated authentication data || plaintext or ciphertext.
+ *
+ * It is important to note that if multiple scatter gather list entries form
+ * the input data mentioned above, the first entry must not point to a NULL
+ * buffer. If there is any potential where the AAD buffer can be NULL, the
+ * calling code must contain a precaution to ensure that this does not result
+ * in the first scatter gather list entry pointing to a NULL buffer.
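+ *
+ * As a non-authoritative sketch (buffer and variable names are only
+ * illustrative), a "normal" AEAD cipher could be fed as follows:
+ *
+ *   sg_init_table(sg, 2);
+ *   sg_set_buf(&sg[0], assoc, assoclen);
+ *   sg_set_buf(&sg[1], data, datalen);    (plaintext or ciphertext)
+ *   aead_request_set_ad(req, assoclen);
+ *   aead_request_set_crypt(req, sg, sg, datalen, iv);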
  */
 
 /**
index d4ebf6e9af6a536c589d55c914e56f1c6000f910..c9fe145f7dd3bad3af8cd7902accefbc8c7b5366 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 
 struct crypto_aead;
+struct crypto_instance;
 struct module;
 struct rtattr;
 struct seq_file;
@@ -30,6 +31,7 @@ struct crypto_type {
        void (*show)(struct seq_file *m, struct crypto_alg *alg);
        int (*report)(struct sk_buff *skb, struct crypto_alg *alg);
        struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask);
+       void (*free)(struct crypto_instance *inst);
 
        unsigned int type;
        unsigned int maskclear;
@@ -180,7 +182,6 @@ struct crypto_instance *crypto_alloc_instance(const char *name,
 void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
 int crypto_enqueue_request(struct crypto_queue *queue,
                           struct crypto_async_request *request);
-void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset);
 struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue);
 int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm);
 
diff --git a/include/crypto/chacha20.h b/include/crypto/chacha20.h
new file mode 100644 (file)
index 0000000..274bbae
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Common values for the ChaCha20 algorithm
+ */
+
+#ifndef _CRYPTO_CHACHA20_H
+#define _CRYPTO_CHACHA20_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define CHACHA20_IV_SIZE       16
+#define CHACHA20_KEY_SIZE      32
+#define CHACHA20_BLOCK_SIZE    64
+
+struct chacha20_ctx {
+       u32 key[8];
+};
+
+void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv);
+int crypto_chacha20_setkey(struct crypto_tfm *tfm, const u8 *key,
+                          unsigned int keysize);
+int crypto_chacha20_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+                         struct scatterlist *src, unsigned int nbytes);
+
+#endif
index 4b2547186519f80e00729144dd8edafab238b5ef..a292e960fb33c4073576962d634ce6c7f36af475 100644 (file)
@@ -21,6 +21,7 @@
 struct rtattr;
 
 struct aead_instance {
+       void (*free)(struct aead_instance *inst);
        union {
                struct {
                        char head[offsetof(struct aead_alg, base)];
@@ -34,6 +35,10 @@ struct crypto_aead_spawn {
        struct crypto_spawn base;
 };
 
+struct aead_queue {
+       struct crypto_queue base;
+};
+
 extern const struct crypto_type crypto_aead_type;
 extern const struct crypto_type crypto_nivaead_type;
 
@@ -157,6 +162,37 @@ static inline unsigned int crypto_aead_maxauthsize(struct crypto_aead *aead)
        return crypto_aead_alg_maxauthsize(crypto_aead_alg(aead));
 }
 
+static inline void aead_init_queue(struct aead_queue *queue,
+                                  unsigned int max_qlen)
+{
+       crypto_init_queue(&queue->base, max_qlen);
+}
+
+static inline int aead_enqueue_request(struct aead_queue *queue,
+                                      struct aead_request *request)
+{
+       return crypto_enqueue_request(&queue->base, &request->base);
+}
+
+static inline struct aead_request *aead_dequeue_request(
+       struct aead_queue *queue)
+{
+       struct crypto_async_request *req;
+
+       req = crypto_dequeue_request(&queue->base);
+
+       return req ? container_of(req, struct aead_request, base) : NULL;
+}
+
+static inline struct aead_request *aead_get_backlog(struct aead_queue *queue)
+{
+       struct crypto_async_request *req;
+
+       req = crypto_get_backlog(&queue->base);
+
+       return req ? container_of(req, struct aead_request, base) : NULL;
+}
+
 int crypto_register_aead(struct aead_alg *alg);
 void crypto_unregister_aead(struct aead_alg *alg);
 int crypto_register_aeads(struct aead_alg *algs, int count);
diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h
new file mode 100644 (file)
index 0000000..894df59
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Common values for the Poly1305 algorithm
+ */
+
+#ifndef _CRYPTO_POLY1305_H
+#define _CRYPTO_POLY1305_H
+
+#include <linux/types.h>
+#include <linux/crypto.h>
+
+#define POLY1305_BLOCK_SIZE    16
+#define POLY1305_KEY_SIZE      32
+#define POLY1305_DIGEST_SIZE   16
+
+struct poly1305_desc_ctx {
+       /* key */
+       u32 r[5];
+       /* finalize key */
+       u32 s[4];
+       /* accumulator */
+       u32 h[5];
+       /* partial buffer */
+       u8 buf[POLY1305_BLOCK_SIZE];
+       /* bytes used in partial buffer */
+       unsigned int buflen;
+       /* r key has been set */
+       bool rset;
+       /* s key has been set */
+       bool sset;
+};
+
+int crypto_poly1305_init(struct shash_desc *desc);
+int crypto_poly1305_setkey(struct crypto_shash *tfm,
+                          const u8 *key, unsigned int keylen);
+unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
+                                       const u8 *src, unsigned int srclen);
+int crypto_poly1305_update(struct shash_desc *desc,
+                          const u8 *src, unsigned int srclen);
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst);
+
+#endif