/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/ccp.h>
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha.h>
enum ccp_memtype {
	CCP_MEMTYPE_SYSTEM = 0,
	CCP_MEMTYPE_KSB,
};

struct ccp_dma_info {
	dma_addr_t address;
	unsigned int offset;
	unsigned int length;
	enum dma_data_direction dir;
};

struct ccp_dm_workarea {
	struct device *dev;
	struct dma_pool *dma_pool;
	unsigned int length;

	u8 *address;
	struct ccp_dma_info dma;
};

struct ccp_sg_workarea {
	struct scatterlist *sg;
	unsigned int nents;

	struct scatterlist *dma_sg;
	struct device *dma_dev;
	unsigned int dma_count;
	enum dma_data_direction dma_dir;

	unsigned int sg_used;

	u64 bytes_left;
};

struct ccp_data {
	struct ccp_sg_workarea sg_wa;
	struct ccp_dm_workarea dm_wa;
};

struct ccp_mem {
	enum ccp_memtype type;
	union {
		struct ccp_dma_info dma;
		u32 ksb;
	} u;
};
struct ccp_aes_op {
	enum ccp_aes_type type;
	enum ccp_aes_mode mode;
	enum ccp_aes_action action;
};

struct ccp_xts_aes_op {
	enum ccp_aes_action action;
	enum ccp_xts_aes_unit_size unit_size;
};

struct ccp_sha_op {
	enum ccp_sha_type type;
	u64 msg_bits;
};

struct ccp_rsa_op {
	u32 mod_size;
	u32 input_len;
};

struct ccp_passthru_op {
	enum ccp_passthru_bitwise bit_mod;
	enum ccp_passthru_byteswap byte_swap;
};

struct ccp_ecc_op {
	enum ccp_ecc_function function;
};

struct ccp_op {
	struct ccp_cmd_queue *cmd_q;

	u32 jobid;
	u32 ioc;
	u32 soc;
	u32 ksb_key;
	u32 ksb_ctx;
	u32 init;
	u32 eom;

	struct ccp_mem src;
	struct ccp_mem dst;

	union {
		struct ccp_aes_op aes;
		struct ccp_xts_aes_op xts;
		struct ccp_sha_op sha;
		struct ccp_rsa_op rsa;
		struct ccp_passthru_op passthru;
		struct ccp_ecc_op ecc;
	} u;
};
/* SHA initial context values */
static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
	cpu_to_be32(SHA1_H4), 0, 0, 0,
};

static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};
static u32 ccp_addr_lo(struct ccp_dma_info *info)
{
	return lower_32_bits(info->address + info->offset);
}
static u32 ccp_addr_hi(struct ccp_dma_info *info)
{
	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
}
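/* A note on the two helpers above: together they split a DMA address into
 * a 32-bit low word and a high word masked to 16 bits, so the device
 * appears to consume at most 48-bit addresses. A minimal sketch of the
 * split (illustrative only, not part of the driver):
 *
 *	dma_addr_t addr = 0x0000123456789abcULL;
 *	u32 lo = lower_32_bits(addr);			// 0x56789abc
 *	u32 hi = upper_32_bits(addr) & 0x0000ffff;	// 0x00001234
 */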
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
	struct ccp_cmd_queue *cmd_q = op->cmd_q;
	struct ccp_device *ccp = cmd_q->ccp;
	void __iomem *cr_addr;
	u32 cr0, cmd;
	unsigned int i;
	int ret = 0;

	/* We could read a status register to see how many free slots
	 * are actually available, but reading that register resets it
	 * and you could lose some error information.
	 */
	cmd_q->free_slots--;

	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
	      | (op->jobid << REQ0_JOBID_SHIFT)
	      | REQ0_WAIT_FOR_WRITE;

	if (op->soc)
		cr0 |= REQ0_STOP_ON_COMPLETE
		       | REQ0_INT_ON_COMPLETE;

	if (op->ioc || !cmd_q->free_slots)
		cr0 |= REQ0_INT_ON_COMPLETE;

	/* Start at CMD_REQ1 */
	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

	mutex_lock(&ccp->req_mutex);

	/* Write CMD_REQ1 through CMD_REQx first */
	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
		iowrite32(*(cr + i), cr_addr);

	/* Tell the CCP to start */
	wmb();
	iowrite32(cr0, ccp->io_regs + CMD_REQ0);

	mutex_unlock(&ccp->req_mutex);

	if (cr0 & REQ0_INT_ON_COMPLETE) {
		/* Wait for the job to complete */
		ret = wait_event_interruptible(cmd_q->int_queue,
					       cmd_q->int_rcvd);
		if (ret || cmd_q->cmd_error) {
			/* On error delete all related jobs from the queue */
			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

			if (!ret)
				ret = -EIO;
		} else if (op->soc) {
			/* Delete just head job from the queue on SoC */
			cmd = DEL_Q_ACTIVE
			      | (cmd_q->id << DEL_Q_ID_SHIFT)
			      | op->jobid;

			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
		}

		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

		cmd_q->int_rcvd = 0;
	}

	return ret;
}
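/* Usage sketch for ccp_do_cmd() (illustrative only): an engine-specific
 * routine packs six 32-bit words that land in CMD_REQ1..CMD_REQ6, while
 * ccp_do_cmd() itself builds the REQ0 word (queue id, job id, flags) and
 * writes it last to kick off the operation:
 *
 *	u32 cr[6];
 *
 *	cr[0] = (engine << REQ1_ENGINE_SHIFT) | ...;	// REQ1: engine/config
 *	cr[1] = src_len - 1;				// REQ2: length
 *	...						// REQ3-REQ6: addresses
 *	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
 *
 * The ccp_perform_*() functions below all follow this pattern.
 */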
static int ccp_perform_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->u.aes.mode == CCP_AES_MODE_CFB)
		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_xts_aes(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	if (op->eom)
		cr[0] |= REQ1_EOM;

	if (op->init)
		cr[0] |= REQ1_INIT;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_sha(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
		| REQ1_INIT;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);

	if (op->eom) {
		cr[0] |= REQ1_EOM;
		cr[4] = lower_32_bits(op->u.sha.msg_bits);
		cr[5] = upper_32_bits(op->u.sha.msg_bits);
	} else {
		cr[4] = 0;
		cr[5] = 0;
	}

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_rsa(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
		| REQ1_EOM;
	cr[1] = op->u.rsa.input_len - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_passthru(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

	if (op->src.type == CCP_MEMTYPE_SYSTEM)
		cr[1] = op->src.u.dma.length - 1;
	else
		cr[1] = op->dst.u.dma.length - 1;

	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
		cr[2] = ccp_addr_lo(&op->src.u.dma);
		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->src.u.dma);

		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
	} else {
		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
	}

	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
		cr[4] = ccp_addr_lo(&op->dst.u.dma);
		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
			| ccp_addr_hi(&op->dst.u.dma);
	} else {
		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
	}

	if (op->eom)
		cr[0] |= REQ1_EOM;

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_ecc(struct ccp_op *op)
{
	u32 cr[6];

	/* Fill out the register contents for REQ1 through REQ6 */
	cr[0] = REQ1_ECC_AFFINE_CONVERT
		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
		| REQ1_EOM;
	cr[1] = op->src.u.dma.length - 1;
	cr[2] = ccp_addr_lo(&op->src.u.dma);
	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->src.u.dma);
	cr[4] = ccp_addr_lo(&op->dst.u.dma);
	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
		| ccp_addr_hi(&op->dst.u.dma);

	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
{
	int start;

	for (;;) {
		mutex_lock(&ccp->ksb_mutex);

		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
							ccp->ksb_count,
							ccp->ksb_start,
							count, 0);
		if (start <= ccp->ksb_count) {
			bitmap_set(ccp->ksb, start, count);

			mutex_unlock(&ccp->ksb_mutex);
			break;
		}

		ccp->ksb_avail = 0;

		mutex_unlock(&ccp->ksb_mutex);

		/* Wait for KSB entries to become available */
		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
			return 0;
	}

	return KSB_START + start;
}
static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
			 unsigned int count)
{
	if (!start)
		return;

	mutex_lock(&ccp->ksb_mutex);

	bitmap_clear(ccp->ksb, start - KSB_START, count);

	ccp->ksb_avail = 1;

	mutex_unlock(&ccp->ksb_mutex);

	wake_up_interruptible_all(&ccp->ksb_queue);
}
static u32 ccp_gen_jobid(struct ccp_device *ccp)
{
	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
}
static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
	if (wa->dma_count)
		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);

	wa->dma_count = 0;
}
static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
				struct scatterlist *sg, u64 len,
				enum dma_data_direction dma_dir)
{
	int nents;

	memset(wa, 0, sizeof(*wa));

	wa->sg = sg;
	if (!sg)
		return 0;

	nents = sg_nents_for_len(sg, len);
	if (nents < 0)
		return nents;
	wa->nents = nents;

	wa->bytes_left = len;
	wa->sg_used = 0;

	if (len == 0)
		return 0;

	if (dma_dir == DMA_NONE)
		return 0;

	wa->dma_sg = sg;
	wa->dma_dev = dev;
	wa->dma_dir = dma_dir;
	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
	if (!wa->dma_count)
		return -ENOMEM;

	return 0;
}
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
	unsigned int nbytes = min_t(u64, len, wa->bytes_left);

	if (!wa->sg)
		return;

	wa->sg_used += nbytes;
	wa->bytes_left -= nbytes;
	if (wa->sg_used == wa->sg->length) {
		wa->sg = sg_next(wa->sg);
		wa->sg_used = 0;
	}
}
static void ccp_dm_free(struct ccp_dm_workarea *wa)
{
	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
		if (wa->dma.address)
			dma_pool_free(wa->dma_pool, wa->address,
				      wa->dma.address);
	} else {
		if (wa->dma.address)
			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
					 wa->dma.dir);
		kfree(wa->address);
	}

	wa->address = NULL;
	wa->dma.address = 0;
}
static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
				struct ccp_cmd_queue *cmd_q,
				unsigned int len,
				enum dma_data_direction dir)
{
	memset(wa, 0, sizeof(*wa));

	if (!len)
		return 0;

	wa->dev = cmd_q->ccp->dev;
	wa->length = len;

	if (len <= CCP_DMAPOOL_MAX_SIZE) {
		wa->dma_pool = cmd_q->dma_pool;

		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
					     &wa->dma.address);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;

		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
	} else {
		wa->address = kzalloc(len, GFP_KERNEL);
		if (!wa->address)
			return -ENOMEM;

		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
						 dir);
		if (!wa->dma.address)
			return -ENOMEM;

		wa->dma.length = len;
	}
	wa->dma.dir = dir;

	return 0;
}
static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 0);
}
static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
			    struct scatterlist *sg, unsigned int sg_offset,
			    unsigned int len)
{
	WARN_ON(!wa->address);

	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
				 1);
}
static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
				   struct scatterlist *sg,
				   unsigned int len, unsigned int se_len,
				   bool sign_extend)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	if (WARN_ON(se_len > sizeof(buffer)))
		return -EINVAL;

	sg_offset = len;
	dm_offset = 0;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, se_len);
		sg_offset -= ksb_len;

		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
		for (i = 0; i < ksb_len; i++)
			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];

		dm_offset += ksb_len;
		nbytes -= ksb_len;

		if ((ksb_len != se_len) && sign_extend) {
			/* Must sign-extend to nearest sign-extend length */
			if (wa->address[dm_offset - 1] & 0x80)
				memset(wa->address + dm_offset, 0xff,
				       se_len - ksb_len);
		}
	}

	return 0;
}
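/* Worked example for ccp_reverse_set_dm_area() (illustrative only): with
 * len = 5, se_len = 4 and big endian input bytes 01 02 03 04 05, the last
 * 4-byte chunk (02 03 04 05) is reversed into wa->address[0..3] as
 * 05 04 03 02, then the remaining 1-byte chunk (01) lands at
 * wa->address[4]. If sign_extend is set and that final short chunk has
 * its high bit set, the rest of its se_len-wide slot is filled with 0xff.
 */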
static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
				    struct scatterlist *sg,
				    unsigned int len)
{
	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
	u8 buffer[CCP_REVERSE_BUF_SIZE];

	sg_offset = 0;
	dm_offset = len;
	nbytes = len;
	while (nbytes) {
		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
		dm_offset -= ksb_len;

		for (i = 0; i < ksb_len; i++)
			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);

		sg_offset += ksb_len;
		nbytes -= ksb_len;
	}
}
static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
{
	ccp_dm_free(&data->dm_wa);
	ccp_sg_free(&data->sg_wa);
}
static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
			 struct scatterlist *sg, u64 sg_len,
			 unsigned int dm_len,
			 enum dma_data_direction dir)
{
	int ret;

	memset(data, 0, sizeof(*data));

	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
				   dir);
	if (ret)
		goto e_err;

	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
	if (ret)
		goto e_err;

	return 0;

e_err:
	ccp_free_data(data, cmd_q);

	return ret;
}
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
	unsigned int buf_count, nbytes;

	/* Clear the buffer if setting it */
	if (!from)
		memset(dm_wa->address, 0, dm_wa->length);

	if (!sg_wa->sg)
		return 0;

	/* Perform the copy operation
	 * nbytes will always be <= UINT_MAX because dm_wa->length is
	 * an unsigned int
	 */
	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
				 nbytes, from);

	/* Update the structures and generate the count */
	buf_count = 0;
	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
			     dm_wa->length - buf_count);
		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);

		buf_count += nbytes;
		ccp_update_sg_workarea(sg_wa, nbytes);
	}

	return buf_count;
}
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 0);
}

static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	return ccp_queue_buf(data, 1);
}
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one address each per operation. This
	 * requires that we find the smallest DMA area between the source
	 * and destination. The resulting len values will always be <= UINT_MAX
	 * because the dma length is an unsigned int.
	 */
	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

	if (dst) {
		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
		op_len = min(sg_src_len, sg_dst_len);
	} else {
		op_len = sg_src_len;
	}

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * the destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}
static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op)
{
	op->init = 0;

	if (dst) {
		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
			ccp_empty_queue_buf(dst);
		else
			ccp_update_sg_workarea(&dst->sg_wa,
					       op->dst.u.dma.length);
	}
}
static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
				u32 byte_swap, bool from)
{
	struct ccp_op op;

	memset(&op, 0, sizeof(op));

	op.cmd_q = cmd_q;
	op.jobid = jobid;
	op.eom = 1;

	if (from) {
		op.soc = 1;
		op.src.type = CCP_MEMTYPE_KSB;
		op.src.u.ksb = ksb;
		op.dst.type = CCP_MEMTYPE_SYSTEM;
		op.dst.u.dma.address = wa->dma.address;
		op.dst.u.dma.length = wa->length;
	} else {
		op.src.type = CCP_MEMTYPE_SYSTEM;
		op.src.u.dma.address = wa->dma.address;
		op.src.u.dma.length = wa->length;
		op.dst.type = CCP_MEMTYPE_KSB;
		op.dst.u.ksb = ksb;
	}

	op.u.passthru.byte_swap = byte_swap;

	return ccp_perform_passthru(&op);
}
static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			   u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
}

static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
			     u32 byte_swap)
{
	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
}
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
				struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
						op.ksb_ctx,
						CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					aes->cmac_key_len);
			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid,
					      op.ksb_ctx,
					      CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
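/* Key placement sketch for the KSB copies above (illustrative only): a
 * 16-byte big endian AES key is written at offset 16 of the zeroed
 * 32-byte workarea, and the 256-bit byteswap passthru then reverses the
 * whole entry, leaving the key little endian at the start of the KSB
 * entry:
 *
 *	workarea:  00 .. 00 | k0 k1 .. k15	(dm_offset = 32 - 16 = 16)
 *	KSB entry: k15 .. k1 k0 | 00 .. 00	(after BYTESWAP_256BIT)
 */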
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (aes->mode == CCP_AES_MODE_CMAC)
		return ccp_run_aes_cmac_cmd(cmd_q, cmd);

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC) ||
	     (aes->mode == CCP_AES_MODE_CFB)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - aes->key_len;
	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				      CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = ccp_perform_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
					CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
			       struct ccp_cmd *cmd)
{
	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int unit_size, dm_offset;
	bool in_place = false;
	int ret;

	switch (xts->unit_size) {
	case CCP_XTS_AES_UNIT_SIZE_16:
		unit_size = 16;
		break;
	case CCP_XTS_AES_UNIT_SIZE_512:
		unit_size = 512;
		break;
	case CCP_XTS_AES_UNIT_SIZE_1024:
		unit_size = 1024;
		break;
	case CCP_XTS_AES_UNIT_SIZE_2048:
		unit_size = 2048;
		break;
	case CCP_XTS_AES_UNIT_SIZE_4096:
		unit_size = 4096;
		break;

	default:
		return -EINVAL;
	}

	if (xts->key_len != AES_KEYSIZE_128)
		return -EINVAL;

	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (xts->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
		return -EINVAL;

	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_key = cmd_q->ksb_key;
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.init = 1;
	op.u.xts.action = xts->action;
	op.u.xts.unit_size = xts->unit_size;

	/* All supported key sizes fit in a single (32-byte) KSB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) KSB entry and
	 * for XTS is already in little endian format so no byte swapping
	 * is needed.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_NOOP);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(xts->src) == sg_virt(xts->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
			    unit_size,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
				    unit_size, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, unit_size, true);
		if (!src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_xts_aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);

e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
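/* A note on the XTS key loading above (a reading of the code, not a
 * documented hardware layout): the two ccp_set_dm_area() calls place the
 * two 128-bit halves of the XTS key material at opposite ends of the
 * 32-byte workarea, so that after the 256-bit byteswap the cipher key and
 * the tweak key each occupy one byte-reversed half of the KSB entry:
 *
 *	workarea:  key[16..31] | key[0..15]
 *	KSB entry: reverse(key[0..15]) | reverse(key[16..31])
 */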
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_sha_engine *sha = &cmd->u.sha;
	struct ccp_dm_workarea ctx;
	struct ccp_data src;
	struct ccp_op op;
	int ret;

	if (sha->ctx_len != CCP_SHA_CTXSIZE)
		return -EINVAL;

	if (!sha->ctx)
		return -EINVAL;

	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
		return -EINVAL;

	if (!sha->src_len) {
		const u8 *sha_zero;

		/* Not final, just return */
		if (!sha->final)
			return 0;

		/* CCP can't do a zero length sha operation so the caller
		 * must buffer the data.
		 */
		if (sha->msg_bits)
			return -EINVAL;

		/* The CCP cannot perform zero-length sha operations so the
		 * caller is required to buffer data for the final operation.
		 * However, a sha operation for a message with a total length
		 * of zero is valid so known values are required to supply
		 * the result.
		 */
		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			sha_zero = sha1_zero_message_hash;
			break;
		case CCP_SHA_TYPE_224:
			sha_zero = sha224_zero_message_hash;
			break;
		case CCP_SHA_TYPE_256:
			sha_zero = sha256_zero_message_hash;
			break;
		default:
			return -EINVAL;
		}

		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
					 sha->ctx_len, 1);

		return 0;
	}

	if (!sha->src)
		return -EINVAL;

	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);
	op.ksb_ctx = cmd_q->ksb_ctx;
	op.u.sha.type = sha->type;
	op.u.sha.msg_bits = sha->msg_bits;

	/* The SHA context fits in a single (32-byte) KSB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		return ret;

	if (sha->first) {
		const __be32 *init;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			init = ccp_sha1_init;
			break;
		case CCP_SHA_TYPE_224:
			init = ccp_sha224_init;
			break;
		case CCP_SHA_TYPE_256:
			init = ccp_sha256_init;
			break;
		default:
			ret = -EINVAL;
			goto e_ctx;
		}
		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
	} else {
		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
	}

	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
			      CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP SHA engine */
	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
		if (sha->final && !src.sg_wa.bytes_left)
			op.eom = 1;

		ret = ccp_perform_sha(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_data;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the SHA context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping to BE
	 */
	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
				CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_data;
	}

	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

	if (sha->final && sha->opad) {
		/* HMAC operation, recursively perform final SHA */
		struct ccp_cmd hmac_cmd;
		struct scatterlist sg;
		u64 block_size, digest_size;
		u8 *hmac_buf;

		switch (sha->type) {
		case CCP_SHA_TYPE_1:
			block_size = SHA1_BLOCK_SIZE;
			digest_size = SHA1_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_224:
			block_size = SHA224_BLOCK_SIZE;
			digest_size = SHA224_DIGEST_SIZE;
			break;
		case CCP_SHA_TYPE_256:
			block_size = SHA256_BLOCK_SIZE;
			digest_size = SHA256_DIGEST_SIZE;
			break;
		default:
			ret = -EINVAL;
			goto e_data;
		}

		if (sha->opad_len != block_size) {
			ret = -EINVAL;
			goto e_data;
		}

		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
		if (!hmac_buf) {
			ret = -ENOMEM;
			goto e_data;
		}
		sg_init_one(&sg, hmac_buf, block_size + digest_size);

		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size,
					 0);
		memcpy(hmac_buf + block_size, ctx.address, digest_size);

		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
		hmac_cmd.engine = CCP_ENGINE_SHA;
		hmac_cmd.u.sha.type = sha->type;
		hmac_cmd.u.sha.ctx = sha->ctx;
		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
		hmac_cmd.u.sha.src = &sg;
		hmac_cmd.u.sha.src_len = block_size + digest_size;
		hmac_cmd.u.sha.opad = NULL;
		hmac_cmd.u.sha.opad_len = 0;
		hmac_cmd.u.sha.first = 1;
		hmac_cmd.u.sha.final = 1;
		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;

		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
		if (ret)
			cmd->engine_error = hmac_cmd.engine_error;

		kfree(hmac_buf);
	}

e_data:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

	return ret;
}
*cmd_q
, struct ccp_cmd
*cmd
)
1548 struct ccp_rsa_engine
*rsa
= &cmd
->u
.rsa
;
1549 struct ccp_dm_workarea exp
, src
;
1550 struct ccp_data dst
;
1552 unsigned int ksb_count
, i_len
, o_len
;
1555 if (rsa
->key_size
> CCP_RSA_MAX_WIDTH
)
1558 if (!rsa
->exp
|| !rsa
->mod
|| !rsa
->src
|| !rsa
->dst
)
1561 /* The RSA modulus must precede the message being acted upon, so
1562 * it must be copied to a DMA area where the message and the
1563 * modulus can be concatenated. Therefore the input buffer
1564 * length required is twice the output buffer length (which
1565 * must be a multiple of 256-bits).
1567 o_len
= ((rsa
->key_size
+ 255) / 256) * 32;
1570 ksb_count
= o_len
/ CCP_KSB_BYTES
;
1572 memset(&op
, 0, sizeof(op
));
1574 op
.jobid
= ccp_gen_jobid(cmd_q
->ccp
);
1575 op
.ksb_key
= ccp_alloc_ksb(cmd_q
->ccp
, ksb_count
);
1579 /* The RSA exponent may span multiple (32-byte) KSB entries and must
1580 * be in little endian format. Reverse copy each 32-byte chunk
1581 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1582 * and each byte within that chunk and do not perform any byte swap
1583 * operations on the passthru operation.
1585 ret
= ccp_init_dm_workarea(&exp
, cmd_q
, o_len
, DMA_TO_DEVICE
);
1589 ret
= ccp_reverse_set_dm_area(&exp
, rsa
->exp
, rsa
->exp_len
,
1590 CCP_KSB_BYTES
, false);
1593 ret
= ccp_copy_to_ksb(cmd_q
, &exp
, op
.jobid
, op
.ksb_key
,
1594 CCP_PASSTHRU_BYTESWAP_NOOP
);
1596 cmd
->engine_error
= cmd_q
->cmd_error
;
1600 /* Concatenate the modulus and the message. Both the modulus and
1601 * the operands must be in little endian format. Since the input
1602 * is in big endian format it must be converted.
1604 ret
= ccp_init_dm_workarea(&src
, cmd_q
, i_len
, DMA_TO_DEVICE
);
1608 ret
= ccp_reverse_set_dm_area(&src
, rsa
->mod
, rsa
->mod_len
,
1609 CCP_KSB_BYTES
, false);
1612 src
.address
+= o_len
; /* Adjust the address for the copy operation */
1613 ret
= ccp_reverse_set_dm_area(&src
, rsa
->src
, rsa
->src_len
,
1614 CCP_KSB_BYTES
, false);
1617 src
.address
-= o_len
; /* Reset the address to original value */
1619 /* Prepare the output area for the operation */
1620 ret
= ccp_init_data(&dst
, cmd_q
, rsa
->dst
, rsa
->mod_len
,
1621 o_len
, DMA_FROM_DEVICE
);
1626 op
.src
.u
.dma
.address
= src
.dma
.address
;
1627 op
.src
.u
.dma
.offset
= 0;
1628 op
.src
.u
.dma
.length
= i_len
;
1629 op
.dst
.u
.dma
.address
= dst
.dm_wa
.dma
.address
;
1630 op
.dst
.u
.dma
.offset
= 0;
1631 op
.dst
.u
.dma
.length
= o_len
;
1633 op
.u
.rsa
.mod_size
= rsa
->key_size
;
1634 op
.u
.rsa
.input_len
= i_len
;
1636 ret
= ccp_perform_rsa(&op
);
1638 cmd
->engine_error
= cmd_q
->cmd_error
;
1642 ccp_reverse_get_dm_area(&dst
.dm_wa
, rsa
->dst
, rsa
->mod_len
);
1645 ccp_free_data(&dst
, cmd_q
);
1654 ccp_free_ksb(cmd_q
->ccp
, op
.ksb_key
, ksb_count
);
1659 static int ccp_run_passthru_cmd(struct ccp_cmd_queue
*cmd_q
,
1660 struct ccp_cmd
*cmd
)
1662 struct ccp_passthru_engine
*pt
= &cmd
->u
.passthru
;
1663 struct ccp_dm_workarea mask
;
1664 struct ccp_data src
, dst
;
1666 bool in_place
= false;
1670 if (!pt
->final
&& (pt
->src_len
& (CCP_PASSTHRU_BLOCKSIZE
- 1)))
1673 if (!pt
->src
|| !pt
->dst
)
1676 if (pt
->bit_mod
!= CCP_PASSTHRU_BITWISE_NOOP
) {
1677 if (pt
->mask_len
!= CCP_PASSTHRU_MASKSIZE
)
1683 BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT
!= 1);
1685 memset(&op
, 0, sizeof(op
));
1687 op
.jobid
= ccp_gen_jobid(cmd_q
->ccp
);
1689 if (pt
->bit_mod
!= CCP_PASSTHRU_BITWISE_NOOP
) {
1691 op
.ksb_key
= cmd_q
->ksb_key
;
1693 ret
= ccp_init_dm_workarea(&mask
, cmd_q
,
1694 CCP_PASSTHRU_KSB_COUNT
*
1700 ccp_set_dm_area(&mask
, 0, pt
->mask
, 0, pt
->mask_len
);
1701 ret
= ccp_copy_to_ksb(cmd_q
, &mask
, op
.jobid
, op
.ksb_key
,
1702 CCP_PASSTHRU_BYTESWAP_NOOP
);
1704 cmd
->engine_error
= cmd_q
->cmd_error
;
1709 /* Prepare the input and output data workareas. For in-place
1710 * operations we need to set the dma direction to BIDIRECTIONAL
1711 * and copy the src workarea to the dst workarea.
1713 if (sg_virt(pt
->src
) == sg_virt(pt
->dst
))
1716 ret
= ccp_init_data(&src
, cmd_q
, pt
->src
, pt
->src_len
,
1717 CCP_PASSTHRU_MASKSIZE
,
1718 in_place
? DMA_BIDIRECTIONAL
: DMA_TO_DEVICE
);
1725 ret
= ccp_init_data(&dst
, cmd_q
, pt
->dst
, pt
->src_len
,
1726 CCP_PASSTHRU_MASKSIZE
, DMA_FROM_DEVICE
);
1731 /* Send data to the CCP Passthru engine
1732 * Because the CCP engine works on a single source and destination
1733 * dma address at a time, each entry in the source scatterlist
1734 * (after the dma_map_sg call) must be less than or equal to the
1735 * (remaining) length in the destination scatterlist entry and the
1736 * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1738 dst
.sg_wa
.sg_used
= 0;
1739 for (i
= 1; i
<= src
.sg_wa
.dma_count
; i
++) {
1740 if (!dst
.sg_wa
.sg
||
1741 (dst
.sg_wa
.sg
->length
< src
.sg_wa
.sg
->length
)) {
1746 if (i
== src
.sg_wa
.dma_count
) {
1751 op
.src
.type
= CCP_MEMTYPE_SYSTEM
;
1752 op
.src
.u
.dma
.address
= sg_dma_address(src
.sg_wa
.sg
);
1753 op
.src
.u
.dma
.offset
= 0;
1754 op
.src
.u
.dma
.length
= sg_dma_len(src
.sg_wa
.sg
);
1756 op
.dst
.type
= CCP_MEMTYPE_SYSTEM
;
1757 op
.dst
.u
.dma
.address
= sg_dma_address(dst
.sg_wa
.sg
);
1758 op
.dst
.u
.dma
.offset
= dst
.sg_wa
.sg_used
;
1759 op
.dst
.u
.dma
.length
= op
.src
.u
.dma
.length
;
1761 ret
= ccp_perform_passthru(&op
);
1763 cmd
->engine_error
= cmd_q
->cmd_error
;
1767 dst
.sg_wa
.sg_used
+= src
.sg_wa
.sg
->length
;
1768 if (dst
.sg_wa
.sg_used
== dst
.sg_wa
.sg
->length
) {
1769 dst
.sg_wa
.sg
= sg_next(dst
.sg_wa
.sg
);
1770 dst
.sg_wa
.sg_used
= 0;
1772 src
.sg_wa
.sg
= sg_next(src
.sg_wa
.sg
);
1777 ccp_free_data(&dst
, cmd_q
);
1780 ccp_free_data(&src
, cmd_q
);
1783 if (pt
->bit_mod
!= CCP_PASSTHRU_BITWISE_NOOP
)
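/* A note on the passthru loop above (illustrative only): each mapped
 * source entry is sent as one operation into the current destination
 * entry at offset dst.sg_wa.sg_used. For example, source entries of 64
 * and 64 bytes against one 128-byte destination entry become two
 * operations at destination offsets 0 and 64, after which the
 * destination entry is exhausted and the walk advances to the next one.
 */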
static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.mm.operand_1 ||
	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
		if (!ecc->u.mm.operand_2 ||
		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

	if (!ecc->u.mm.result ||
	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				      CCP_ECC_OPERAND_SIZE, false);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first operand */
	ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
				      ecc->u.mm.operand_1_len,
				      CCP_ECC_OPERAND_SIZE, false);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
		/* Copy the second operand */
		ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
					      ecc->u.mm.operand_2_len,
					      CCP_ECC_OPERAND_SIZE, false);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the ECC result */
	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
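/* Source buffer layout for the ECC modular math operations (a reading of
 * the concatenation code above, not a documented hardware format): the
 * workarea is filled with fixed-width little endian fields,
 *
 *	[ modulus | operand_1 | operand_2 ]
 *
 * each CCP_ECC_OPERAND_SIZE bytes wide, with operand_2 omitted for the
 * modular-inverse function.
 */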
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
	struct ccp_dm_workarea src, dst;
	struct ccp_op op;
	int ret;
	u8 *save;

	if (!ecc->u.pm.point_1.x ||
	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.point_1.y ||
	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		if (!ecc->u.pm.point_2.x ||
		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
		    !ecc->u.pm.point_2.y ||
		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;
	} else {
		if (!ecc->u.pm.domain_a ||
		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
			return -EINVAL;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
			if (!ecc->u.pm.scalar ||
			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
				return -EINVAL;
	}

	if (!ecc->u.pm.result.x ||
	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
	    !ecc->u.pm.result.y ||
	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = ccp_gen_jobid(cmd_q->ccp);

	/* Concatenate the modulus and the operands. Both the modulus and
	 * the operands must be in little endian format.  Since the input
	 * is in big endian format it must be converted and placed in a
	 * fixed length buffer.
	 */
	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Save the workarea address since it is updated in order to perform
	 * the concatenation
	 */
	save = src.address;

	/* Copy the ECC modulus */
	ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
				      CCP_ECC_OPERAND_SIZE, false);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Copy the first point X and Y coordinate */
	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
				      ecc->u.pm.point_1.x_len,
				      CCP_ECC_OPERAND_SIZE, false);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;
	ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
				      ecc->u.pm.point_1.y_len,
				      CCP_ECC_OPERAND_SIZE, false);
	if (ret)
		goto e_src;
	src.address += CCP_ECC_OPERAND_SIZE;

	/* Set the first point Z coordinate to 1 */
	*src.address = 0x01;
	src.address += CCP_ECC_OPERAND_SIZE;

	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
		/* Copy the second point X and Y coordinate */
		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
					      ecc->u.pm.point_2.x_len,
					      CCP_ECC_OPERAND_SIZE, false);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;
		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
					      ecc->u.pm.point_2.y_len,
					      CCP_ECC_OPERAND_SIZE, false);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;

		/* Set the second point Z coordinate to 1 */
		*src.address = 0x01;
		src.address += CCP_ECC_OPERAND_SIZE;
	} else {
		/* Copy the Domain "a" parameter */
		ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
					      ecc->u.pm.domain_a_len,
					      CCP_ECC_OPERAND_SIZE, false);
		if (ret)
			goto e_src;
		src.address += CCP_ECC_OPERAND_SIZE;

		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
			/* Copy the scalar value */
			ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
						      ecc->u.pm.scalar_len,
						      CCP_ECC_OPERAND_SIZE,
						      false);
			if (ret)
				goto e_src;
			src.address += CCP_ECC_OPERAND_SIZE;
		}
	}

	/* Restore the workarea address */
	src.address = save;

	/* Prepare the output area for the operation */
	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
				   DMA_FROM_DEVICE);
	if (ret)
		goto e_src;

	op.soc = 1;
	op.src.u.dma.address = src.dma.address;
	op.src.u.dma.offset = 0;
	op.src.u.dma.length = src.length;
	op.dst.u.dma.address = dst.dma.address;
	op.dst.u.dma.offset = 0;
	op.dst.u.dma.length = dst.length;

	op.u.ecc.function = cmd->u.ecc.function;

	ret = ccp_perform_ecc(&op);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_dst;
	}

	ecc->ecc_result = le16_to_cpup(
		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
		ret = -EIO;
		goto e_dst;
	}

	/* Save the workarea address since it is updated as we walk through
	 * to copy the point math result
	 */
	save = dst.address;

	/* Save the ECC result X and Y coordinates */
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;
	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
				CCP_ECC_MODULUS_BYTES);
	dst.address += CCP_ECC_OUTPUT_SIZE;

	/* Restore the workarea address */
	dst.address = save;

e_dst:
	ccp_dm_free(&dst);

e_src:
	ccp_dm_free(&src);

	return ret;
}
static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_ecc_engine *ecc = &cmd->u.ecc;

	ecc->ecc_result = 0;

	if (!ecc->mod ||
	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
		return -EINVAL;

	switch (ecc->function) {
	case CCP_ECC_FUNCTION_MMUL_384BIT:
	case CCP_ECC_FUNCTION_MADD_384BIT:
	case CCP_ECC_FUNCTION_MINV_384BIT:
		return ccp_run_ecc_mm_cmd(cmd_q, cmd);

	case CCP_ECC_FUNCTION_PADD_384BIT:
	case CCP_ECC_FUNCTION_PMUL_384BIT:
	case CCP_ECC_FUNCTION_PDBL_384BIT:
		return ccp_run_ecc_pm_cmd(cmd_q, cmd);

	default:
		return -EINVAL;
	}
}
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	int ret;

	cmd->engine_error = 0;
	cmd_q->cmd_error = 0;
	cmd_q->int_rcvd = 0;
	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

	switch (cmd->engine) {
	case CCP_ENGINE_AES:
		ret = ccp_run_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_XTS_AES_128:
		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_SHA:
		ret = ccp_run_sha_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_RSA:
		ret = ccp_run_rsa_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_PASSTHRU:
		ret = ccp_run_passthru_cmd(cmd_q, cmd);
		break;
	case CCP_ENGINE_ECC:
		ret = ccp_run_ecc_cmd(cmd_q, cmd);
		break;
	default:
		ret = -EINVAL;
	}

	return ret;
}
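/* Usage sketch for ccp_run_cmd() (illustrative only; the exact enum value
 * names are assumed from <linux/ccp.h>, not defined in this file): a
 * caller fills in the engine and the engine-specific union, then submits
 * the command on a queue:
 *
 *	struct ccp_cmd cmd;
 *
 *	memset(&cmd, 0, sizeof(cmd));
 *	cmd.engine = CCP_ENGINE_AES;
 *	cmd.u.aes.type = CCP_AES_TYPE_128;
 *	cmd.u.aes.mode = CCP_AES_MODE_CBC;
 *	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
 *	cmd.u.aes.key = key_sg;		// scatterlists prepared by caller
 *	cmd.u.aes.key_len = AES_KEYSIZE_128;
 *	cmd.u.aes.iv = iv_sg;
 *	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
 *	cmd.u.aes.src = src_sg;
 *	cmd.u.aes.dst = dst_sg;
 *	cmd.u.aes.src_len = src_len;
 *
 *	ret = ccp_run_cmd(cmd_q, &cmd);
 */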