/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"

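/*
 * Completion queue (CQ) handling for the mlx5 IB driver: CQE parsing for
 * requester and responder completions, the poll/arm verbs, and CQ
 * create/resize/destroy for both kernel and user-space consumers.
 */
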
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

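/*
 * A CQE belongs to software when its opcode is not INVALID and its
 * ownership bit matches the pass the consumer is on: ibcq.cqe is
 * nent - 1, so n & (cqe + 1) extracts the wrap-parity bit that the
 * ownership bit must agree with.
 */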
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_FAST_REG_MR:
		return IB_WC_FAST_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

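/*
 * For requester completions the hardware opcode lives in the top byte of
 * sop_drop_qpn; translate it to an ib_wc opcode.  The *_IMM cases fall
 * through on purpose to set the base opcode after flagging the immediate.
 */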
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_BIND_MW:
		wc->opcode    = IB_WC_BIND_MW;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE	   = 2,
};

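/*
 * Responder completions: recover the wr_id either from the SRQ (or the
 * XRC target's SRQ, looked up via the srqn in the CQE) or from the QP's
 * receive queue, then fill in the opcode, immediate data and address
 * fields from the CQE.
 */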
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq	  = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->slid	   = be16_to_cpu(cqe->slid);
	wc->sl		   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	wc->pkey_index     = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		dump = 0;
		wc->status = IB_WC_RETRY_EXC_ERR;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		dump = 0;
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

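/*
 * Software completion of atomics: when an atomic response lands in the
 * send WQE, the returned value must be byte-swapped in place from network
 * to host order.  is_atomic_response() is still a stub returning 0, so
 * this path is effectively disabled for now.
 */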
static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision
	*/
	return 0;
}

static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);

	return addr;
}

static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  u16 idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}
}

static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(dev->mdev, &buf->buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

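/*
 * Poll a single CQE: bail out if the next entry is still owned by
 * hardware, advance the consumer index, then dispatch on the opcode.
 * *cur_qp caches the QP of the previous CQE so the QP table lookup is
 * only repeated when the QP number changes.
 */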
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mr *mmr;
	struct mlx5_ib_mr *mr;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
		if (unlikely(!mqp)) {
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
				     cq->mcq.cqn, qpn);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		read_lock(&dev->mdev->priv.mr_table.lock);
		mmr = __mlx5_mr_lookup(dev->mdev,
				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		if (unlikely(!mmr)) {
			read_unlock(&dev->mdev->priv.mr_table.lock);
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
			return -EINVAL;
		}

		mr = to_mibmr(mmr);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		read_unlock(&dev->mdev->priv.mr_table.lock);
		break;
	}

	return 0;
}

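/*
 * ib_poll_cq() entry point: drain up to num_entries completions under the
 * CQ lock and ring the consumer-index doorbell once at the end.
 */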
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; npolled++) {
		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}

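/*
 * ib_req_notify_cq() entry point: arm the CQ for the next solicited or
 * unsolicited completion via the UAR doorbell page.
 */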
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    to_mdev(ibcq->device)->mdev->priv.uuari.uars[0].map,
		    MLX5_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->mdev->priv.cq_uar_lock));

	return 0;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
			     PAGE_SIZE * 2, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

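/*
 * User-space CQ creation: pin the CQE buffer the caller allocated, map
 * its doorbell record, and describe the pages to firmware in the create
 * mailbox.
 */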
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
			  int entries, struct mlx5_create_cq_mbox_in **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd;
	size_t ucmdlen;
	int page_shift;
	int npages;
	int ncont;
	int err;

	ucmdlen =
		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
		 sizeof(ucmd)) ? (sizeof(ucmd) -
				  sizeof(ucmd.reserved)) : sizeof(ucmd);

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if (ucmdlen == sizeof(ucmd) &&
	    ucmd.reserved != 0)
		return -EINVAL;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;

	*index = to_mucontext(context)->uuari.uars[0].index;

	return 0;

err_db:
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
	ib_umem_release(cq->buf.umem);
}

static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

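/*
 * Kernel CQ creation: allocate the doorbell record and the CQE buffer,
 * mark every entry invalid so the ownership check starts out false, and
 * fill the page array for the firmware mailbox.
 */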
static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    struct mlx5_create_cq_mbox_in **cqb,
			    int *index, int *inlen)
{
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}
	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);

	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	*index = dev->mdev->priv.uuari.uars[0].index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
				int vector, struct ib_ucontext *context,
				struct ib_udata *udata)
{
	struct mlx5_create_cq_mbox_in *cqb = NULL;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq;
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	int cqe_size;
	int irqn;
	int eqn;
	int err;

	if (entries < 0)
		return ERR_PTR(-EINVAL);

	entries = roundup_pow_of_two(entries + 1);
	if (entries > dev->mdev->caps.gen.max_cqes)
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
				     &cqb, &cqe_size, &index, &inlen);
		if (err)
			goto err_create;
	} else {
		/* for now choose 64 bytes till we have a proper interface */
		cqe_size = 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			goto err_create;
	}

	cq->cqe_size = cqe_size;
	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cqb->ctx.c_eqn = cpu_to_be16(eqn);
	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return &cq->ibcq;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (context)
		destroy_cq_user(cq, context);
	else
		destroy_cq_kernel(dev, cq);

err_create:
	kfree(cq);

	return ERR_PTR(err);
}

int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	struct ib_ucontext *context = NULL;

	if (cq->uobject)
		context = cq->uobject->context;

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (context)
		destroy_cq_user(mcq, context);
	else
		destroy_cq_kernel(dev, mcq);

	kfree(mcq);

	return 0;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

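/*
 * Remove all CQEs belonging to a given resource number (QP or SRQ) from
 * the CQ, compacting the remaining entries toward the producer side.
 * The caller must hold the CQ lock; mlx5_ib_cq_clean() below is the
 * locked wrapper.
 */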
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from.  It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_modify_cq_mbox_in *in;
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;
	u32 fsel;

	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
		return -ENOSYS;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	in->cqn = cpu_to_be32(mcq->mcq.cqn);
	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
	in->ctx.cq_period = cpu_to_be16(cq_period);
	in->ctx.cq_max_count = cpu_to_be16(cq_count);
	in->field_select = cpu_to_be32(fsel);
	err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
	kfree(in);

	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	/* don't leave a stale pointer behind on the error path */
	cq->resize_buf = NULL;
	return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}

static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

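/*
 * CQ resize: allocate the new buffer (user or kernel), ask firmware to
 * switch with MLX5_CQ_OPMOD_RESIZE, and for kernel CQs copy any still
 * pending CQEs from the old buffer once the RESIZE_CQ CQE shows up.
 */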
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_modify_cq_mbox_in *in;
	int err;
	int npas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1)
		return -EINVAL;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > dev->mdev->caps.gen.max_cqes + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     in->pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);

	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	in->ctx.page_offset = 0;
	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
	in->cqn = cpu_to_be32(cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}

	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);

ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}