/*
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>

/* driver-local definitions: struct hfi1_cq, struct hfi1_devdata, mmap helpers */
#include "verbs.h"
#include "hfi.h"
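/*
 * Completion queue (CQ) support for the hfi1 verbs driver: completions are
 * posted into a ring buffer that is either kernel-resident or mmapped into
 * user space, and notification callbacks are delivered from a per-device
 * kthread worker.
 */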
/**
 * hfi1_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
{
	struct hfi1_cq_wc *wc;
	unsigned long flags;
	u32 head;
	u32 next;

	spin_lock_irqsave(&cq->lock, flags);
	/*
	 * Note that the head pointer might be writable by user processes.
	 * Take care to verify it is a sane value.
	 */
	wc = cq->queue;
	head = wc->head;
	if (head >= (unsigned) cq->ibcq.cqe) {
		head = cq->ibcq.cqe;
		next = 0;
	} else
		next = head + 1;
	if (unlikely(next == wc->tail)) {
		spin_unlock_irqrestore(&cq->lock, flags);
		if (cq->ibcq.event_handler) {
			struct ib_event ev;

			ev.device = cq->ibcq.device;
			ev.element.cq = &cq->ibcq;
			ev.event = IB_EVENT_CQ_ERR;
			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
		}
		return;
	}
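	/*
	 * User-mapped CQs store completions in the ib_uverbs_wc layout so
	 * user space can read the queue directly; kernel CQs store the
	 * native ib_wc.
	 */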
	if (cq->ip) {
		wc->uqueue[head].wr_id = entry->wr_id;
		wc->uqueue[head].status = entry->status;
		wc->uqueue[head].opcode = entry->opcode;
		wc->uqueue[head].vendor_err = entry->vendor_err;
		wc->uqueue[head].byte_len = entry->byte_len;
		wc->uqueue[head].ex.imm_data =
			(__u32 __force)entry->ex.imm_data;
		wc->uqueue[head].qp_num = entry->qp->qp_num;
		wc->uqueue[head].src_qp = entry->src_qp;
		wc->uqueue[head].wc_flags = entry->wc_flags;
		wc->uqueue[head].pkey_index = entry->pkey_index;
		wc->uqueue[head].slid = entry->slid;
		wc->uqueue[head].sl = entry->sl;
		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
		wc->uqueue[head].port_num = entry->port_num;
		/* Make sure entry is written before the head index. */
		smp_wmb();
	} else
		wc->kqueue[head] = *entry;
	wc->head = next;
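	/*
	 * Only kick the completion handler if the consumer armed the CQ:
	 * either unconditionally (IB_CQ_NEXT_COMP) or for solicited and
	 * error completions (IB_CQ_SOLICITED).
	 */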
	if (cq->notify == IB_CQ_NEXT_COMP ||
	    (cq->notify == IB_CQ_SOLICITED &&
	     (solicited || entry->status != IB_WC_SUCCESS))) {
		struct kthread_worker *worker;
		/*
		 * This will cause send_complete() to be called in
		 * another thread.
		 */
		smp_read_barrier_depends(); /* see hfi1_cq_exit */
		worker = cq->dd->worker;
		if (likely(worker)) {
			cq->notify = IB_CQ_NONE;
			cq->triggered++;
			queue_kthread_work(worker, &cq->comptask);
		}
	}

	spin_unlock_irqrestore(&cq->lock, flags);
}
/**
 * hfi1_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context. Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
	struct hfi1_cq *cq = to_icq(ibcq);
	struct hfi1_cq_wc *wc;
	unsigned long flags;
	int npolled;
	u32 tail;

	/* The kernel can only poll a kernel completion queue */
	if (cq->ip) {
		npolled = -EINVAL;
		goto bail;
	}

	spin_lock_irqsave(&cq->lock, flags);
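	/* Clamp the tail index to a sane value before walking the queue. */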
	wc = cq->queue;
	tail = wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
		if (tail == wc->head)
			break;
		/* The kernel doesn't need a RMB since it has the lock. */
		*entry = wc->kqueue[tail];
		if (tail >= cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	wc->tail = tail;

	spin_unlock_irqrestore(&cq->lock, flags);

bail:
	return npolled;
}
static void send_complete(struct kthread_work *work)
{
	struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);

	/*
	 * The completion handler will most likely rearm the notification
	 * and poll for all pending entries. If a new completion entry
	 * is added while we are in this routine, queue_work()
	 * won't call us again until we return so we check triggered to
	 * see if we need to call the handler again.
	 */
	for (;;) {
		u8 triggered = cq->triggered;

		/*
		 * IPoIB connected mode assumes the callback is from a
		 * soft IRQ. We simulate this by blocking "bottom halves".
		 * See the implementation for ipoib_cm_handle_tx_wc(),
		 * netif_tx_lock_bh() and netif_tx_lock().
		 */
		local_bh_disable();
		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
		local_bh_enable();

		if (cq->triggered == triggered)
			return;
	}
}
/**
 * hfi1_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: unused by the driver
 * @udata: user data for libibverbs.so
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *hfi1_create_cq(
	struct ib_device *ibdev,
	const struct ib_cq_init_attr *attr,
	struct ib_ucontext *context,
	struct ib_udata *udata)
{
	struct hfi1_ibdev *dev = to_idev(ibdev);
	struct hfi1_cq *cq;
	struct hfi1_cq_wc *wc;
	struct ib_cq *ret;
	u32 sz;
	unsigned int entries = attr->cqe;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	if (entries < 1 || entries > hfi1_max_cqes)
		return ERR_PTR(-EINVAL);
	/* Allocate the completion queue structure. */
	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);
	/*
	 * Allocate the completion queue entries and head/tail pointers.
	 * This is allocated separately so that it can be resized and
	 * also mapped into user space.
	 * We need to use vmalloc() in order to support mmap and large
	 * numbers of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
	else
		sz += sizeof(struct ib_wc) * (entries + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = ERR_PTR(-ENOMEM);
		goto bail_cq;
	}
	/*
	 * Return the address of the WC as the offset to mmap.
	 * See hfi1_mmap() for details.
	 */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int err;

		cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
		if (!cq->ip) {
			ret = ERR_PTR(-ENOMEM);
			goto bail_wc;
		}

		err = ib_copy_to_udata(udata, &cq->ip->offset,
				       sizeof(cq->ip->offset));
		if (err) {
			ret = ERR_PTR(err);
			goto bail_ip;
		}
	} else
		cq->ip = NULL;
	spin_lock(&dev->n_cqs_lock);
	if (dev->n_cqs_allocated == hfi1_max_cqs) {
		spin_unlock(&dev->n_cqs_lock);
		ret = ERR_PTR(-ENOMEM);
		goto bail_ip;
	}

	dev->n_cqs_allocated++;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip) {
		spin_lock_irq(&dev->pending_lock);
		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}
	/*
	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
	 * The number of entries should be >= the number requested or return
	 * an error.
	 */
	cq->dd = dd_from_dev(dev);
	cq->ibcq.cqe = entries;
	cq->notify = IB_CQ_NONE;
	cq->triggered = 0;
	spin_lock_init(&cq->lock);
	init_kthread_work(&cq->comptask, send_complete);
	wc->head = 0;
	wc->tail = 0;
	cq->queue = wc;

	ret = &cq->ibcq;

	goto done;

bail_ip:
	kfree(cq->ip);
bail_wc:
	vfree(wc);
bail_cq:
	kfree(cq);
done:
	return ret;
}
/**
 * hfi1_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int hfi1_destroy_cq(struct ib_cq *ibcq)
{
	struct hfi1_ibdev *dev = to_idev(ibcq->device);
	struct hfi1_cq *cq = to_icq(ibcq);

	flush_kthread_work(&cq->comptask);
	spin_lock(&dev->n_cqs_lock);
	dev->n_cqs_allocated--;
	spin_unlock(&dev->n_cqs_lock);
	if (cq->ip)
		kref_put(&cq->ip->ref, hfi1_release_mmap_info);
	else
		vfree(cq->queue);
	kfree(cq);

	return 0;
}
/**
 * hfi1_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * Returns 0 for success.
 *
 * This may be called from interrupt context. Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
	struct hfi1_cq *cq = to_icq(ibcq);
	unsigned long flags;
	int ret = 0;
	spin_lock_irqsave(&cq->lock, flags);

	/*
	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
	 */
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
	    cq->queue->head != cq->queue->tail)
		ret = 1;

	spin_unlock_irqrestore(&cq->lock, flags);

	return ret;
}
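/*
 * A typical consumer pattern (a sketch, not part of this driver): re-arm
 * the CQ and then drain it so a completion that raced with the arm is not
 * missed. handle_wc() is a hypothetical per-completion handler.
 *
 *	struct ib_wc wc;
 *
 *	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 *	while (ib_poll_cq(cq, 1, &wc) > 0)
 *		handle_wc(&wc);
 */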
/**
 * hfi1_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries to support
 * @udata: user data for libibverbs.so
 *
 * Returns 0 for success.
 */
int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	struct hfi1_cq *cq = to_icq(ibcq);
	struct hfi1_cq_wc *old_wc;
	struct hfi1_cq_wc *wc;
	u32 head, tail, n;
	int ret;
	u32 sz;
	if (cqe < 1 || cqe > hfi1_max_cqes) {
		ret = -EINVAL;
		goto bail;
	}
	/*
	 * Need to use vmalloc() if we want to support large #s of entries.
	 */
	sz = sizeof(*wc);
	if (udata && udata->outlen >= sizeof(__u64))
		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
	else
		sz += sizeof(struct ib_wc) * (cqe + 1);
	wc = vmalloc_user(sz);
	if (!wc) {
		ret = -ENOMEM;
		goto bail;
	}
	/* Check that we can write the offset to mmap. */
	if (udata && udata->outlen >= sizeof(__u64)) {
		__u64 offset = 0;

		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
		if (ret)
			goto bail_free;
	}
	spin_lock_irq(&cq->lock);
	/*
	 * Make sure head and tail are sane since they
	 * might be user writable.
	 */
	old_wc = cq->queue;
	head = old_wc->head;
	if (head > (u32) cq->ibcq.cqe)
		head = (u32) cq->ibcq.cqe;
	tail = old_wc->tail;
	if (tail > (u32) cq->ibcq.cqe)
		tail = (u32) cq->ibcq.cqe;
	if (head < tail)
		n = cq->ibcq.cqe + 1 + head - tail;
	else
		n = head - tail;
	if (unlikely((u32)cqe < n)) {
		ret = -EINVAL;
		goto bail_unlock;
	}
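	/*
	 * Copy the pending completions from the old ring, in order, to the
	 * start of the new ring, then switch the CQ over to the new ring.
	 */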
	for (n = 0; tail != head; n++) {
		if (cq->ip)
			wc->uqueue[n] = old_wc->uqueue[tail];
		else
			wc->kqueue[n] = old_wc->kqueue[tail];
		if (tail == (u32) cq->ibcq.cqe)
			tail = 0;
		else
			tail++;
	}
	cq->ibcq.cqe = cqe;
	wc->head = n;
	wc->tail = 0;
	cq->queue = wc;
	spin_unlock_irq(&cq->lock);

	vfree(old_wc);
	if (cq->ip) {
		struct hfi1_ibdev *dev = to_idev(ibcq->device);
		struct hfi1_mmap_info *ip = cq->ip;

		hfi1_update_mmap_info(dev, ip, sz, wc);
		/*
		 * Return the offset to mmap.
		 * See hfi1_mmap() for details.
		 */
		if (udata && udata->outlen >= sizeof(__u64)) {
			ret = ib_copy_to_udata(udata, &ip->offset,
					       sizeof(ip->offset));
			if (ret)
				goto bail;
		}
		spin_lock_irq(&dev->pending_lock);
		if (list_empty(&ip->pending_mmaps))
			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
		spin_unlock_irq(&dev->pending_lock);
	}

	ret = 0;
	goto bail;

bail_unlock:
	spin_unlock_irq(&cq->lock);
bail_free:
	vfree(wc);
bail:
	return ret;
}
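/**
 * hfi1_cq_init - start the per-device completion worker thread
 * @dd: the device data
 *
 * Returns 0 on success or a negative errno on failure.
 */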
int hfi1_cq_init(struct hfi1_devdata *dd)
{
	int ret = 0;
	int cpu;
	struct task_struct *task;

	if (dd->worker)
		return 0;
	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
	if (!dd->worker)
		return -ENOMEM;
	init_kthread_worker(dd->worker);
	task = kthread_create_on_node(
		kthread_worker_fn,
		dd->worker,
		dd->assigned_node_id,
		"hfi1_cq%d", dd->unit);
	if (IS_ERR(task))
		goto task_fail;
	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
	kthread_bind(task, cpu);
	wake_up_process(task);
out:
	return ret;

task_fail:
	ret = PTR_ERR(task);
	kfree(dd->worker);
	dd->worker = NULL;
	goto out;
}
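/**
 * hfi1_cq_exit - stop the per-device completion worker thread
 * @dd: the device data
 */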
void hfi1_cq_exit(struct hfi1_devdata *dd)
{
	struct kthread_worker *worker;

	worker = dd->worker;
	if (!worker)
		return;
	/* blocks future queuing from send_complete() */
	dd->worker = NULL;
	smp_wmb(); /* See hfi1_cq_enter */
	flush_kthread_worker(worker);
	kthread_stop(worker->task);
	kfree(worker);
}