IB/ipath: Performance improvements via mmap of queues
[deliverable/linux.git] / drivers / infiniband / hw / ipath / ipath_cq.c
CommitLineData
cef1cce5 1/*
759d5768 2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
cef1cce5
BS
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#include <linux/err.h>
35#include <linux/vmalloc.h>
36
37#include "ipath_verbs.h"
38
39/**
40 * ipath_cq_enter - add a new entry to the completion queue
41 * @cq: completion queue
42 * @entry: work completion entry to add
43 * @sig: true if @entry is a solicitated entry
44 *
373d9915 45 * This may be called with qp->s_lock held.
cef1cce5
BS
46 */
47void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
48{
373d9915 49 struct ipath_cq_wc *wc = cq->queue;
cef1cce5 50 unsigned long flags;
373d9915 51 u32 head;
cef1cce5
BS
52 u32 next;
53
54 spin_lock_irqsave(&cq->lock, flags);
55
373d9915
RC
56 /*
57 * Note that the head pointer might be writable by user processes.
58 * Take care to verify it is a sane value.
59 */
60 head = wc->head;
61 if (head >= (unsigned) cq->ibcq.cqe) {
62 head = cq->ibcq.cqe;
cef1cce5 63 next = 0;
373d9915
RC
64 } else
65 next = head + 1;
66 if (unlikely(next == wc->tail)) {
cef1cce5
BS
67 spin_unlock_irqrestore(&cq->lock, flags);
68 if (cq->ibcq.event_handler) {
69 struct ib_event ev;
70
71 ev.device = cq->ibcq.device;
72 ev.element.cq = &cq->ibcq;
73 ev.event = IB_EVENT_CQ_ERR;
74 cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
75 }
76 return;
77 }
373d9915
RC
78 wc->queue[head] = *entry;
79 wc->head = next;
cef1cce5
BS
80
81 if (cq->notify == IB_CQ_NEXT_COMP ||
82 (cq->notify == IB_CQ_SOLICITED && solicited)) {
83 cq->notify = IB_CQ_NONE;
84 cq->triggered++;
85 /*
86 * This will cause send_complete() to be called in
87 * another thread.
88 */
89 tasklet_hi_schedule(&cq->comptask);
90 }
91
92 spin_unlock_irqrestore(&cq->lock, flags);
93
94 if (entry->status != IB_WC_SUCCESS)
95 to_idev(cq->ibcq.device)->n_wqe_errs++;
96}
97
98/**
99 * ipath_poll_cq - poll for work completion entries
100 * @ibcq: the completion queue to poll
101 * @num_entries: the maximum number of entries to return
102 * @entry: pointer to array where work completions are placed
103 *
104 * Returns the number of completion entries polled.
105 *
106 * This may be called from interrupt context. Also called by ib_poll_cq()
107 * in the generic verbs code.
108 */
109int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
110{
111 struct ipath_cq *cq = to_icq(ibcq);
373d9915 112 struct ipath_cq_wc *wc = cq->queue;
cef1cce5
BS
113 unsigned long flags;
114 int npolled;
115
116 spin_lock_irqsave(&cq->lock, flags);
117
118 for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
373d9915 119 if (wc->tail == wc->head)
cef1cce5 120 break;
373d9915
RC
121 *entry = wc->queue[wc->tail];
122 if (wc->tail >= cq->ibcq.cqe)
123 wc->tail = 0;
cef1cce5 124 else
373d9915 125 wc->tail++;
cef1cce5
BS
126 }
127
128 spin_unlock_irqrestore(&cq->lock, flags);
129
130 return npolled;
131}
132
133static void send_complete(unsigned long data)
134{
135 struct ipath_cq *cq = (struct ipath_cq *)data;
136
137 /*
138 * The completion handler will most likely rearm the notification
139 * and poll for all pending entries. If a new completion entry
140 * is added while we are in this routine, tasklet_hi_schedule()
141 * won't call us again until we return so we check triggered to
142 * see if we need to call the handler again.
143 */
144 for (;;) {
145 u8 triggered = cq->triggered;
146
147 cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
148
149 if (cq->triggered == triggered)
150 return;
151 }
152}
153
154/**
155 * ipath_create_cq - create a completion queue
156 * @ibdev: the device this completion queue is attached to
157 * @entries: the minimum size of the completion queue
158 * @context: unused by the InfiniPath driver
159 * @udata: unused by the InfiniPath driver
160 *
161 * Returns a pointer to the completion queue or negative errno values
162 * for failure.
163 *
164 * Called by ib_create_cq() in the generic verbs code.
165 */
166struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
167 struct ib_ucontext *context,
168 struct ib_udata *udata)
169{
fe62546a 170 struct ipath_ibdev *dev = to_idev(ibdev);
cef1cce5 171 struct ipath_cq *cq;
373d9915 172 struct ipath_cq_wc *wc;
cef1cce5
BS
173 struct ib_cq *ret;
174
fe62546a
BS
175 if (entries > ib_ipath_max_cqes) {
176 ret = ERR_PTR(-EINVAL);
373d9915 177 goto done;
fe62546a
BS
178 }
179
180 if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
181 ret = ERR_PTR(-ENOMEM);
373d9915 182 goto done;
fe62546a
BS
183 }
184
373d9915 185 /* Allocate the completion queue structure. */
cef1cce5
BS
186 cq = kmalloc(sizeof(*cq), GFP_KERNEL);
187 if (!cq) {
188 ret = ERR_PTR(-ENOMEM);
373d9915 189 goto done;
cef1cce5
BS
190 }
191
192 /*
373d9915
RC
193 * Allocate the completion queue entries and head/tail pointers.
194 * This is allocated separately so that it can be resized and
195 * also mapped into user space.
196 * We need to use vmalloc() in order to support mmap and large
197 * numbers of entries.
cef1cce5 198 */
373d9915 199 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
cef1cce5 200 if (!wc) {
cef1cce5 201 ret = ERR_PTR(-ENOMEM);
373d9915 202 goto bail_cq;
cef1cce5 203 }
373d9915
RC
204
205 /*
206 * Return the address of the WC as the offset to mmap.
207 * See ipath_mmap() for details.
208 */
209 if (udata && udata->outlen >= sizeof(__u64)) {
210 struct ipath_mmap_info *ip;
211 __u64 offset = (__u64) wc;
212 int err;
213
214 err = ib_copy_to_udata(udata, &offset, sizeof(offset));
215 if (err) {
216 ret = ERR_PTR(err);
217 goto bail_wc;
218 }
219
220 /* Allocate info for ipath_mmap(). */
221 ip = kmalloc(sizeof(*ip), GFP_KERNEL);
222 if (!ip) {
223 ret = ERR_PTR(-ENOMEM);
224 goto bail_wc;
225 }
226 cq->ip = ip;
227 ip->context = context;
228 ip->obj = wc;
229 kref_init(&ip->ref);
230 ip->mmap_cnt = 0;
231 ip->size = PAGE_ALIGN(sizeof(*wc) +
232 sizeof(struct ib_wc) * entries);
233 spin_lock_irq(&dev->pending_lock);
234 ip->next = dev->pending_mmaps;
235 dev->pending_mmaps = ip;
236 spin_unlock_irq(&dev->pending_lock);
237 } else
238 cq->ip = NULL;
239
cef1cce5
BS
240 /*
241 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
242 * The number of entries should be >= the number requested or return
243 * an error.
244 */
245 cq->ibcq.cqe = entries;
246 cq->notify = IB_CQ_NONE;
247 cq->triggered = 0;
248 spin_lock_init(&cq->lock);
249 tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
373d9915
RC
250 wc->head = 0;
251 wc->tail = 0;
cef1cce5
BS
252 cq->queue = wc;
253
254 ret = &cq->ibcq;
255
fe62546a 256 dev->n_cqs_allocated++;
373d9915 257 goto done;
fe62546a 258
373d9915
RC
259bail_wc:
260 vfree(wc);
261
262bail_cq:
263 kfree(cq);
264
265done:
cef1cce5
BS
266 return ret;
267}
268
269/**
270 * ipath_destroy_cq - destroy a completion queue
271 * @ibcq: the completion queue to destroy.
272 *
273 * Returns 0 for success.
274 *
275 * Called by ib_destroy_cq() in the generic verbs code.
276 */
277int ipath_destroy_cq(struct ib_cq *ibcq)
278{
fe62546a 279 struct ipath_ibdev *dev = to_idev(ibcq->device);
cef1cce5
BS
280 struct ipath_cq *cq = to_icq(ibcq);
281
282 tasklet_kill(&cq->comptask);
fe62546a 283 dev->n_cqs_allocated--;
373d9915
RC
284 if (cq->ip)
285 kref_put(&cq->ip->ref, ipath_release_mmap_info);
286 else
287 vfree(cq->queue);
cef1cce5
BS
288 kfree(cq);
289
290 return 0;
291}
292
293/**
294 * ipath_req_notify_cq - change the notification type for a completion queue
295 * @ibcq: the completion queue
296 * @notify: the type of notification to request
297 *
298 * Returns 0 for success.
299 *
300 * This may be called from interrupt context. Also called by
301 * ib_req_notify_cq() in the generic verbs code.
302 */
303int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
304{
305 struct ipath_cq *cq = to_icq(ibcq);
306 unsigned long flags;
307
308 spin_lock_irqsave(&cq->lock, flags);
309 /*
310 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
373d9915 311 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
cef1cce5
BS
312 */
313 if (cq->notify != IB_CQ_NEXT_COMP)
314 cq->notify = notify;
315 spin_unlock_irqrestore(&cq->lock, flags);
316 return 0;
317}
318
319int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
320{
321 struct ipath_cq *cq = to_icq(ibcq);
373d9915
RC
322 struct ipath_cq_wc *old_wc = cq->queue;
323 struct ipath_cq_wc *wc;
324 u32 head, tail, n;
cef1cce5
BS
325 int ret;
326
327 /*
328 * Need to use vmalloc() if we want to support large #s of entries.
329 */
373d9915 330 wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
cef1cce5
BS
331 if (!wc) {
332 ret = -ENOMEM;
333 goto bail;
334 }
335
373d9915
RC
336 /*
337 * Return the address of the WC as the offset to mmap.
338 * See ipath_mmap() for details.
339 */
340 if (udata && udata->outlen >= sizeof(__u64)) {
341 __u64 offset = (__u64) wc;
342
343 ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
344 if (ret)
345 goto bail;
346 }
347
cef1cce5 348 spin_lock_irq(&cq->lock);
373d9915
RC
349 /*
350 * Make sure head and tail are sane since they
351 * might be user writable.
352 */
353 head = old_wc->head;
354 if (head > (u32) cq->ibcq.cqe)
355 head = (u32) cq->ibcq.cqe;
356 tail = old_wc->tail;
357 if (tail > (u32) cq->ibcq.cqe)
358 tail = (u32) cq->ibcq.cqe;
359 if (head < tail)
360 n = cq->ibcq.cqe + 1 + head - tail;
cef1cce5 361 else
373d9915 362 n = head - tail;
cef1cce5
BS
363 if (unlikely((u32)cqe < n)) {
364 spin_unlock_irq(&cq->lock);
365 vfree(wc);
366 ret = -EOVERFLOW;
367 goto bail;
368 }
373d9915
RC
369 for (n = 0; tail != head; n++) {
370 wc->queue[n] = old_wc->queue[tail];
371 if (tail == (u32) cq->ibcq.cqe)
372 tail = 0;
cef1cce5 373 else
373d9915 374 tail++;
cef1cce5
BS
375 }
376 cq->ibcq.cqe = cqe;
373d9915
RC
377 wc->head = n;
378 wc->tail = 0;
cef1cce5
BS
379 cq->queue = wc;
380 spin_unlock_irq(&cq->lock);
381
382 vfree(old_wc);
383
373d9915
RC
384 if (cq->ip) {
385 struct ipath_ibdev *dev = to_idev(ibcq->device);
386 struct ipath_mmap_info *ip = cq->ip;
387
388 ip->obj = wc;
389 ip->size = PAGE_ALIGN(sizeof(*wc) +
390 sizeof(struct ib_wc) * cqe);
391 spin_lock_irq(&dev->pending_lock);
392 ip->next = dev->pending_mmaps;
393 dev->pending_mmaps = ip;
394 spin_unlock_irq(&dev->pending_lock);
395 }
396
cef1cce5
BS
397 ret = 0;
398
399bail:
400 return ret;
401}
This page took 0.078573 seconds and 5 git commands to generate.