xprtrdma: Fix panic in rpcrdma_register_frmr_external()
net/sunrpc/xprtrdma/verbs.c
1 /*
2 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the BSD-type
8 * license below:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 *
14 * Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 *
17 * Redistributions in binary form must reproduce the above
18 * copyright notice, this list of conditions and the following
19 * disclaimer in the documentation and/or other materials provided
20 * with the distribution.
21 *
22 * Neither the name of the Network Appliance, Inc. nor the names of
23 * its contributors may be used to endorse or promote products
24 * derived from this software without specific prior written
25 * permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * verbs.c
42 *
43 * Encapsulates the major functions managing:
44 * o adapters
45 * o endpoints
46 * o connections
47 * o buffer memory
48 */
49
50 #include <linux/interrupt.h>
51 #include <linux/slab.h>
52 #include <asm/bitops.h>
53
54 #include "xprt_rdma.h"
55
56/*
57 * Globals/Macros
58 */
59
60#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_TRANS
62#endif
63
64/*
65 * internal functions
66 */
67
68/*
69 * handle replies in tasklet context, using a single, global list
70 * rdma tasklet function -- just turn around and call the func
71 * for all replies on the list
72 */
73
74static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
75static LIST_HEAD(rpcrdma_tasklets_g);
76
77static void
78rpcrdma_run_tasklet(unsigned long data)
79{
80 struct rpcrdma_rep *rep;
81 void (*func)(struct rpcrdma_rep *);
82 unsigned long flags;
83
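 /* "data" is unused; the self-assignment below appears to exist only to quiet
  * compiler warnings about the unused tasklet argument. */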
84 data = data;
85 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
86 while (!list_empty(&rpcrdma_tasklets_g)) {
87 rep = list_entry(rpcrdma_tasklets_g.next,
88 struct rpcrdma_rep, rr_list);
89 list_del(&rep->rr_list);
90 func = rep->rr_func;
91 rep->rr_func = NULL;
92 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
93
94 if (func)
95 func(rep);
96 else
97 rpcrdma_recv_buffer_put(rep);
98
99 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
100 }
101 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
102}
103
104static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
105
106static inline void
107rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep)
108{
109 unsigned long flags;
110
111 spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
112 list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g);
113 spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
114 tasklet_schedule(&rpcrdma_tasklet_g);
115}
116
117static void
118rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
119{
120 struct rpcrdma_ep *ep = context;
121
122 dprintk("RPC: %s: QP error %X on device %s ep %p\n",
123 __func__, event->event, event->device->name, context);
124 if (ep->rep_connected == 1) {
125 ep->rep_connected = -EIO;
126 ep->rep_func(ep);
127 wake_up_all(&ep->rep_connect_wait);
128 }
129}
130
131static void
132rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
133{
134 struct rpcrdma_ep *ep = context;
135
136 dprintk("RPC: %s: CQ error %X on device %s ep %p\n",
137 __func__, event->event, event->device->name, context);
138 if (ep->rep_connected == 1) {
139 ep->rep_connected = -EIO;
140 ep->rep_func(ep);
141 wake_up_all(&ep->rep_connect_wait);
142 }
143}
144
145 static void
146 rpcrdma_sendcq_process_wc(struct ib_wc *wc)
147 {
148 struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
149
150 dprintk("RPC: %s: frmr %p status %X opcode %d\n",
151 __func__, frmr, wc->status, wc->opcode);
152
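 /* Plain sends are posted with wr_id == 0 (no completion cookie, see
  * rpcrdma_ep_post); only FRMR register/invalidate WRs carry an MR to update. */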
153 if (wc->wr_id == 0ULL)
154 return;
155 if (wc->status != IB_WC_SUCCESS)
156 return;
157
158 if (wc->opcode == IB_WC_FAST_REG_MR)
159 frmr->r.frmr.state = FRMR_IS_VALID;
160 else if (wc->opcode == IB_WC_LOCAL_INV)
161 frmr->r.frmr.state = FRMR_IS_INVALID;
162 }
163
164 static int
165 rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
166 {
167 struct ib_wc *wcs;
168 int budget, count, rc;
169
170 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
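 /* Poll the CQ in RPCRDMA_POLLSIZE batches, stopping on a short batch or
  * when the budget is spent, so a single upcall cannot poll indefinitely. */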
171 do {
172 wcs = ep->rep_send_wcs;
173
174 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
175 if (rc <= 0)
176 return rc;
177
178 count = rc;
179 while (count-- > 0)
180 rpcrdma_sendcq_process_wc(wcs++);
181 } while (rc == RPCRDMA_POLLSIZE && --budget);
182 return 0;
183 }
c56c65fb 184
185/*
186 * Handle send, fast_reg_mr, and local_inv completions.
187 *
188 * Send events are typically suppressed and thus do not result
189 * in an upcall. Occasionally one is signaled, however. This
190 * prevents the provider's completion queue from wrapping and
191 * losing a completion.
192 */
193static void
194rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
195{
196 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
197 int rc;
198
199 rc = rpcrdma_sendcq_poll(cq, ep);
200 if (rc) {
201 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
202 __func__, rc);
203 return;
204 }
205
206 rc = ib_req_notify_cq(cq,
207 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
208 if (rc == 0)
209 return;
210 if (rc < 0) {
211 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
212 __func__, rc);
213 return;
214 }
215
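 /* A positive return from ib_req_notify_cq() means completions may have
  * been missed while re-arming, so drain the CQ one more time. */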
1c00dd07 216 rpcrdma_sendcq_poll(cq, ep);
217}
218
219static void
220rpcrdma_recvcq_process_wc(struct ib_wc *wc)
221{
222 struct rpcrdma_rep *rep =
223 (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
224
225 dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
226 __func__, rep, wc->status, wc->opcode, wc->byte_len);
227
228 if (wc->status != IB_WC_SUCCESS) {
229 rep->rr_len = ~0U;
230 goto out_schedule;
231 }
232 if (wc->opcode != IB_WC_RECV)
233 return;
234
235 rep->rr_len = wc->byte_len;
236 ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
237 rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
238
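 /* The credit value sits in the fixed portion of the RPC/RDMA header;
  * peek at it only once at least that many bytes have arrived. */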
239 if (rep->rr_len >= 16) {
240 struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
241 unsigned int credits = ntohl(p->rm_credit);
242
243 if (credits == 0)
244 credits = 1; /* don't deadlock */
245 else if (credits > rep->rr_buffer->rb_max_requests)
246 credits = rep->rr_buffer->rb_max_requests;
247 atomic_set(&rep->rr_buffer->rb_credits, credits);
248 }
249
250out_schedule:
251 rpcrdma_schedule_tasklet(rep);
252}
253
254static int
1c00dd07 255rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
fc664485 256{
1c00dd07 257 struct ib_wc *wcs;
8301a2c0 258 int budget, count, rc;
fc664485 259
8301a2c0 260 budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
261 do {
262 wcs = ep->rep_recv_wcs;
263
264 rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
265 if (rc <= 0)
266 return rc;
267
268 count = rc;
269 while (count-- > 0)
270 rpcrdma_recvcq_process_wc(wcs++);
8301a2c0 271 } while (rc == RPCRDMA_POLLSIZE && --budget);
1c00dd07 272 return 0;
273}
274
275/*
fc664485 276 * Handle receive completions.
c56c65fb 277 *
c56c65fb
TT
278 * It is reentrant, but it processes one event at a time to preserve
279 * the ordering of receives, which the server's credit accounting depends on.
280 *
281 * It is the responsibility of the scheduled tasklet to return
282 * recv buffers to the pool. NOTE: this affects synchronization of
283 * connection shutdown. That is, the structures required for
284 * the completion of the reply handler must remain intact until
285 * all memory has been reclaimed.
c56c65fb
TT
286 */
287static void
fc664485 288rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
c56c65fb 289{
1c00dd07 290 struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
c56c65fb
TT
291 int rc;
292
1c00dd07 293 rc = rpcrdma_recvcq_poll(cq, ep);
fc664485
CL
294 if (rc) {
295 dprintk("RPC: %s: ib_poll_cq failed: %i\n",
296 __func__, rc);
c56c65fb 297 return;
fc664485 298 }
c56c65fb 299
7f23f6f6
CL
300 rc = ib_req_notify_cq(cq,
301 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
302 if (rc == 0)
303 return;
304 if (rc < 0) {
fc664485 305 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
c56c65fb
TT
306 __func__, rc);
307 return;
308 }
309
1c00dd07 310 rpcrdma_recvcq_poll(cq, ep);
c56c65fb
TT
311}
312
313#ifdef RPC_DEBUG
314static const char * const conn[] = {
315 "address resolved",
316 "address error",
317 "route resolved",
318 "route error",
319 "connect request",
320 "connect response",
321 "connect error",
322 "unreachable",
323 "rejected",
324 "established",
325 "disconnected",
326 "device removal"
327};
328#endif
329
330static int
331rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
332{
333 struct rpcrdma_xprt *xprt = id->context;
334 struct rpcrdma_ia *ia = &xprt->rx_ia;
335 struct rpcrdma_ep *ep = &xprt->rx_ep;
ff0db049 336#ifdef RPC_DEBUG
c56c65fb 337 struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
ff0db049 338#endif
c56c65fb
TT
339 struct ib_qp_attr attr;
340 struct ib_qp_init_attr iattr;
341 int connstate = 0;
342
343 switch (event->event) {
344 case RDMA_CM_EVENT_ADDR_RESOLVED:
345 case RDMA_CM_EVENT_ROUTE_RESOLVED:
5675add3 346 ia->ri_async_rc = 0;
c56c65fb
TT
347 complete(&ia->ri_done);
348 break;
349 case RDMA_CM_EVENT_ADDR_ERROR:
350 ia->ri_async_rc = -EHOSTUNREACH;
351 dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
352 __func__, ep);
353 complete(&ia->ri_done);
354 break;
355 case RDMA_CM_EVENT_ROUTE_ERROR:
356 ia->ri_async_rc = -ENETUNREACH;
357 dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
358 __func__, ep);
359 complete(&ia->ri_done);
360 break;
361 case RDMA_CM_EVENT_ESTABLISHED:
362 connstate = 1;
363 ib_query_qp(ia->ri_id->qp, &attr,
364 IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
365 &iattr);
366 dprintk("RPC: %s: %d responder resources"
367 " (%d initiator)\n",
368 __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic);
369 goto connected;
370 case RDMA_CM_EVENT_CONNECT_ERROR:
371 connstate = -ENOTCONN;
372 goto connected;
373 case RDMA_CM_EVENT_UNREACHABLE:
374 connstate = -ENETDOWN;
375 goto connected;
376 case RDMA_CM_EVENT_REJECTED:
377 connstate = -ECONNREFUSED;
378 goto connected;
379 case RDMA_CM_EVENT_DISCONNECTED:
380 connstate = -ECONNABORTED;
381 goto connected;
382 case RDMA_CM_EVENT_DEVICE_REMOVAL:
383 connstate = -ENODEV;
384connected:
21454aaa 385 dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
386 __func__,
387 (event->event <= 11) ? conn[event->event] :
388 "unknown connection error",
21454aaa 389 &addr->sin_addr.s_addr,
390 ntohs(addr->sin_port),
391 ep, event->event);
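 /* Drop back to a single credit on any connection state change; the first
  * reply from the server restores the real credit limit. */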
392 atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
393 dprintk("RPC: %s: %sconnected\n",
394 __func__, connstate > 0 ? "" : "dis");
395 ep->rep_connected = connstate;
396 ep->rep_func(ep);
397 wake_up_all(&ep->rep_connect_wait);
398 break;
399 default:
1a954051 400 dprintk("RPC: %s: unexpected CM event %d\n",
c56c65fb 401 __func__, event->event);
c56c65fb
TT
402 break;
403 }
404
b3cd8d45
TT
405#ifdef RPC_DEBUG
406 if (connstate == 1) {
407 int ird = attr.max_dest_rd_atomic;
408 int tird = ep->rep_remote_cma.responder_resources;
21454aaa 409 printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
b3cd8d45 410 "on %s, memreg %d slots %d ird %d%s\n",
21454aaa 411 &addr->sin_addr.s_addr,
b3cd8d45
TT
412 ntohs(addr->sin_port),
413 ia->ri_id->device->name,
414 ia->ri_memreg_strategy,
415 xprt->rx_buf.rb_max_requests,
416 ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
417 } else if (connstate < 0) {
21454aaa
HH
418 printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
419 &addr->sin_addr.s_addr,
b3cd8d45
TT
420 ntohs(addr->sin_port),
421 connstate);
422 }
423#endif
424
c56c65fb
TT
425 return 0;
426}
427
428static struct rdma_cm_id *
429rpcrdma_create_id(struct rpcrdma_xprt *xprt,
430 struct rpcrdma_ia *ia, struct sockaddr *addr)
431{
432 struct rdma_cm_id *id;
433 int rc;
434
1a954051
TT
435 init_completion(&ia->ri_done);
436
b26f9b99 437 id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
c56c65fb
TT
438 if (IS_ERR(id)) {
439 rc = PTR_ERR(id);
440 dprintk("RPC: %s: rdma_create_id() failed %i\n",
441 __func__, rc);
442 return id;
443 }
444
5675add3 445 ia->ri_async_rc = -ETIMEDOUT;
c56c65fb
TT
446 rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
447 if (rc) {
448 dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
449 __func__, rc);
450 goto out;
451 }
5675add3
TT
452 wait_for_completion_interruptible_timeout(&ia->ri_done,
453 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
c56c65fb
TT
454 rc = ia->ri_async_rc;
455 if (rc)
456 goto out;
457
5675add3 458 ia->ri_async_rc = -ETIMEDOUT;
c56c65fb
TT
459 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
460 if (rc) {
461 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
462 __func__, rc);
463 goto out;
464 }
5675add3
TT
465 wait_for_completion_interruptible_timeout(&ia->ri_done,
466 msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
c56c65fb
TT
467 rc = ia->ri_async_rc;
468 if (rc)
469 goto out;
470
471 return id;
472
473out:
474 rdma_destroy_id(id);
475 return ERR_PTR(rc);
476}
477
478/*
479 * Drain any cq, prior to teardown.
480 */
481static void
482rpcrdma_clean_cq(struct ib_cq *cq)
483{
484 struct ib_wc wc;
485 int count = 0;
486
487 while (1 == ib_poll_cq(cq, 1, &wc))
488 ++count;
489
490 if (count)
491 dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
492 __func__, count, wc.opcode);
493}
494
495/*
496 * Exported functions.
497 */
498
499/*
500 * Open and initialize an Interface Adapter.
501 * o initializes fields of struct rpcrdma_ia, including
502 * interface and provider attributes and protection zone.
503 */
504int
505rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
506{
bd7ed1d1
TT
507 int rc, mem_priv;
508 struct ib_device_attr devattr;
c56c65fb
TT
509 struct rpcrdma_ia *ia = &xprt->rx_ia;
510
c56c65fb
TT
511 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
512 if (IS_ERR(ia->ri_id)) {
513 rc = PTR_ERR(ia->ri_id);
514 goto out1;
515 }
516
517 ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
518 if (IS_ERR(ia->ri_pd)) {
519 rc = PTR_ERR(ia->ri_pd);
520 dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
521 __func__, rc);
522 goto out2;
523 }
524
bd7ed1d1
TT
525 /*
526 * Query the device to determine if the requested memory
527 * registration strategy is supported. If it isn't, set the
528 * strategy to a globally supported model.
529 */
530 rc = ib_query_device(ia->ri_id->device, &devattr);
531 if (rc) {
532 dprintk("RPC: %s: ib_query_device failed %d\n",
533 __func__, rc);
534 goto out2;
535 }
536
537 if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
538 ia->ri_have_dma_lkey = 1;
539 ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
540 }
541
f10eafd3 542 if (memreg == RPCRDMA_FRMR) {
3197d309
TT
543 /* Requires both frmr reg and local dma lkey */
544 if ((devattr.device_cap_flags &
545 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
546 (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
3197d309 547 dprintk("RPC: %s: FRMR registration "
f10eafd3
CL
548 "not supported by HCA\n", __func__);
549 memreg = RPCRDMA_MTHCAFMR;
0fc6c4e7
SW
550 } else {
551 /* Mind the ia limit on FRMR page list depth */
552 ia->ri_max_frmr_depth = min_t(unsigned int,
553 RPCRDMA_MAX_DATA_SEGS,
554 devattr.max_fast_reg_page_list_len);
bd7ed1d1 555 }
f10eafd3
CL
556 }
557 if (memreg == RPCRDMA_MTHCAFMR) {
558 if (!ia->ri_id->device->alloc_fmr) {
559 dprintk("RPC: %s: MTHCAFMR registration "
560 "not supported by HCA\n", __func__);
561#if RPCRDMA_PERSISTENT_REGISTRATION
562 memreg = RPCRDMA_ALLPHYSICAL;
563#else
cdd9ade7 564 rc = -ENOMEM;
f10eafd3
CL
565 goto out2;
566#endif
567 }
bd7ed1d1
TT
568 }
569
c56c65fb
TT
570 /*
571 * Optionally obtain an underlying physical identity mapping in
572 * order to do a memory window-based bind. This base registration
573 * is protected from remote access - that is enabled only by binding
574 * for the specific bytes targeted during each RPC operation, and
575 * revoked after the corresponding completion similar to a storage
576 * adapter.
577 */
bd7ed1d1 578 switch (memreg) {
3197d309 579 case RPCRDMA_FRMR:
bd7ed1d1 580 break;
c56c65fb 581#if RPCRDMA_PERSISTENT_REGISTRATION
bd7ed1d1
TT
582 case RPCRDMA_ALLPHYSICAL:
583 mem_priv = IB_ACCESS_LOCAL_WRITE |
584 IB_ACCESS_REMOTE_WRITE |
585 IB_ACCESS_REMOTE_READ;
586 goto register_setup;
c56c65fb 587#endif
bd7ed1d1
TT
588 case RPCRDMA_MTHCAFMR:
589 if (ia->ri_have_dma_lkey)
c56c65fb 590 break;
bd7ed1d1 591 mem_priv = IB_ACCESS_LOCAL_WRITE;
b45ccfd2 592#if RPCRDMA_PERSISTENT_REGISTRATION
bd7ed1d1 593 register_setup:
b45ccfd2 594#endif
c56c65fb
TT
595 ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
596 if (IS_ERR(ia->ri_bind_mem)) {
597 printk(KERN_ALERT "%s: ib_get_dma_mr for "
0ac531c1 598 "phys register failed with %lX\n",
c56c65fb 599 __func__, PTR_ERR(ia->ri_bind_mem));
0ac531c1
CL
600 rc = -ENOMEM;
601 goto out2;
c56c65fb 602 }
bd7ed1d1
TT
603 break;
604 default:
cdd9ade7
CL
605 printk(KERN_ERR "RPC: Unsupported memory "
606 "registration mode: %d\n", memreg);
607 rc = -ENOMEM;
bd7ed1d1 608 goto out2;
c56c65fb 609 }
bd7ed1d1
TT
610 dprintk("RPC: %s: memory registration strategy is %d\n",
611 __func__, memreg);
c56c65fb
TT
612
613 /* Else will do memory reg/dereg for each chunk */
614 ia->ri_memreg_strategy = memreg;
615
616 return 0;
617out2:
618 rdma_destroy_id(ia->ri_id);
fee08caf 619 ia->ri_id = NULL;
c56c65fb
TT
620out1:
621 return rc;
622}
623
624/*
625 * Clean up/close an IA.
626 * o if event handles and PD have been initialized, free them.
627 * o close the IA
628 */
629void
630rpcrdma_ia_close(struct rpcrdma_ia *ia)
631{
632 int rc;
633
634 dprintk("RPC: %s: entering\n", __func__);
635 if (ia->ri_bind_mem != NULL) {
636 rc = ib_dereg_mr(ia->ri_bind_mem);
637 dprintk("RPC: %s: ib_dereg_mr returned %i\n",
638 __func__, rc);
639 }
fee08caf
TT
640 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
641 if (ia->ri_id->qp)
642 rdma_destroy_qp(ia->ri_id);
643 rdma_destroy_id(ia->ri_id);
644 ia->ri_id = NULL;
645 }
c56c65fb
TT
646 if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
647 rc = ib_dealloc_pd(ia->ri_pd);
648 dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
649 __func__, rc);
650 }
c56c65fb
TT
651}
652
653/*
654 * Create unconnected endpoint.
655 */
656int
657rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
658 struct rpcrdma_create_data_internal *cdata)
659{
660 struct ib_device_attr devattr;
fc664485 661 struct ib_cq *sendcq, *recvcq;
5d40a8a5 662 int rc, err;
c56c65fb
TT
663
664 rc = ib_query_device(ia->ri_id->device, &devattr);
665 if (rc) {
666 dprintk("RPC: %s: ib_query_device failed %d\n",
667 __func__, rc);
668 return rc;
669 }
670
671 /* check provider's send/recv wr limits */
672 if (cdata->max_requests > devattr.max_qp_wr)
673 cdata->max_requests = devattr.max_qp_wr;
674
675 ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
676 ep->rep_attr.qp_context = ep;
677 /* send_cq and recv_cq initialized below */
678 ep->rep_attr.srq = NULL;
679 ep->rep_attr.cap.max_send_wr = cdata->max_requests;
680 switch (ia->ri_memreg_strategy) {
0fc6c4e7
SW
681 case RPCRDMA_FRMR: {
682 int depth = 7;
683
15cdc644
TT
684 /* Add room for frmr register and invalidate WRs.
685 * 1. FRMR reg WR for head
686 * 2. FRMR invalidate WR for head
0fc6c4e7
SW
687 * 3. N FRMR reg WRs for pagelist
688 * 4. N FRMR invalidate WRs for pagelist
15cdc644
TT
689 * 5. FRMR reg WR for tail
690 * 6. FRMR invalidate WR for tail
691 * 7. The RDMA_SEND WR
692 */
0fc6c4e7
SW
693
694 /* Calculate N if the device max FRMR depth is smaller than
695 * RPCRDMA_MAX_DATA_SEGS.
696 */
697 if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
698 int delta = RPCRDMA_MAX_DATA_SEGS -
699 ia->ri_max_frmr_depth;
700
701 do {
702 depth += 2; /* FRMR reg + invalidate */
703 delta -= ia->ri_max_frmr_depth;
704 } while (delta > 0);
705
706 }
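 /* Each RPC may consume up to "depth" send WRs (registrations, invalidates,
  * and the SEND itself), so scale the send queue to match. */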
707 ep->rep_attr.cap.max_send_wr *= depth;
15cdc644 708 if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
0fc6c4e7 709 cdata->max_requests = devattr.max_qp_wr / depth;
15cdc644
TT
710 if (!cdata->max_requests)
711 return -EINVAL;
0fc6c4e7
SW
712 ep->rep_attr.cap.max_send_wr = cdata->max_requests *
713 depth;
15cdc644 714 }
3197d309 715 break;
0fc6c4e7 716 }
c56c65fb
TT
717 default:
718 break;
719 }
720 ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
721 ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
722 ep->rep_attr.cap.max_recv_sge = 1;
723 ep->rep_attr.cap.max_inline_data = 0;
724 ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
725 ep->rep_attr.qp_type = IB_QPT_RC;
726 ep->rep_attr.port_num = ~0;
727
728 dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
729 "iovs: send %d recv %d\n",
730 __func__,
731 ep->rep_attr.cap.max_send_wr,
732 ep->rep_attr.cap.max_recv_wr,
733 ep->rep_attr.cap.max_send_sge,
734 ep->rep_attr.cap.max_recv_sge);
735
736 /* set trigger for requesting send completion */
fc664485 737 ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
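 /* Request a signaled send completion roughly once per half of the send
  * queue; see DECR_CQCOUNT() in rpcrdma_ep_post(). */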
738 if (ep->rep_cqinit <= 2)
739 ep->rep_cqinit = 0;
740 INIT_CQCOUNT(ep);
741 ep->rep_ia = ia;
742 init_waitqueue_head(&ep->rep_connect_wait);
254f91e2 743 INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
c56c65fb 744
fc664485 745 sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
1c00dd07 746 rpcrdma_cq_async_error_upcall, ep,
c56c65fb 747 ep->rep_attr.cap.max_send_wr + 1, 0);
fc664485
CL
748 if (IS_ERR(sendcq)) {
749 rc = PTR_ERR(sendcq);
750 dprintk("RPC: %s: failed to create send CQ: %i\n",
c56c65fb
TT
751 __func__, rc);
752 goto out1;
753 }
754
fc664485 755 rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
c56c65fb
TT
756 if (rc) {
757 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
758 __func__, rc);
759 goto out2;
760 }
761
fc664485 762 recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
1c00dd07 763 rpcrdma_cq_async_error_upcall, ep,
fc664485
CL
764 ep->rep_attr.cap.max_recv_wr + 1, 0);
765 if (IS_ERR(recvcq)) {
766 rc = PTR_ERR(recvcq);
767 dprintk("RPC: %s: failed to create recv CQ: %i\n",
768 __func__, rc);
769 goto out2;
770 }
771
772 rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
773 if (rc) {
774 dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
775 __func__, rc);
776 ib_destroy_cq(recvcq);
777 goto out2;
778 }
779
780 ep->rep_attr.send_cq = sendcq;
781 ep->rep_attr.recv_cq = recvcq;
c56c65fb
TT
782
783 /* Initialize cma parameters */
784
785 /* RPC/RDMA does not use private data */
786 ep->rep_remote_cma.private_data = NULL;
787 ep->rep_remote_cma.private_data_len = 0;
788
789 /* Client offers RDMA Read but does not initiate */
b334eaab 790 ep->rep_remote_cma.initiator_depth = 0;
03ff8821 791 if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
b334eaab
TT
792 ep->rep_remote_cma.responder_resources = 32;
793 else
c56c65fb 794 ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
c56c65fb
TT
795
796 ep->rep_remote_cma.retry_count = 7;
797 ep->rep_remote_cma.flow_control = 0;
798 ep->rep_remote_cma.rnr_retry_count = 0;
799
800 return 0;
801
802out2:
fc664485 803 err = ib_destroy_cq(sendcq);
5d40a8a5
CL
804 if (err)
805 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
806 __func__, err);
c56c65fb
TT
807out1:
808 return rc;
809}
810
811/*
812 * rpcrdma_ep_destroy
813 *
814 * Disconnect and destroy endpoint. After this, the only
815 * valid operations on the ep are to free it (if dynamically
816 * allocated) or re-create it.
c56c65fb 817 */
7f1d5419 818void
c56c65fb
TT
819rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
820{
821 int rc;
822
823 dprintk("RPC: %s: entering, connected is %d\n",
824 __func__, ep->rep_connected);
825
254f91e2
CL
826 cancel_delayed_work_sync(&ep->rep_connect_worker);
827
c56c65fb
TT
828 if (ia->ri_id->qp) {
829 rc = rpcrdma_ep_disconnect(ep, ia);
830 if (rc)
831 dprintk("RPC: %s: rpcrdma_ep_disconnect"
832 " returned %i\n", __func__, rc);
fee08caf
TT
833 rdma_destroy_qp(ia->ri_id);
834 ia->ri_id->qp = NULL;
c56c65fb
TT
835 }
836
c56c65fb
TT
837 /* padding - could be done in rpcrdma_buffer_destroy... */
838 if (ep->rep_pad_mr) {
839 rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
840 ep->rep_pad_mr = NULL;
841 }
842
fc664485
CL
843 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
844 rc = ib_destroy_cq(ep->rep_attr.recv_cq);
845 if (rc)
846 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
847 __func__, rc);
848
849 rpcrdma_clean_cq(ep->rep_attr.send_cq);
850 rc = ib_destroy_cq(ep->rep_attr.send_cq);
c56c65fb
TT
851 if (rc)
852 dprintk("RPC: %s: ib_destroy_cq returned %i\n",
853 __func__, rc);
c56c65fb
TT
854}
855
856/*
857 * Connect unconnected endpoint.
858 */
859int
860rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
861{
862 struct rdma_cm_id *id;
863 int rc = 0;
864 int retry_count = 0;
c56c65fb 865
c055551e 866 if (ep->rep_connected != 0) {
c56c65fb
TT
867 struct rpcrdma_xprt *xprt;
868retry:
ec62f40d 869 dprintk("RPC: %s: reconnecting...\n", __func__);
c56c65fb
TT
870 rc = rpcrdma_ep_disconnect(ep, ia);
871 if (rc && rc != -ENOTCONN)
872 dprintk("RPC: %s: rpcrdma_ep_disconnect"
873 " status %i\n", __func__, rc);
fc664485
CL
874
875 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
876 rpcrdma_clean_cq(ep->rep_attr.send_cq);
c56c65fb
TT
877
878 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
879 id = rpcrdma_create_id(xprt, ia,
880 (struct sockaddr *)&xprt->rx_data.addr);
881 if (IS_ERR(id)) {
ec62f40d 882 rc = -EHOSTUNREACH;
883 goto out;
884 }
885 /* TEMP TEMP TEMP - fail if new device:
886 * Deregister/remarshal *all* requests!
887 * Close and recreate adapter, pd, etc!
888 * Re-determine all attributes still sane!
889 * More stuff I haven't thought of!
890 * Rrrgh!
891 */
892 if (ia->ri_id->device != id->device) {
893 printk("RPC: %s: can't reconnect on "
894 "different device!\n", __func__);
895 rdma_destroy_id(id);
ec62f40d 896 rc = -ENETUNREACH;
c56c65fb
TT
897 goto out;
898 }
899 /* END TEMP */
ec62f40d
CL
900 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
901 if (rc) {
902 dprintk("RPC: %s: rdma_create_qp failed %i\n",
903 __func__, rc);
904 rdma_destroy_id(id);
905 rc = -ENETUNREACH;
906 goto out;
907 }
1a954051 908 rdma_destroy_qp(ia->ri_id);
c56c65fb
TT
909 rdma_destroy_id(ia->ri_id);
910 ia->ri_id = id;
ec62f40d
CL
911 } else {
912 dprintk("RPC: %s: connecting...\n", __func__);
913 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
914 if (rc) {
915 dprintk("RPC: %s: rdma_create_qp failed %i\n",
916 __func__, rc);
917 /* do not update ep->rep_connected */
918 return -ENETUNREACH;
919 }
c56c65fb
TT
920 }
921
c56c65fb
TT
922 ep->rep_connected = 0;
923
924 rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
925 if (rc) {
926 dprintk("RPC: %s: rdma_connect() failed with %i\n",
927 __func__, rc);
928 goto out;
929 }
930
c56c65fb
TT
931 wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
932
933 /*
934 * Check state. A non-peer reject indicates no listener
935 * (ECONNREFUSED), which may be a transient state. All
936 * others indicate a transport condition which has already
937 * undergone best-effort recovery.
938 */
f64f9e71
JP
939 if (ep->rep_connected == -ECONNREFUSED &&
940 ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
c56c65fb
TT
941 dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
942 goto retry;
943 }
944 if (ep->rep_connected <= 0) {
945 /* Sometimes, the only way to reliably connect to remote
946 * CMs is to use same nonzero values for ORD and IRD. */
b334eaab
TT
947 if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
948 (ep->rep_remote_cma.responder_resources == 0 ||
949 ep->rep_remote_cma.initiator_depth !=
950 ep->rep_remote_cma.responder_resources)) {
951 if (ep->rep_remote_cma.responder_resources == 0)
952 ep->rep_remote_cma.responder_resources = 1;
953 ep->rep_remote_cma.initiator_depth =
954 ep->rep_remote_cma.responder_resources;
c56c65fb 955 goto retry;
b334eaab 956 }
c56c65fb
TT
957 rc = ep->rep_connected;
958 } else {
959 dprintk("RPC: %s: connected\n", __func__);
960 }
961
962out:
963 if (rc)
964 ep->rep_connected = rc;
965 return rc;
966}
967
968/*
969 * rpcrdma_ep_disconnect
970 *
971 * This is separate from destroy to facilitate the ability
972 * to reconnect without recreating the endpoint.
973 *
974 * This call is not reentrant, and must not be made in parallel
975 * on the same endpoint.
976 */
977int
978rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
979{
980 int rc;
981
fc664485
CL
982 rpcrdma_clean_cq(ep->rep_attr.recv_cq);
983 rpcrdma_clean_cq(ep->rep_attr.send_cq);
c56c65fb
TT
984 rc = rdma_disconnect(ia->ri_id);
985 if (!rc) {
986 /* returns without wait if not connected */
987 wait_event_interruptible(ep->rep_connect_wait,
988 ep->rep_connected != 1);
989 dprintk("RPC: %s: after wait, %sconnected\n", __func__,
990 (ep->rep_connected == 1) ? "still " : "dis");
991 } else {
992 dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
993 ep->rep_connected = rc;
994 }
995 return rc;
996}
997
998/*
999 * Initialize buffer memory
1000 */
1001int
1002rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
1003 struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
1004{
1005 char *p;
65866f82 1006 size_t len, rlen, wlen;
c56c65fb 1007 int i, rc;
8d4ba034 1008 struct rpcrdma_mw *r;
c56c65fb
TT
1009
1010 buf->rb_max_requests = cdata->max_requests;
1011 spin_lock_init(&buf->rb_lock);
1012 atomic_set(&buf->rb_credits, 1);
1013
1014 /* Need to allocate:
1015 * 1. arrays for send and recv pointers
1016 * 2. arrays of struct rpcrdma_req to fill in pointers
1017 * 3. array of struct rpcrdma_rep for replies
1018 * 4. padding, if any
3197d309 1019 * 5. mw's, fmr's or frmr's, if any
c56c65fb
TT
1020 * Send/recv buffers in req/rep need to be registered
1021 */
1022
1023 len = buf->rb_max_requests *
1024 (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
1025 len += cdata->padding;
1026 switch (ia->ri_memreg_strategy) {
3197d309
TT
1027 case RPCRDMA_FRMR:
1028 len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
1029 sizeof(struct rpcrdma_mw);
1030 break;
c56c65fb
TT
1031 case RPCRDMA_MTHCAFMR:
1032 /* TBD we are perhaps overallocating here */
1033 len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
1034 sizeof(struct rpcrdma_mw);
1035 break;
c56c65fb
TT
1036 default:
1037 break;
1038 }
1039
1040 /* allocate 1, 4 and 5 in one shot */
1041 p = kzalloc(len, GFP_KERNEL);
1042 if (p == NULL) {
1043 dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
1044 __func__, len);
1045 rc = -ENOMEM;
1046 goto out;
1047 }
1048 buf->rb_pool = p; /* for freeing it later */
1049
1050 buf->rb_send_bufs = (struct rpcrdma_req **) p;
1051 p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
1052 buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
1053 p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
1054
1055 /*
1056 * Register the zeroed pad buffer, if any.
1057 */
1058 if (cdata->padding) {
1059 rc = rpcrdma_register_internal(ia, p, cdata->padding,
1060 &ep->rep_pad_mr, &ep->rep_pad);
1061 if (rc)
1062 goto out;
1063 }
1064 p += cdata->padding;
1065
c56c65fb 1066 INIT_LIST_HEAD(&buf->rb_mws);
8d4ba034 1067 r = (struct rpcrdma_mw *)p;
c56c65fb 1068 switch (ia->ri_memreg_strategy) {
3197d309
TT
1069 case RPCRDMA_FRMR:
1070 for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
1071 r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
0fc6c4e7 1072 ia->ri_max_frmr_depth);
3197d309
TT
1073 if (IS_ERR(r->r.frmr.fr_mr)) {
1074 rc = PTR_ERR(r->r.frmr.fr_mr);
1075 dprintk("RPC: %s: ib_alloc_fast_reg_mr"
1076 " failed %i\n", __func__, rc);
1077 goto out;
1078 }
0fc6c4e7
SW
1079 r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
1080 ia->ri_id->device,
1081 ia->ri_max_frmr_depth);
3197d309
TT
1082 if (IS_ERR(r->r.frmr.fr_pgl)) {
1083 rc = PTR_ERR(r->r.frmr.fr_pgl);
1084 dprintk("RPC: %s: "
1085 "ib_alloc_fast_reg_page_list "
1086 "failed %i\n", __func__, rc);
4034ba04
AA
1087
1088 ib_dereg_mr(r->r.frmr.fr_mr);
3197d309
TT
1089 goto out;
1090 }
1091 list_add(&r->mw_list, &buf->rb_mws);
1092 ++r;
1093 }
1094 break;
c56c65fb 1095 case RPCRDMA_MTHCAFMR:
c56c65fb
TT
1096 /* TBD we are perhaps overallocating here */
1097 for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
8d4ba034
TT
1098 static struct ib_fmr_attr fa =
1099 { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
c56c65fb
TT
1100 r->r.fmr = ib_alloc_fmr(ia->ri_pd,
1101 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
1102 &fa);
1103 if (IS_ERR(r->r.fmr)) {
1104 rc = PTR_ERR(r->r.fmr);
1105 dprintk("RPC: %s: ib_alloc_fmr"
1106 " failed %i\n", __func__, rc);
1107 goto out;
1108 }
1109 list_add(&r->mw_list, &buf->rb_mws);
1110 ++r;
1111 }
c56c65fb 1112 break;
c56c65fb
TT
1113 default:
1114 break;
1115 }
1116
1117 /*
1118 * Allocate/init the request/reply buffers. Doing this
1119 * using kmalloc for now -- one for each buf.
1120 */
65866f82
CL
1121 wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
1122 rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
1123 dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
1124 __func__, wlen, rlen);
1125
c56c65fb
TT
1126 for (i = 0; i < buf->rb_max_requests; i++) {
1127 struct rpcrdma_req *req;
1128 struct rpcrdma_rep *rep;
1129
65866f82 1130 req = kmalloc(wlen, GFP_KERNEL);
c56c65fb
TT
1131 if (req == NULL) {
1132 dprintk("RPC: %s: request buffer %d alloc"
1133 " failed\n", __func__, i);
1134 rc = -ENOMEM;
1135 goto out;
1136 }
1137 memset(req, 0, sizeof(struct rpcrdma_req));
1138 buf->rb_send_bufs[i] = req;
1139 buf->rb_send_bufs[i]->rl_buffer = buf;
1140
1141 rc = rpcrdma_register_internal(ia, req->rl_base,
65866f82 1142 wlen - offsetof(struct rpcrdma_req, rl_base),
c56c65fb
TT
1143 &buf->rb_send_bufs[i]->rl_handle,
1144 &buf->rb_send_bufs[i]->rl_iov);
1145 if (rc)
1146 goto out;
1147
65866f82
CL
1148 buf->rb_send_bufs[i]->rl_size = wlen -
1149 sizeof(struct rpcrdma_req);
c56c65fb 1150
65866f82 1151 rep = kmalloc(rlen, GFP_KERNEL);
c56c65fb
TT
1152 if (rep == NULL) {
1153 dprintk("RPC: %s: reply buffer %d alloc failed\n",
1154 __func__, i);
1155 rc = -ENOMEM;
1156 goto out;
1157 }
1158 memset(rep, 0, sizeof(struct rpcrdma_rep));
1159 buf->rb_recv_bufs[i] = rep;
1160 buf->rb_recv_bufs[i]->rr_buffer = buf;
c56c65fb
TT
1161
1162 rc = rpcrdma_register_internal(ia, rep->rr_base,
65866f82 1163 rlen - offsetof(struct rpcrdma_rep, rr_base),
c56c65fb
TT
1164 &buf->rb_recv_bufs[i]->rr_handle,
1165 &buf->rb_recv_bufs[i]->rr_iov);
1166 if (rc)
1167 goto out;
1168
1169 }
1170 dprintk("RPC: %s: max_requests %d\n",
1171 __func__, buf->rb_max_requests);
1172 /* done */
1173 return 0;
1174out:
1175 rpcrdma_buffer_destroy(buf);
1176 return rc;
1177}
1178
1179/*
1180 * Unregister and destroy buffer memory. Need to deal with
1181 * partial initialization, so it's callable from failed create.
1182 * Must be called before destroying endpoint, as registrations
1183 * reference it.
1184 */
1185void
1186rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1187{
1188 int rc, i;
1189 struct rpcrdma_ia *ia = rdmab_to_ia(buf);
8d4ba034 1190 struct rpcrdma_mw *r;
c56c65fb
TT
1191
1192 /* clean up in reverse order from create
1193 * 1. recv mr memory (mr free, then kfree)
c56c65fb
TT
1194 * 2. send mr memory (mr free, then kfree)
1195 * 3. padding (if any) [moved to rpcrdma_ep_destroy]
1196 * 4. arrays
1197 */
1198 dprintk("RPC: %s: entering\n", __func__);
1199
1200 for (i = 0; i < buf->rb_max_requests; i++) {
1201 if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) {
1202 rpcrdma_deregister_internal(ia,
1203 buf->rb_recv_bufs[i]->rr_handle,
1204 &buf->rb_recv_bufs[i]->rr_iov);
1205 kfree(buf->rb_recv_bufs[i]);
1206 }
1207 if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
c56c65fb
TT
1208 rpcrdma_deregister_internal(ia,
1209 buf->rb_send_bufs[i]->rl_handle,
1210 &buf->rb_send_bufs[i]->rl_iov);
1211 kfree(buf->rb_send_bufs[i]);
1212 }
1213 }
1214
4034ba04
AA
1215 while (!list_empty(&buf->rb_mws)) {
1216 r = list_entry(buf->rb_mws.next,
1217 struct rpcrdma_mw, mw_list);
1218 list_del(&r->mw_list);
1219 switch (ia->ri_memreg_strategy) {
1220 case RPCRDMA_FRMR:
1221 rc = ib_dereg_mr(r->r.frmr.fr_mr);
1222 if (rc)
1223 dprintk("RPC: %s:"
1224 " ib_dereg_mr"
1225 " failed %i\n",
1226 __func__, rc);
1227 ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
1228 break;
1229 case RPCRDMA_MTHCAFMR:
1230 rc = ib_dealloc_fmr(r->r.fmr);
1231 if (rc)
1232 dprintk("RPC: %s:"
1233 " ib_dealloc_fmr"
1234 " failed %i\n",
1235 __func__, rc);
1236 break;
4034ba04
AA
1237 default:
1238 break;
1239 }
1240 }
1241
c56c65fb
TT
1242 kfree(buf->rb_pool);
1243}
1244
1245/*
1246 * Get a set of request/reply buffers.
1247 *
1248 * Reply buffer (if needed) is attached to send buffer upon return.
1249 * Rule:
1250 * rb_send_index and rb_recv_index MUST always be pointing to the
1251 * *next* available buffer (non-NULL). They are incremented after
1252 * removing buffers, and decremented *before* returning them.
1253 */
1254struct rpcrdma_req *
1255rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
1256{
1257 struct rpcrdma_req *req;
1258 unsigned long flags;
8d4ba034
TT
1259 int i;
1260 struct rpcrdma_mw *r;
c56c65fb
TT
1261
1262 spin_lock_irqsave(&buffers->rb_lock, flags);
1263 if (buffers->rb_send_index == buffers->rb_max_requests) {
1264 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1265 dprintk("RPC: %s: out of request buffers\n", __func__);
1266 return ((struct rpcrdma_req *)NULL);
1267 }
1268
1269 req = buffers->rb_send_bufs[buffers->rb_send_index];
1270 if (buffers->rb_send_index < buffers->rb_recv_index) {
1271 dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
1272 __func__,
1273 buffers->rb_recv_index - buffers->rb_send_index);
1274 req->rl_reply = NULL;
1275 } else {
1276 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1277 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1278 }
1279 buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
1280 if (!list_empty(&buffers->rb_mws)) {
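 /* Give the request a full complement of MWs, filling rl_segments
  * from the highest index downward. */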
8d4ba034 1281 i = RPCRDMA_MAX_SEGS - 1;
c56c65fb 1282 do {
1283 r = list_entry(buffers->rb_mws.next,
1284 struct rpcrdma_mw, mw_list);
1285 list_del(&r->mw_list);
1286 req->rl_segments[i].mr_chunk.rl_mw = r;
1287 } while (--i >= 0);
1288 }
1289 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1290 return req;
1291}
1292
1293/*
1294 * Put request/reply buffers back into pool.
1295 * Pre-decrement counter/array index.
1296 */
1297void
1298rpcrdma_buffer_put(struct rpcrdma_req *req)
1299{
1300 struct rpcrdma_buffer *buffers = req->rl_buffer;
1301 struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
1302 int i;
1303 unsigned long flags;
1304
c56c65fb
TT
1305 spin_lock_irqsave(&buffers->rb_lock, flags);
1306 buffers->rb_send_bufs[--buffers->rb_send_index] = req;
1307 req->rl_niovs = 0;
1308 if (req->rl_reply) {
1309 buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
c56c65fb
TT
1310 req->rl_reply->rr_func = NULL;
1311 req->rl_reply = NULL;
1312 }
1313 switch (ia->ri_memreg_strategy) {
3197d309 1314 case RPCRDMA_FRMR:
c56c65fb 1315 case RPCRDMA_MTHCAFMR:
c56c65fb
TT
1316 /*
1317 * Cycle mw's back in reverse order, and "spin" them.
1318 * This delays and scrambles reuse as much as possible.
1319 */
1320 i = 1;
1321 do {
1322 struct rpcrdma_mw **mw;
1323 mw = &req->rl_segments[i].mr_chunk.rl_mw;
1324 list_add_tail(&(*mw)->mw_list, &buffers->rb_mws);
1325 *mw = NULL;
1326 } while (++i < RPCRDMA_MAX_SEGS);
1327 list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list,
1328 &buffers->rb_mws);
1329 req->rl_segments[0].mr_chunk.rl_mw = NULL;
1330 break;
1331 default:
1332 break;
1333 }
1334 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1335}
1336
1337/*
1338 * Recover reply buffers from pool.
1339 * This happens when recovering from error conditions.
1340 * Post-increment counter/array index.
1341 */
1342void
1343rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
1344{
1345 struct rpcrdma_buffer *buffers = req->rl_buffer;
1346 unsigned long flags;
1347
1348 if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */
1349 buffers = ((struct rpcrdma_req *) buffers)->rl_buffer;
1350 spin_lock_irqsave(&buffers->rb_lock, flags);
1351 if (buffers->rb_recv_index < buffers->rb_max_requests) {
1352 req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
1353 buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
1354 }
1355 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1356}
1357
1358/*
1359 * Put reply buffers back into pool when not attached to
b45ccfd2 1360 * request. This happens in error conditions.
c56c65fb
TT
1361 */
1362void
1363rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
1364{
1365 struct rpcrdma_buffer *buffers = rep->rr_buffer;
1366 unsigned long flags;
1367
1368 rep->rr_func = NULL;
1369 spin_lock_irqsave(&buffers->rb_lock, flags);
1370 buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
1371 spin_unlock_irqrestore(&buffers->rb_lock, flags);
1372}
1373
1374/*
1375 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
1376 */
1377
1378int
1379rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
1380 struct ib_mr **mrp, struct ib_sge *iov)
1381{
1382 struct ib_phys_buf ipb;
1383 struct ib_mr *mr;
1384 int rc;
1385
1386 /*
1387 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
1388 */
1389 iov->addr = ib_dma_map_single(ia->ri_id->device,
1390 va, len, DMA_BIDIRECTIONAL);
bf858ab0
YB
1391 if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
1392 return -ENOMEM;
1393
c56c65fb
TT
1394 iov->length = len;
1395
bd7ed1d1
TT
1396 if (ia->ri_have_dma_lkey) {
1397 *mrp = NULL;
1398 iov->lkey = ia->ri_dma_lkey;
1399 return 0;
1400 } else if (ia->ri_bind_mem != NULL) {
c56c65fb
TT
1401 *mrp = NULL;
1402 iov->lkey = ia->ri_bind_mem->lkey;
1403 return 0;
1404 }
1405
1406 ipb.addr = iov->addr;
1407 ipb.size = iov->length;
1408 mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
1409 IB_ACCESS_LOCAL_WRITE, &iov->addr);
1410
1411 dprintk("RPC: %s: phys convert: 0x%llx "
1412 "registered 0x%llx length %d\n",
a56daeb7
AM
1413 __func__, (unsigned long long)ipb.addr,
1414 (unsigned long long)iov->addr, len);
c56c65fb
TT
1415
1416 if (IS_ERR(mr)) {
1417 *mrp = NULL;
1418 rc = PTR_ERR(mr);
1419 dprintk("RPC: %s: failed with %i\n", __func__, rc);
1420 } else {
1421 *mrp = mr;
1422 iov->lkey = mr->lkey;
1423 rc = 0;
1424 }
1425
1426 return rc;
1427}
1428
1429int
1430rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
1431 struct ib_mr *mr, struct ib_sge *iov)
1432{
1433 int rc;
1434
1435 ib_dma_unmap_single(ia->ri_id->device,
1436 iov->addr, iov->length, DMA_BIDIRECTIONAL);
1437
1438 if (NULL == mr)
1439 return 0;
1440
1441 rc = ib_dereg_mr(mr);
1442 if (rc)
1443 dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
1444 return rc;
1445}
1446
1447/*
1448 * Wrappers for chunk registration, shared by read/write chunk code.
1449 */
1450
1451static void
1452rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
1453{
1454 seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
1455 seg->mr_dmalen = seg->mr_len;
1456 if (seg->mr_page)
1457 seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
1458 seg->mr_page, offset_in_page(seg->mr_offset),
1459 seg->mr_dmalen, seg->mr_dir);
1460 else
1461 seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
1462 seg->mr_offset,
1463 seg->mr_dmalen, seg->mr_dir);
1464 if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
1465 dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
1466 __func__,
986d4abb
RD
1467 (unsigned long long)seg->mr_dma,
1468 seg->mr_offset, seg->mr_dmalen);
5c635e09 1469 }
c56c65fb
TT
1470}
1471
1472static void
1473rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
1474{
1475 if (seg->mr_page)
1476 ib_dma_unmap_page(ia->ri_id->device,
1477 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1478 else
1479 ib_dma_unmap_single(ia->ri_id->device,
1480 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
1481}
1482
1483 static int
1484 rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
1485 int *nsegs, int writing, struct rpcrdma_ia *ia,
1486 struct rpcrdma_xprt *r_xprt)
1487 {
1488 struct rpcrdma_mr_seg *seg1 = seg;
1489 struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
1490
1491 u8 key;
1492 int len, pageoff;
1493 int i, rc;
1494 int seg_len;
1495 u64 pa;
1496 int page_no;
1497
1498 pageoff = offset_in_page(seg1->mr_offset);
1499 seg1->mr_offset -= pageoff; /* start of page */
1500 seg1->mr_len += pageoff;
1501 len = -pageoff;
1502 if (*nsegs > ia->ri_max_frmr_depth)
1503 *nsegs = ia->ri_max_frmr_depth;
1504 for (page_no = i = 0; i < *nsegs;) {
1505 rpcrdma_map_one(ia, seg, writing);
1506 pa = seg->mr_dma;
1507 for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
1508 seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->
1509 page_list[page_no++] = pa;
1510 pa += PAGE_SIZE;
1511 }
1512 len += seg->mr_len;
1513 ++seg;
1514 ++i;
1515 /* Check for holes */
1516 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1517 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1518 break;
1519 }
1520 dprintk("RPC: %s: Using frmr %p to map %d segments\n",
1521 __func__, seg1->mr_chunk.rl_mw, i);
1522
1523 if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
1524 dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
1525 __func__,
1526 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
1527 /* Invalidate before using. */
1528 memset(&invalidate_wr, 0, sizeof invalidate_wr);
1529 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1530 invalidate_wr.next = &frmr_wr;
1531 invalidate_wr.opcode = IB_WR_LOCAL_INV;
1532 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1533 invalidate_wr.ex.invalidate_rkey =
1534 seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1535 DECR_CQCOUNT(&r_xprt->rx_ep);
1536 post_wr = &invalidate_wr;
1537 } else
1538 post_wr = &frmr_wr;
1539
1540 /* Prepare FRMR WR */
1541 memset(&frmr_wr, 0, sizeof frmr_wr);
1542 frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
1543 frmr_wr.opcode = IB_WR_FAST_REG_MR;
1544 frmr_wr.send_flags = IB_SEND_SIGNALED;
1545 frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
1546 frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
1547 frmr_wr.wr.fast_reg.page_list_len = page_no;
1548 frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
1549 frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
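 /* If the page list cannot cover the requested length, fail with -EIO rather
  * than pressing on; this guard appears to be the fix for the panic named in
  * the commit title. */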
1550 if (frmr_wr.wr.fast_reg.length < len) {
1551 rc = -EIO;
1552 goto out_err;
1553 }
1554
1555 /* Bump the key */
1556 key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
1557 ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
1558
1559 frmr_wr.wr.fast_reg.access_flags = (writing ?
1560 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
1561 IB_ACCESS_REMOTE_READ);
1562 frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1563 DECR_CQCOUNT(&r_xprt->rx_ep);
1564
1565 rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
1566
1567 if (rc) {
1568 dprintk("RPC: %s: failed ib_post_send for register,"
1569 " status %i\n", __func__, rc);
1570 goto out_err;
1571 } else {
1572 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1573 seg1->mr_base = seg1->mr_dma + pageoff;
1574 seg1->mr_nsegs = i;
1575 seg1->mr_len = len;
1576 }
1577 *nsegs = i;
1578 return 0;
1579out_err:
1580 while (i--)
1581 rpcrdma_unmap_one(ia, --seg);
1582 return rc;
1583}
1584
1585static int
1586rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
1587 struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
1588{
1589 struct rpcrdma_mr_seg *seg1 = seg;
1590 struct ib_send_wr invalidate_wr, *bad_wr;
1591 int rc;
1592
1593 while (seg1->mr_nsegs--)
1594 rpcrdma_unmap_one(ia, seg++);
1595
1596 memset(&invalidate_wr, 0, sizeof invalidate_wr);
5c635e09 1597 invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
3197d309 1598 invalidate_wr.opcode = IB_WR_LOCAL_INV;
5c635e09 1599 invalidate_wr.send_flags = IB_SEND_SIGNALED;
1600 invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
1601 DECR_CQCOUNT(&r_xprt->rx_ep);
1602
1603 rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
1604 if (rc)
1605 dprintk("RPC: %s: failed ib_post_send for invalidate,"
1606 " status %i\n", __func__, rc);
1607 return rc;
1608}
1609
1610static int
1611rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
1612 int *nsegs, int writing, struct rpcrdma_ia *ia)
1613{
1614 struct rpcrdma_mr_seg *seg1 = seg;
1615 u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
1616 int len, pageoff, i, rc;
1617
1618 pageoff = offset_in_page(seg1->mr_offset);
1619 seg1->mr_offset -= pageoff; /* start of page */
1620 seg1->mr_len += pageoff;
1621 len = -pageoff;
1622 if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
1623 *nsegs = RPCRDMA_MAX_DATA_SEGS;
1624 for (i = 0; i < *nsegs;) {
1625 rpcrdma_map_one(ia, seg, writing);
1626 physaddrs[i] = seg->mr_dma;
1627 len += seg->mr_len;
1628 ++seg;
1629 ++i;
1630 /* Check for holes */
1631 if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
1632 offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
1633 break;
1634 }
1635 rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
1636 physaddrs, i, seg1->mr_dma);
1637 if (rc) {
1638 dprintk("RPC: %s: failed ib_map_phys_fmr "
1639 "%u@0x%llx+%i (%d)... status %i\n", __func__,
1640 len, (unsigned long long)seg1->mr_dma,
1641 pageoff, i, rc);
1642 while (i--)
1643 rpcrdma_unmap_one(ia, --seg);
1644 } else {
1645 seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
1646 seg1->mr_base = seg1->mr_dma + pageoff;
1647 seg1->mr_nsegs = i;
1648 seg1->mr_len = len;
1649 }
1650 *nsegs = i;
1651 return rc;
1652}
1653
1654static int
1655rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
1656 struct rpcrdma_ia *ia)
1657{
1658 struct rpcrdma_mr_seg *seg1 = seg;
1659 LIST_HEAD(l);
1660 int rc;
1661
1662 list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
1663 rc = ib_unmap_fmr(&l);
1664 while (seg1->mr_nsegs--)
1665 rpcrdma_unmap_one(ia, seg++);
1666 if (rc)
1667 dprintk("RPC: %s: failed ib_unmap_fmr,"
1668 " status %i\n", __func__, rc);
1669 return rc;
1670}
1671
c56c65fb
TT
1672int
1673rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
1674 int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
1675{
1676 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
c56c65fb
TT
1677 int rc = 0;
1678
1679 switch (ia->ri_memreg_strategy) {
1680
1681#if RPCRDMA_PERSISTENT_REGISTRATION
1682 case RPCRDMA_ALLPHYSICAL:
1683 rpcrdma_map_one(ia, seg, writing);
1684 seg->mr_rkey = ia->ri_bind_mem->rkey;
1685 seg->mr_base = seg->mr_dma;
1686 seg->mr_nsegs = 1;
1687 nsegs = 1;
1688 break;
1689#endif
1690
3197d309
TT
1691 /* Registration using frmr registration */
1692 case RPCRDMA_FRMR:
1693 rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
1694 break;
1695
8d4ba034 1696 /* Registration using fmr memory registration */
c56c65fb 1697 case RPCRDMA_MTHCAFMR:
8d4ba034 1698 rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
c56c65fb
TT
1699 break;
1700
c56c65fb 1701 default:
0ac531c1 1702 return -1;
c56c65fb
TT
1703 }
1704 if (rc)
1705 return -1;
1706
1707 return nsegs;
1708}
1709
1710int
1711rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
13c9ff8f 1712 struct rpcrdma_xprt *r_xprt)
c56c65fb
TT
1713{
1714 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
c56c65fb
TT
1715 int nsegs = seg->mr_nsegs, rc;
1716
1717 switch (ia->ri_memreg_strategy) {
1718
1719#if RPCRDMA_PERSISTENT_REGISTRATION
1720 case RPCRDMA_ALLPHYSICAL:
c56c65fb 1721 rpcrdma_unmap_one(ia, seg);
c56c65fb
TT
1722 break;
1723#endif
1724
3197d309
TT
1725 case RPCRDMA_FRMR:
1726 rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
1727 break;
1728
c56c65fb 1729 case RPCRDMA_MTHCAFMR:
8d4ba034 1730 rc = rpcrdma_deregister_fmr_external(seg, ia);
c56c65fb
TT
1731 break;
1732
c56c65fb 1733 default:
c56c65fb
TT
1734 break;
1735 }
c56c65fb
TT
1736 return nsegs;
1737}
1738
1739/*
1740 * Prepost any receive buffer, then post send.
1741 *
1742 * Receive buffer is donated to hardware, reclaimed upon recv completion.
1743 */
1744int
1745rpcrdma_ep_post(struct rpcrdma_ia *ia,
1746 struct rpcrdma_ep *ep,
1747 struct rpcrdma_req *req)
1748{
1749 struct ib_send_wr send_wr, *send_wr_fail;
1750 struct rpcrdma_rep *rep = req->rl_reply;
1751 int rc;
1752
1753 if (rep) {
1754 rc = rpcrdma_ep_post_recv(ia, ep, rep);
1755 if (rc)
1756 goto out;
1757 req->rl_reply = NULL;
1758 }
1759
1760 send_wr.next = NULL;
1761 send_wr.wr_id = 0ULL; /* no send cookie */
1762 send_wr.sg_list = req->rl_send_iov;
1763 send_wr.num_sge = req->rl_niovs;
1764 send_wr.opcode = IB_WR_SEND;
c56c65fb
TT
1765 if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */
1766 ib_dma_sync_single_for_device(ia->ri_id->device,
1767 req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
1768 DMA_TO_DEVICE);
1769 ib_dma_sync_single_for_device(ia->ri_id->device,
1770 req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
1771 DMA_TO_DEVICE);
1772 ib_dma_sync_single_for_device(ia->ri_id->device,
1773 req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
1774 DMA_TO_DEVICE);
1775
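 /* Signal only every rep_cqinit-th send so the provider's CQ cannot wrap;
  * rpcrdma_sendcq_process_wc() ignores these wr_id == 0 completions. */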
1776 if (DECR_CQCOUNT(ep) > 0)
1777 send_wr.send_flags = 0;
1778 else { /* Provider must take a send completion every now and then */
1779 INIT_CQCOUNT(ep);
1780 send_wr.send_flags = IB_SEND_SIGNALED;
1781 }
1782
1783 rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
1784 if (rc)
1785 dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
1786 rc);
1787out:
1788 return rc;
1789}
1790
1791/*
1792 * (Re)post a receive buffer.
1793 */
1794int
1795rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
1796 struct rpcrdma_ep *ep,
1797 struct rpcrdma_rep *rep)
1798{
1799 struct ib_recv_wr recv_wr, *recv_wr_fail;
1800 int rc;
1801
1802 recv_wr.next = NULL;
1803 recv_wr.wr_id = (u64) (unsigned long) rep;
1804 recv_wr.sg_list = &rep->rr_iov;
1805 recv_wr.num_sge = 1;
1806
1807 ib_dma_sync_single_for_cpu(ia->ri_id->device,
1808 rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
1809
c56c65fb
TT
1810 rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
1811
1812 if (rc)
1813 dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
1814 rc);
1815 return rc;
1816}