/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * BSD-type license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * verbs.c
 *
 * Encapsulates the major functions managing RDMA adapters,
 * endpoints, connections, and buffer memory.
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);

/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */
static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);
static void
rpcrdma_run_tasklet(unsigned long data)
{
	struct rpcrdma_rep *rep;
	void (*func)(struct rpcrdma_rep *);
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	while (!list_empty(&rpcrdma_tasklets_g)) {
		rep = list_entry(rpcrdma_tasklets_g.next,
				 struct rpcrdma_rep, rr_list);
		list_del(&rep->rr_list);
		func = rep->rr_func;
		rep->rr_func = NULL;
		spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

		if (func)
			func(rep);
		else
			rpcrdma_recv_buffer_put(rep);

		spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	}
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}

static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);

static const char * const async_event[] = {
	"communication established",
	"send queue drained",
	"path migration successful",
	"device fatal error",
};

#define ASYNC_MSG(status)					\
	((status) < ARRAY_SIZE(async_event) ?			\
		async_event[(status)] : "unknown async error")

static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
	unsigned long flags;

	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
	tasklet_schedule(&rpcrdma_tasklet_g);
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("RPC: %s: %s on device %s ep %p\n",
	       __func__, ASYNC_MSG(event->event),
	       event->device->name, context);
	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

static const char * const wc_status[] = {
	"local length error",
	"local QP operation error",
	"local EE context operation error",
	"local protection error",
	"memory management operation error",
	"bad response error",
	"local access error",
	"remote invalid request error",
	"remote access error",
	"remote operation error",
	"transport retry counter exceeded",
	"RNR retry counter exceeded",
	"local RDD violation error",
	"remote invalid RD request",
	"invalid EE context number",
	"invalid EE context state",
	"response timeout error",
};

#define COMPLETION_MSG(status)					\
	((status) < ARRAY_SIZE(wc_status) ?			\
		wc_status[(status)] : "unexpected completion error")

static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
	if (likely(wc->status == IB_WC_SUCCESS))
		return;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->wr_id == 0ULL) {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			pr_err("RPC: %s: SEND: %s\n",
			       __func__, COMPLETION_MSG(wc->status));
	} else {
		struct rpcrdma_mw *r;

		r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
		r->r.frmr.fr_state = FRMR_IS_STALE;
		pr_err("RPC: %s: frmr %p (stale): %s\n",
		       __func__, r, COMPLETION_MSG(wc->status));
	}
}

static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct ib_wc *wcs;
	int budget, count, rc;

	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_send_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			return rc;

		count = rc;
		while (count-- > 0)
			rpcrdma_sendcq_process_wc(wcs++);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	return 0;
}

/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_sendcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_sendcq_poll(cq, ep);
}
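
/* Note: ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS returns a
 * positive value when completions arrived after the last poll but
 * before the CQ was re-armed. The extra poll above catches those, so
 * no completion is lost between polling and re-arming the CQ.
 */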

static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
	struct rpcrdma_rep *rep =
			(struct rpcrdma_rep *)(unsigned long)wc->wr_id;

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);
	prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
	list_add_tail(&rep->rr_list, sched_list);
	return;
out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("RPC: %s: rep %p: %s\n",
		       __func__, rep, COMPLETION_MSG(wc->status));
	rep->rr_len = ~0U;
	goto out_schedule;
}

static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
	struct list_head sched_list;
	struct ib_wc *wcs;
	int budget, count, rc;

	INIT_LIST_HEAD(&sched_list);
	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
	do {
		wcs = ep->rep_recv_wcs;

		rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
		if (rc <= 0)
			goto out_schedule;

		count = rc;
		while (count-- > 0)
			rpcrdma_recvcq_process_wc(wcs++, &sched_list);
	} while (rc == RPCRDMA_POLLSIZE && --budget);
	rc = 0;

out_schedule:
	rpcrdma_schedule_tasklet(&sched_list);
	return rc;
}

/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
	struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
	int rc;

	rc = rpcrdma_recvcq_poll(cq, ep);
	if (rc) {
		dprintk("RPC: %s: ib_poll_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rc = ib_req_notify_cq(cq,
			IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
	if (rc == 0)
		return;
	if (rc < 0) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		return;
	}

	rpcrdma_recvcq_poll(cq, ep);
}

static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
	struct ib_wc wc;
	LIST_HEAD(sched_list);

	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
		rpcrdma_recvcq_process_wc(&wc, &sched_list);
	if (!list_empty(&sched_list))
		rpcrdma_schedule_tasklet(&sched_list);
	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
		rpcrdma_sendcq_process_wc(&wc);
}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
	"address resolved",
	"address error",
	"route resolved",
	"route error",
	"connect request",
	"connect response",
	"connect error",
	"unreachable",
	"rejected",
	"established",
	"disconnected",
	"device removal",
	"multicast join",
	"multicast error",
	"address change",
	"timewait exit",
};

#define CONNECTION_MSG(status)						\
	((status) < ARRAY_SIZE(conn) ?					\
		conn[(status)] : "unrecognized connection error")
#endif

static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	struct ib_qp_attr *attr = &ia->ri_qp_attr;
	struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		ib_query_qp(ia->ri_id->qp, attr,
			    IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
			    iattr);
		dprintk("RPC: %s: %d responder resources"
			" (%d initiator)\n",
			__func__, attr->max_dest_rd_atomic,
			attr->max_rd_atomic);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		connstate = -ECONNREFUSED;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
		goto connected;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		connstate = -ENODEV;
connected:
		dprintk("RPC: %s: %sconnected\n",
			__func__, connstate > 0 ? "" : "dis");
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap), ep,
			CONNECTION_MSG(event->event));
		break;
	}

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	if (connstate == 1) {
		int ird = attr->max_dest_rd_atomic;
		int tird = ep->rep_remote_cma.responder_resources;

		pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
			sap, rpc_get_port(sap),
			ia->ri_id->device->name,
			ia->ri_memreg_strategy,
			xprt->rx_buf.rb_max_requests,
			ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
	} else if (connstate < 0) {
		pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
			sap, rpc_get_port(sap), connstate);
	}
#endif

	return 0;
}
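
/* Note on ep->rep_connected values used throughout this file: 1 means
 * connected, 0 means not yet connected, and a negative errno records a
 * failed or torn-down connection (see also rpcrdma_ep_connect below).
 */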

static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
			struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);

	id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	wait_for_completion_interruptible_timeout(&ia->ri_done,
				msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
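
/* Note: ri_async_rc is seeded with -ETIMEDOUT before each address and
 * route resolution call so that a wait that expires without an upcall
 * is reported as a timeout; rpcrdma_conn_upcall() overwrites it with
 * the actual result when a CM event does arrive.
 */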

/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
	struct ib_wc wc;
	int count = 0;

	while (1 == ib_poll_cq(cq, 1, &wc))
		++count;

	if (count)
		dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
			__func__, count, wc.opcode);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct ib_device_attr *devattr = &ia->ri_devattr;
	int rc, mem_priv;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out1;
	}

	ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_query_device(ia->ri_id->device, devattr);
	if (rc) {
		dprintk("RPC: %s: ib_query_device failed %d\n",
			__func__, rc);
		goto out3;
	}

	if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
		ia->ri_have_dma_lkey = 1;
		ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
	}

	if (memreg == RPCRDMA_FRMR) {
		/* Requires both frmr reg and local dma lkey */
		if ((devattr->device_cap_flags &
		     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
		    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
			dprintk("RPC: %s: FRMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_MTHCAFMR;
		} else {
			/* Mind the ia limit on FRMR page list depth */
			ia->ri_max_frmr_depth = min_t(unsigned int,
				RPCRDMA_MAX_DATA_SEGS,
				devattr->max_fast_reg_page_list_len);
		}
	}
	if (memreg == RPCRDMA_MTHCAFMR) {
		if (!ia->ri_id->device->alloc_fmr) {
			dprintk("RPC: %s: MTHCAFMR registration "
				"not supported by HCA\n", __func__);
			memreg = RPCRDMA_ALLPHYSICAL;
		}
	}

	/*
	 * Optionally obtain an underlying physical identity mapping in
	 * order to do a memory window-based bind. This base registration
	 * is protected from remote access - that is enabled only by binding
	 * for the specific bytes targeted during each RPC operation, and
	 * revoked after the corresponding completion similar to a storage
	 * adapter.
	 */
	switch (memreg) {
	case RPCRDMA_FRMR:
		break;
	case RPCRDMA_ALLPHYSICAL:
		mem_priv = IB_ACCESS_LOCAL_WRITE |
				IB_ACCESS_REMOTE_WRITE |
				IB_ACCESS_REMOTE_READ;
		goto register_setup;
	case RPCRDMA_MTHCAFMR:
		if (ia->ri_have_dma_lkey)
			break;
		mem_priv = IB_ACCESS_LOCAL_WRITE;
register_setup:
		ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
		if (IS_ERR(ia->ri_bind_mem)) {
			printk(KERN_ALERT "%s: ib_get_dma_mr for "
				"phys register failed with %lX\n",
				__func__, PTR_ERR(ia->ri_bind_mem));
			rc = -ENOMEM;
			goto out3;
		}
		break;
	default:
		printk(KERN_ERR "RPC: Unsupported memory "
				"registration mode: %d\n", memreg);
		rc = -ENOMEM;
		goto out3;
	}
	dprintk("RPC: %s: memory registration strategy is %d\n",
		__func__, memreg);

	/* Else will do memory reg/dereg for each chunk */
	ia->ri_memreg_strategy = memreg;

	rwlock_init(&ia->ri_qplock);
	return 0;

out3:
	ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
out2:
	rdma_destroy_id(ia->ri_id);
	ia->ri_id = NULL;
out1:
	return rc;
}
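
/* Note the fallback chain above: a request for RPCRDMA_FRMR silently
 * drops back to RPCRDMA_MTHCAFMR when the HCA lacks fast-register
 * support, and RPCRDMA_MTHCAFMR drops back to RPCRDMA_ALLPHYSICAL when
 * the HCA provides no alloc_fmr method, so ri_memreg_strategy always
 * ends up holding a mode the device can actually service.
 */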

/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_bind_mem != NULL) {
		rc = ib_dereg_mr(ia->ri_bind_mem);
		dprintk("RPC: %s: ib_dereg_mr returned %i\n",
			__func__, rc);
	}

	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
		ia->ri_id = NULL;
	}

	if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
		rc = ib_dealloc_pd(ia->ri_pd);
		dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
			__func__, rc);
	}
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
				struct rpcrdma_create_data_internal *cdata)
{
	struct ib_device_attr *devattr = &ia->ri_devattr;
	struct ib_cq *sendcq, *recvcq;
	int rc, err;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > devattr->max_qp_wr)
		cdata->max_requests = devattr->max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	/* send_cq and recv_cq initialized below */
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR: {
		int depth = 7;

		/* Add room for frmr register and invalidate WRs.
		 * 1. FRMR reg WR for head
		 * 2. FRMR invalidate WR for head
		 * 3. N FRMR reg WRs for pagelist
		 * 4. N FRMR invalidate WRs for pagelist
		 * 5. FRMR reg WR for tail
		 * 6. FRMR invalidate WR for tail
		 * 7. The RDMA_SEND WR
		 */

		/* Calculate N if the device max FRMR depth is smaller than
		 * RPCRDMA_MAX_DATA_SEGS.
		 */
		if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
			int delta = RPCRDMA_MAX_DATA_SEGS -
				    ia->ri_max_frmr_depth;

			do {
				depth += 2; /* FRMR reg + invalidate */
				delta -= ia->ri_max_frmr_depth;
			} while (delta > 0);
		}
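
		/* Illustrative example (values are hypothetical): with
		 * RPCRDMA_MAX_DATA_SEGS = 64 and a device limited to
		 * ri_max_frmr_depth = 16, delta starts at 48 and the
		 * loop above runs three times, so depth becomes
		 * 7 + 3 * 2 = 13 WRs per request.
		 */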
		ep->rep_attr.cap.max_send_wr *= depth;
		if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
			cdata->max_requests = devattr->max_qp_wr / depth;
			if (!cdata->max_requests)
				return -EINVAL;
			ep->rep_attr.cap.max_send_wr = cdata->max_requests *
						       depth;
		}
		break;
	}
	default:
		break;
	}
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	if (cdata->padding) {
		ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
						      GFP_KERNEL);
		if (IS_ERR(ep->rep_padbuf))
			return PTR_ERR(ep->rep_padbuf);
	} else
		ep->rep_padbuf = NULL;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);
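
	/* A send completion is requested only once per rep_cqinit posts
	 * (see DECR_CQCOUNT/INIT_CQCOUNT in rpcrdma_ep_post); the value
	 * computed below caps the number of unsignaled sends so the
	 * send CQ cannot overflow while completions are suppressed.
	 */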
	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
	else if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;
	INIT_CQCOUNT(ep);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_send_wr + 1, 0);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		goto out2;
	}

	recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
			      rpcrdma_cq_async_error_upcall, ep,
			      ep->rep_attr.cap.max_recv_wr + 1, 0);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
	if (rc) {
		dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
			__func__, rc);
		ib_destroy_cq(recvcq);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */

	/* RPC/RDMA does not use private data */
	ep->rep_remote_cma.private_data = NULL;
	ep->rep_remote_cma.private_data_len = 0;

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (devattr->max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
						devattr->max_qp_rd_atom;

	ep->rep_remote_cma.retry_count = 7;
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	err = ib_destroy_cq(sendcq);
	if (err)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, err);
out1:
	rpcrdma_free_regbuf(ia, ep->rep_padbuf);
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	rpcrdma_free_regbuf(ia, ep->rep_padbuf);

	rpcrdma_clean_cq(ep->rep_attr.recv_cq);
	rc = ib_destroy_cq(ep->rep_attr.recv_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);

	rpcrdma_clean_cq(ep->rep_attr.send_cq);
	rc = ib_destroy_cq(ep->rep_attr.send_cq);
	if (rc)
		dprintk("RPC: %s: ib_destroy_cq returned %i\n",
			__func__, rc);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rdma_cm_id *id, *old;
	int rc = 0;
	int retry_count = 0;

	if (ep->rep_connected != 0) {
		struct rpcrdma_xprt *xprt;
retry:
		dprintk("RPC: %s: reconnecting...\n", __func__);

		rpcrdma_ep_disconnect(ep, ia);
		rpcrdma_flush_cqs(ep);

		switch (ia->ri_memreg_strategy) {
		case RPCRDMA_FRMR:
			rpcrdma_reset_frmrs(ia);
			break;
		case RPCRDMA_MTHCAFMR:
			rpcrdma_reset_fmrs(ia);
			break;
		case RPCRDMA_ALLPHYSICAL:
			break;
		default:
			rc = -EIO;
			goto out;
		}

		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
		id = rpcrdma_create_id(xprt, ia,
				(struct sockaddr *)&xprt->rx_data.addr);
		if (IS_ERR(id)) {
			rc = -EHOSTUNREACH;
			goto out;
		}
		/* TEMP TEMP TEMP - fail if new device:
		 * Deregister/remarshal *all* requests!
		 * Close and recreate adapter, pd, etc!
		 * Re-determine all attributes still sane!
		 * More stuff I haven't thought of!
		 */
		if (ia->ri_id->device != id->device) {
			printk("RPC: %s: can't reconnect on "
				"different device!\n", __func__);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}
		/* END TEMP */
		rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rdma_destroy_id(id);
			rc = -ENETUNREACH;
			goto out;
		}

		write_lock(&ia->ri_qplock);
		old = ia->ri_id;
		ia->ri_id = id;
		write_unlock(&ia->ri_qplock);

		rdma_destroy_qp(old);
		rdma_destroy_id(old);
	} else {
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			/* do not update ep->rep_connected */
			return -ENETUNREACH;
		}
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

	/*
	 * Check state. A non-peer reject indicates no listener
	 * (ECONNREFUSED), which may be a transient state. All
	 * others indicate a transport condition which has already
	 * undergone best-effort recovery.
	 */
	if (ep->rep_connected == -ECONNREFUSED &&
	    ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
		dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
		goto retry;
	}
	if (ep->rep_connected <= 0) {
		/* Sometimes, the only way to reliably connect to remote
		 * CMs is to use same nonzero values for ORD and IRD. */
		if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
		    (ep->rep_remote_cma.responder_resources == 0 ||
		     ep->rep_remote_cma.initiator_depth !=
				ep->rep_remote_cma.responder_resources)) {
			if (ep->rep_remote_cma.responder_resources == 0)
				ep->rep_remote_cma.responder_resources = 1;
			ep->rep_remote_cma.initiator_depth =
				ep->rep_remote_cma.responder_resources;
			goto retry;
		}
		rc = ep->rep_connected;
	} else {
		dprintk("RPC: %s: connected\n", __func__);
	}

out:
	if (rc)
		ep->rep_connected = rc;
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rpcrdma_flush_cqs(ep);
	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}
}

static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	req->rl_buffer = &r_xprt->rx_buf;
	return req;
}

static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
					       GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_buffer = &r_xprt->rx_buf;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_DATA_SEGS,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->r.fmr)) {
			rc = PTR_ERR(r->r.fmr);
			dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
				__func__, rc);
			kfree(r);
			return rc;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;
}
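
/* Note on pool sizing: (rb_max_requests + 1) * RPCRDMA_MAX_SEGS MWs
 * are preallocated so that each request slot can register a full set
 * of RPCRDMA_MAX_SEGS segments concurrently, with one spare set left
 * over. The FRMR pool below is sized the same way.
 */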

static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_frmr *f;
	struct rpcrdma_mw *r;
	int i, rc;

	i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
	dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (r == NULL)
			return -ENOMEM;

		f = &r->r.frmr;
		f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
						ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_mr)) {
			rc = PTR_ERR(f->fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr "
				"failed %i\n", __func__, rc);
			kfree(r);
			return rc;
		}

		f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
							ia->ri_max_frmr_depth);
		if (IS_ERR(f->fr_pgl)) {
			rc = PTR_ERR(f->fr_pgl);
			dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(f->fr_mr);
			kfree(r);
			return rc;
		}

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}

	return 0;
}

int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	char *p;
	size_t len;
	int i, rc;

	buf->rb_max_requests = cdata->max_requests;
	spin_lock_init(&buf->rb_lock);

	/* Need to allocate:
	 *   1.  arrays for send and recv pointers
	 *   2.  arrays of struct rpcrdma_req to fill in pointers
	 *   3.  array of struct rpcrdma_rep for replies
	 * Send/recv buffers in req/rep need to be registered
	 */
	len = buf->rb_max_requests *
		(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));

	p = kzalloc(len, GFP_KERNEL);
	if (p == NULL) {
		dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
			__func__, len);
		rc = -ENOMEM;
		goto out;
	}
	buf->rb_pool = p;	/* for freeing it later */

	buf->rb_send_bufs = (struct rpcrdma_req **) p;
	p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
	buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
	p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rc = rpcrdma_init_frmrs(ia, buf);
		if (rc)
			goto out;
		break;
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_init_fmrs(ia, buf);
		if (rc)
			goto out;
		break;
	default:
		break;
	}

	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;
		struct rpcrdma_rep *rep;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		buf->rb_send_bufs[i] = req;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		buf->rb_recv_bufs[i] = rep;
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
	if (!rep)
		return;

	rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
	kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	if (!req)
		return;

	rpcrdma_free_regbuf(ia, req->rl_sendbuf);
	rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
	kfree(req);
}

static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dealloc_fmr(r->r.fmr);
		if (rc)
			dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		list_del(&r->mw_list);

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		kfree(r);
	}
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	int i;

	/* clean up in reverse order from create
	 *   1.  recv mr memory (mr free, then kfree)
	 *   2.  send mr memory (mr free, then kfree)
	 */
	dprintk("RPC: %s: entering\n", __func__);

	for (i = 0; i < buf->rb_max_requests; i++) {
		if (buf->rb_recv_bufs)
			rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
		if (buf->rb_send_bufs)
			rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
	}

	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		rpcrdma_destroy_frmrs(buf);
		break;
	case RPCRDMA_MTHCAFMR:
		rpcrdma_destroy_fmrs(buf);
		break;
	default:
		break;
	}

	kfree(buf->rb_pool);
}

/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		INIT_LIST_HEAD(&l);
		list_add(&r->r.fmr->list, &l);
		rc = ib_unmap_fmr(&l);
		if (rc)
			dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
				__func__, rc);
	}
}

/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct list_head *pos;
	struct rpcrdma_mw *r;
	int rc;

	list_for_each(pos, &buf->rb_all) {
		r = list_entry(pos, struct rpcrdma_mw, mw_all);

		if (r->r.frmr.fr_state == FRMR_IS_INVALID)
			continue;

		rc = ib_dereg_mr(r->r.frmr.fr_mr);
		if (rc)
			dprintk("RPC: %s: ib_dereg_mr failed %i\n",
				__func__, rc);
		ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

		r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_mr)) {
			rc = PTR_ERR(r->r.frmr.fr_mr);
			dprintk("RPC: %s: ib_alloc_fast_reg_mr"
				" failed %i\n", __func__, rc);
			continue;
		}
		r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
					ia->ri_id->device,
					ia->ri_max_frmr_depth);
		if (IS_ERR(r->r.frmr.fr_pgl)) {
			rc = PTR_ERR(r->r.frmr.fr_pgl);
			dprintk("RPC: %s: "
				"ib_alloc_fast_reg_page_list "
				"failed %i\n", __func__, rc);

			ib_dereg_mr(r->r.frmr.fr_mr);
			continue;
		}
		r->r.frmr.fr_state = FRMR_IS_INVALID;
	}
}

/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
	if (*mw) {
		list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
		*mw = NULL;
	}
}

/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mr_seg *seg = req->rl_segments;
	struct rpcrdma_mr_seg *seg1 = seg;
	int i;

	for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
		rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
	rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}

static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	buf->rb_send_bufs[--buf->rb_send_index] = req;
	req->rl_niovs = 0;
	if (req->rl_reply) {
		buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
		req->rl_reply->rr_func = NULL;
		req->rl_reply = NULL;
	}
}

/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

	/* When this FRMR is re-inserted into rb_mws, it is no longer stale */
	r->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof(invalidate_wr));
	invalidate_wr.wr_id = (unsigned long)(void *)r;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
		__func__, r, r->r.frmr.fr_mr->rkey);

	read_lock(&ia->ri_qplock);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		r->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: ib_post_send failed, %i\n",
			__func__, rc);
	}
}

static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
			   struct rpcrdma_buffer *buf)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct list_head *pos;
	struct rpcrdma_mw *r;
	unsigned long flags;

	list_for_each(pos, stale) {
		r = list_entry(pos, struct rpcrdma_mw, mw_list);
		rpcrdma_retry_local_inv(r, ia);
	}

	spin_lock_irqsave(&buf->rb_lock, flags);
	list_splice_tail(stale, &buf->rb_mws);
	spin_unlock_irqrestore(&buf->rb_lock, flags);
}

static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
			 struct list_head *stale)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		if (r->r.frmr.fr_state == FRMR_IS_STALE) {
			list_add(&r->mw_list, stale);
			continue;
		}
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int i;

	i = RPCRDMA_MAX_SEGS - 1;
	while (!list_empty(&buf->rb_mws)) {
		r = list_entry(buf->rb_mws.next,
			       struct rpcrdma_mw, mw_list);
		list_del(&r->mw_list);
		req->rl_segments[i].rl_mw = r;
		if (unlikely(i-- == 0))
			return req;	/* Success */
	}

	/* Not enough entries on rb_mws for this req */
	rpcrdma_buffer_put_sendbuf(req, buf);
	rpcrdma_buffer_put_mrs(req, buf);
	return NULL;
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 * Rule:
 *    rb_send_index and rb_recv_index MUST always be pointing to the
 *    *next* available buffer (non-NULL). They are incremented after
 *    removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	struct list_head stale;
	struct rpcrdma_req *req;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_send_index == buffers->rb_max_requests) {
		spin_unlock_irqrestore(&buffers->rb_lock, flags);
		dprintk("RPC: %s: out of request buffers\n", __func__);
		return NULL;
	}

	req = buffers->rb_send_bufs[buffers->rb_send_index];
	if (buffers->rb_send_index < buffers->rb_recv_index) {
		dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
			__func__,
			buffers->rb_recv_index - buffers->rb_send_index);
		req->rl_reply = NULL;
	} else {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

	INIT_LIST_HEAD(&stale);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
		req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
		break;
	case RPCRDMA_MTHCAFMR:
		req = rpcrdma_buffer_get_fmrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
	if (!list_empty(&stale))
		rpcrdma_retry_flushed_linv(&stale, buffers);
	return req;
}
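
/* Note: stale FRMRs found while refilling a req are collected on a
 * local list under rb_lock and re-invalidated by
 * rpcrdma_retry_flushed_linv() only after the lock is dropped, which
 * keeps the LOCAL_INV posts out of the spinlock's critical section.
 */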

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	rpcrdma_buffer_put_sendbuf(req, buffers);
	switch (ia->ri_memreg_strategy) {
	case RPCRDMA_FRMR:
	case RPCRDMA_MTHCAFMR:
		rpcrdma_buffer_put_mrs(req, buffers);
		break;
	default:
		break;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	unsigned long flags;

	spin_lock_irqsave(&buffers->rb_lock, flags);
	if (buffers->rb_recv_index < buffers->rb_max_requests) {
		req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
		buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
	}
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = rep->rr_buffer;
	unsigned long flags;

	rep->rr_func = NULL;
	spin_lock_irqsave(&buffers->rb_lock, flags);
	buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
	spin_unlock_irqrestore(&buffers->rb_lock, flags);
}

/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
				struct ib_mr **mrp, struct ib_sge *iov)
{
	struct ib_phys_buf ipb;
	struct ib_mr *mr;
	int rc;

	/*
	 * All memory passed here was kmalloc'ed, therefore phys-contiguous.
	 */
	iov->addr = ib_dma_map_single(ia->ri_id->device,
			va, len, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
		return -ENOMEM;

	iov->length = len;

	if (ia->ri_have_dma_lkey) {
		*mrp = NULL;
		iov->lkey = ia->ri_dma_lkey;
		return 0;
	} else if (ia->ri_bind_mem != NULL) {
		*mrp = NULL;
		iov->lkey = ia->ri_bind_mem->lkey;
		return 0;
	}

	ipb.addr = iov->addr;
	ipb.size = iov->length;
	mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
			IB_ACCESS_LOCAL_WRITE, &iov->addr);

	dprintk("RPC: %s: phys convert: 0x%llx "
			"registered 0x%llx length %d\n",
			__func__, (unsigned long long)ipb.addr,
			(unsigned long long)iov->addr, len);

	if (IS_ERR(mr)) {
		*mrp = NULL;
		rc = PTR_ERR(mr);
		dprintk("RPC: %s: failed with %i\n", __func__, rc);
	} else {
		*mrp = mr;
		iov->lkey = mr->lkey;
		rc = 0;
	}

	return rc;
}

static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
				struct ib_mr *mr, struct ib_sge *iov)
{
	int rc;

	ib_dma_unmap_single(ia->ri_id->device,
			iov->addr, iov->length, DMA_BIDIRECTIONAL);

	if (NULL == mr)
		return 0;

	rc = ib_dereg_mr(mr);
	if (rc)
		dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
	return rc;
}

/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
	struct rpcrdma_regbuf *rb;
	int rc;

	rc = -ENOMEM;
	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		goto out;

	rb->rg_size = size;
	rb->rg_owner = NULL;
	rc = rpcrdma_register_internal(ia, rb->rg_base, size,
				       &rb->rg_mr, &rb->rg_iov);
	if (rc)
		goto out_free;

	return rb;

out_free:
	kfree(rb);
out:
	return ERR_PTR(rc);
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	if (rb) {
		rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
		kfree(rb);
	}
}

/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
	seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	seg->mr_dmalen = seg->mr_len;
	if (seg->mr_page)
		seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
				seg->mr_page, offset_in_page(seg->mr_offset),
				seg->mr_dmalen, seg->mr_dir);
	else
		seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
				seg->mr_offset,
				seg->mr_dmalen, seg->mr_dir);
	if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
		dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
			__func__,
			(unsigned long long)seg->mr_dma,
			seg->mr_offset, seg->mr_dmalen);
	}
}

static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
	if (seg->mr_page)
		ib_dma_unmap_page(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
	else
		ib_dma_unmap_single(ia->ri_id->device,
				seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
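
/* Note: the mapping direction chosen in rpcrdma_map_one() follows the
 * chunk type: "writing" means the remote peer will RDMA Write into
 * this memory (DMA_FROM_DEVICE); otherwise the peer will RDMA Read
 * from it (DMA_TO_DEVICE). Page-based segments use the page mapping
 * API, kmalloc'd segments the single-buffer API.
 */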

static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia,
			struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;
	struct ib_mr *mr = frmr->fr_mr;
	struct ib_send_wr fastreg_wr, *bad_wr;
	u8 key;
	int len, pageoff;
	int i, rc;
	int seg_len;
	u64 pa;
	int page_no;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > ia->ri_max_frmr_depth)
		*nsegs = ia->ri_max_frmr_depth;
	for (page_no = i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		pa = seg->mr_dma;
		for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
			frmr->fr_pgl->page_list[page_no++] = pa;
			pa += PAGE_SIZE;
		}
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
		__func__, mw, i, len);

	frmr->fr_state = FRMR_IS_VALID;

	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
	fastreg_wr.wr_id = (unsigned long)(void *)mw;
	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
	fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
	fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
	fastreg_wr.wr.fast_reg.page_list_len = page_no;
	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
	fastreg_wr.wr.fast_reg.length = len;

	key = (u8)(mr->rkey & 0x000000FF);
	ib_update_fast_reg_key(mr, ++key);
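
	/* The low-order byte of an rkey acts as a generation number.
	 * Bumping it before every FAST_REG_MR WR gives each registration
	 * of this MR a distinct rkey, so a stale rkey from an earlier
	 * registration can no longer match.
	 */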
	fastreg_wr.wr.fast_reg.access_flags = (writing ?
				IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
				IB_ACCESS_REMOTE_READ);
	fastreg_wr.wr.fast_reg.rkey = mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
	if (rc) {
		dprintk("RPC: %s: failed ib_post_send for register,"
			" status %i\n", __func__, rc);
		ib_update_fast_reg_key(mr, --key);
		goto out_err;
	}

	seg1->mr_rkey = mr->rkey;
	seg1->mr_base = seg1->mr_dma + pageoff;
	seg1->mr_nsegs = i;
	seg1->mr_len = len;
	*nsegs = i;
	return 0;

out_err:
	frmr->fr_state = FRMR_IS_INVALID;
	while (i--)
		rpcrdma_unmap_one(ia, --seg);
	return rc;
}

static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	struct ib_send_wr invalidate_wr, *bad_wr;
	int rc;

	seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

	memset(&invalidate_wr, 0, sizeof invalidate_wr);
	invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
	invalidate_wr.opcode = IB_WR_LOCAL_INV;
	invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
	DECR_CQCOUNT(&r_xprt->rx_ep);

	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
	read_unlock(&ia->ri_qplock);
	if (rc) {
		/* Force rpcrdma_buffer_get() to retry */
		seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
		dprintk("RPC: %s: failed ib_post_send for invalidate,"
			" status %i\n", __func__, rc);
	}
	return rc;
}

static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
			int *nsegs, int writing, struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
	int len, pageoff, i, rc;

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
		*nsegs = RPCRDMA_MAX_DATA_SEGS;
	for (i = 0; i < *nsegs;) {
		rpcrdma_map_one(ia, seg, writing);
		physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
	if (rc) {
		dprintk("RPC: %s: failed ib_map_phys_fmr "
			"%u@0x%llx+%i (%d)... status %i\n", __func__,
			len, (unsigned long long)seg1->mr_dma,
			pageoff, i, rc);
		while (i--)
			rpcrdma_unmap_one(ia, --seg);
	} else {
		seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
		seg1->mr_base = seg1->mr_dma + pageoff;
		seg1->mr_nsegs = i;
		seg1->mr_len = len;
	}
	*nsegs = i;
	return rc;
}

static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
			struct rpcrdma_ia *ia)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	LIST_HEAD(l);
	int rc;

	list_add(&seg1->rl_mw->r.fmr->list, &l);
	rc = ib_unmap_fmr(&l);
	read_lock(&ia->ri_qplock);
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia, seg++);
	read_unlock(&ia->ri_qplock);
	if (rc)
		dprintk("RPC: %s: failed ib_unmap_fmr,"
			" status %i\n", __func__, rc);
	return rc;
}

int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int rc = 0;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		rpcrdma_map_one(ia, seg, writing);
		seg->mr_rkey = ia->ri_bind_mem->rkey;
		seg->mr_base = seg->mr_dma;
		seg->mr_nsegs = 1;
		nsegs = 1;
		break;

	/* Registration using frmr registration */
	case RPCRDMA_FRMR:
		rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
		break;

	/* Registration using fmr memory registration */
	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
		break;

	default:
		return -EIO;
	}
	if (rc)
		return rc;

	return nsegs;
}

int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
		struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	int nsegs = seg->mr_nsegs, rc;

	switch (ia->ri_memreg_strategy) {

	case RPCRDMA_ALLPHYSICAL:
		read_lock(&ia->ri_qplock);
		rpcrdma_unmap_one(ia, seg);
		read_unlock(&ia->ri_qplock);
		break;

	case RPCRDMA_FRMR:
		rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
		break;

	case RPCRDMA_MTHCAFMR:
		rc = rpcrdma_deregister_fmr_external(seg, ia);
		break;

	default:
		break;
	}
	return nsegs;
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr send_wr, *send_wr_fail;
	struct rpcrdma_rep *rep = req->rl_reply;
	int rc;

	if (rep) {
		rc = rpcrdma_ep_post_recv(ia, ep, rep);
		if (rc)
			goto out;
		req->rl_reply = NULL;
	}

	send_wr.next = NULL;
	send_wr.wr_id = 0ULL;	/* no send cookie */
	send_wr.sg_list = req->rl_send_iov;
	send_wr.num_sge = req->rl_niovs;
	send_wr.opcode = IB_WR_SEND;
	if (send_wr.num_sge == 4)	/* no need to sync any pad (constant) */
		ib_dma_sync_single_for_device(ia->ri_id->device,
			req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
			DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
		DMA_TO_DEVICE);
	ib_dma_sync_single_for_device(ia->ri_id->device,
		req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
		DMA_TO_DEVICE);

	if (DECR_CQCOUNT(ep) > 0)
		send_wr.send_flags = 0;
	else { /* Provider must take a send completion every now and then */
		INIT_CQCOUNT(ep);
		send_wr.send_flags = IB_SEND_SIGNALED;
	}
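
	/* Note: sends normally complete silently (send_flags == 0); the
	 * zero wr_id above tells rpcrdma_sendcq_process_wc() there is no
	 * MW context attached to this SEND. The periodic IB_SEND_SIGNALED
	 * keeps the provider CQ from wrapping, per the comment above
	 * rpcrdma_sendcq_upcall().
	 */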
	rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
			rc);
out:
	return rc;
}

/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_ep *ep,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr recv_wr, *recv_wr_fail;
	int rc;

	recv_wr.next = NULL;
	recv_wr.wr_id = (u64) (unsigned long) rep;
	recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	recv_wr.num_sge = 1;

	ib_dma_sync_single_for_cpu(ia->ri_id->device,
				   rdmab_addr(rep->rr_rdmabuf),
				   rdmab_length(rep->rr_rdmabuf),
				   DMA_BIDIRECTIONAL);

	rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
	if (rc)
		dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
			rc);
	return rc;
}

/* Physical mapping means one Read/Write list entry per-page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 *     which has the same constraint. Factor in the inline
 *     rsize as well.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	unsigned int inline_size, pages;

	inline_size = min_t(unsigned int,
			    cdata->inline_wsize, cdata->inline_rsize);
	inline_size -= RPCRDMA_HDRLEN_MIN;
	pages = inline_size / sizeof(struct rpcrdma_segment);
	return pages << PAGE_SHIFT;
}
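
/* Illustrative arithmetic (hypothetical sizes): with 1024-byte inline
 * buffers, a fixed header of RPCRDMA_HDRLEN_MIN bytes, and 16-byte
 * struct rpcrdma_segment entries, roughly (1024 - hdrlen) / 16 list
 * entries fit inline, each describing one page of payload.
 */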

static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
	return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}

size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
	size_t result;

	switch (r_xprt->rx_ia.ri_memreg_strategy) {
	case RPCRDMA_ALLPHYSICAL:
		result = rpcrdma_physical_max_payload(r_xprt);
		break;
	default:
		result = rpcrdma_mr_max_payload(r_xprt);
	}
	return result;
}