/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <asm/bitops.h>

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY        RPCDBG_TRANS
#endif
static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
/*
 * handle replies in tasklet context, using a single, global list
 * rdma tasklet function -- just turn around and call the func
 * for all replies on the list
 */

static DEFINE_SPINLOCK(rpcrdma_tk_lock_g);
static LIST_HEAD(rpcrdma_tasklets_g);
static void
rpcrdma_run_tasklet(unsigned long data)
{
        struct rpcrdma_rep *rep;
        void (*func)(struct rpcrdma_rep *);
        unsigned long flags;

        spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        while (!list_empty(&rpcrdma_tasklets_g)) {
                rep = list_entry(rpcrdma_tasklets_g.next,
                                 struct rpcrdma_rep, rr_list);
                list_del(&rep->rr_list);
                func = rep->rr_func;
                rep->rr_func = NULL;
                spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);

                if (func)
                        func(rep);
                else
                        rpcrdma_recv_buffer_put(rep);

                spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        }
        spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
}
static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
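/*
 * Note: replies are queued on the global rpcrdma_tasklets_g list by the
 * receive completion path and drained by rpcrdma_run_tasklet() above, so
 * RPC reply processing happens in softirq context rather than inside the
 * provider's completion upcall.
 */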
static const char * const async_event[] = {
        /* ... */
        "communication established",
        "send queue drained",
        "path migration successful",
        /* ... */
        "device fatal error",
        /* ... */
};

#define ASYNC_MSG(status)                                       \
        ((status) < ARRAY_SIZE(async_event) ?                   \
                async_event[(status)] : "unknown async error")
static void
rpcrdma_schedule_tasklet(struct list_head *sched_list)
{
        unsigned long flags;

        spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
        list_splice_tail(sched_list, &rpcrdma_tasklets_g);
        spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
        tasklet_schedule(&rpcrdma_tasklet_g);
}
static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        pr_err("RPC: %s: %s on device %s ep %p\n",
               __func__, ASYNC_MSG(event->event),
               event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}
static void
rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        pr_err("RPC: %s: %s on device %s ep %p\n",
               __func__, ASYNC_MSG(event->event),
               event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}
static const char * const wc_status[] = {
        /* ... */
        "local length error",
        "local QP operation error",
        "local EE context operation error",
        "local protection error",
        /* ... */
        "memory management operation error",
        "bad response error",
        "local access error",
        "remote invalid request error",
        "remote access error",
        "remote operation error",
        "transport retry counter exceeded",
        "RNR retry counter exceeded",
        "local RDD violation error",
        "remote invalid RD request",
        /* ... */
        "invalid EE context number",
        "invalid EE context state",
        /* ... */
        "response timeout error",
        /* ... */
};

#define COMPLETION_MSG(status)                                  \
        ((status) < ARRAY_SIZE(wc_status) ?                     \
                wc_status[(status)] : "unexpected completion error")
static void
rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
        if (likely(wc->status == IB_WC_SUCCESS))
                return;

        /* WARNING: Only wr_id and status are reliable at this point */
        if (wc->wr_id == 0ULL) {
                if (wc->status != IB_WC_WR_FLUSH_ERR)
                        pr_err("RPC: %s: SEND: %s\n",
                               __func__, COMPLETION_MSG(wc->status));
        } else {
                struct rpcrdma_mw *r;

                r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
                r->r.frmr.fr_state = FRMR_IS_STALE;
                pr_err("RPC: %s: frmr %p (stale): %s\n",
                       __func__, r, COMPLETION_MSG(wc->status));
        }
}
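/*
 * Note: the pollers below reap completions in batches of RPCRDMA_POLLSIZE
 * and stop after roughly RPCRDMA_WC_BUDGET completions per upcall, so a
 * busy completion queue cannot monopolize the context that invoked the
 * handler.
 */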
static int
rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct ib_wc *wcs;
        int budget, count, rc;

        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_send_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        return rc;

                count = rc;
                while (count-- > 0)
                        rpcrdma_sendcq_process_wc(wcs++);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        return 0;
}
/*
 * Handle send, fast_reg_mr, and local_inv completions.
 *
 * Send events are typically suppressed and thus do not result
 * in an upcall. Occasionally one is signaled, however. This
 * prevents the provider's completion queue from wrapping and
 * losing a completion.
 */
static void
rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_sendcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC: %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_sendcq_poll(cq, ep);
}
static void
rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list)
{
        struct rpcrdma_rep *rep =
                        (struct rpcrdma_rep *)(unsigned long)wc->wr_id;

        /* WARNING: Only wr_id and status are reliable at this point */
        if (wc->status != IB_WC_SUCCESS)
                goto out_fail;

        /* status == SUCCESS means all fields in wc are trustworthy */
        if (wc->opcode != IB_WC_RECV)
                return;

        dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
                __func__, rep, wc->byte_len);

        rep->rr_len = wc->byte_len;
        ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rep->rr_len, DMA_FROM_DEVICE);
        prefetch(rdmab_to_msg(rep->rr_rdmabuf));

out_schedule:
        list_add_tail(&rep->rr_list, sched_list);
        return;
out_fail:
        if (wc->status != IB_WC_WR_FLUSH_ERR)
                pr_err("RPC: %s: rep %p: %s\n",
                       __func__, rep, COMPLETION_MSG(wc->status));
        rep->rr_len = ~0U;
        goto out_schedule;
}
static int
rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
        struct list_head sched_list;
        struct ib_wc *wcs;
        int budget, count, rc;

        INIT_LIST_HEAD(&sched_list);
        budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
        do {
                wcs = ep->rep_recv_wcs;

                rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
                if (rc <= 0)
                        goto out_schedule;

                count = rc;
                while (count-- > 0)
                        rpcrdma_recvcq_process_wc(wcs++, &sched_list);
        } while (rc == RPCRDMA_POLLSIZE && --budget);
        rc = 0;

out_schedule:
        rpcrdma_schedule_tasklet(&sched_list);
        return rc;
}
/*
 * Handle receive completions.
 *
 * It is reentrant but processes single events in order to maintain
 * ordering of receives to keep server credits.
 *
 * It is the responsibility of the scheduled tasklet to return
 * recv buffers to the pool. NOTE: this affects synchronization of
 * connection shutdown. That is, the structures required for
 * the completion of the reply handler must remain intact until
 * all memory has been reclaimed.
 */
static void
rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
        struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
        int rc;

        rc = rpcrdma_recvcq_poll(cq, ep);
        if (rc) {
                dprintk("RPC: %s: ib_poll_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rc = ib_req_notify_cq(cq,
                        IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
        if (rc == 0)
                return;
        if (rc < 0) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                return;
        }

        rpcrdma_recvcq_poll(cq, ep);
}
static void
rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
{
        struct ib_wc wc;
        LIST_HEAD(sched_list);

        while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
                rpcrdma_recvcq_process_wc(&wc, &sched_list);
        if (!list_empty(&sched_list))
                rpcrdma_schedule_tasklet(&sched_list);
        while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
                rpcrdma_sendcq_process_wc(&wc);
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
static const char * const conn[] = {
        /* ... RDMA CM event names, indexed by rdma_cm_event_type ... */
};

#define CONNECTION_MSG(status)                                          \
        ((status) < ARRAY_SIZE(conn) ?                                  \
                conn[(status)] : "unrecognized connection error")
#endif
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct rpcrdma_xprt *xprt = id->context;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
        struct ib_qp_attr *attr = &ia->ri_qp_attr;
        struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
        int connstate = 0;

        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                ia->ri_async_rc = 0;
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
                ia->ri_async_rc = -EHOSTUNREACH;
                dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                ia->ri_async_rc = -ENETUNREACH;
                dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                connstate = 1;
                ib_query_qp(ia->ri_id->qp, attr,
                            IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
                            iattr);
                dprintk("RPC: %s: %d responder resources"
                        " (%d initiator)\n",
                        __func__, attr->max_dest_rd_atomic,
                        attr->max_rd_atomic);
                goto connected;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                connstate = -ENOTCONN;
                goto connected;
        case RDMA_CM_EVENT_UNREACHABLE:
                connstate = -ENETDOWN;
                goto connected;
        case RDMA_CM_EVENT_REJECTED:
                connstate = -ECONNREFUSED;
                goto connected;
        case RDMA_CM_EVENT_DISCONNECTED:
                connstate = -ECONNABORTED;
                goto connected;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                connstate = -ENODEV;
connected:
                dprintk("RPC: %s: %sconnected\n",
                        __func__, connstate > 0 ? "" : "dis");
                ep->rep_connected = connstate;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
                /*FALLTHROUGH*/
        default:
                dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
                        __func__, sap, rpc_get_port(sap), ep,
                        CONNECTION_MSG(event->event));
                break;
        }

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        if (connstate == 1) {
                int ird = attr->max_dest_rd_atomic;
                int tird = ep->rep_remote_cma.responder_resources;

                pr_info("rpcrdma: connection to %pIS:%u on %s, memreg %d slots %d ird %d%s\n",
                        sap, rpc_get_port(sap),
                        ia->ri_id->device->name,
                        ia->ri_memreg_strategy,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
        } else if (connstate < 0) {
                pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
                        sap, rpc_get_port(sap), connstate);
        }
#endif

        return 0;
}
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
                        struct rpcrdma_ia *ia, struct sockaddr *addr)
{
        struct rdma_cm_id *id;
        int rc;

        init_completion(&ia->ri_done);

        id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC: %s: rdma_create_id() failed %i\n",
                        __func__, rc);
                return id;
        }

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
                        __func__, rc);
                goto out;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        return id;

out:
        rdma_destroy_id(id);
        return ERR_PTR(rc);
}
/*
 * Drain any cq, prior to teardown.
 */
static void
rpcrdma_clean_cq(struct ib_cq *cq)
{
        struct ib_wc wc;
        int count = 0;

        while (1 == ib_poll_cq(cq, 1, &wc))
                ++count;

        if (count)
                dprintk("RPC: %s: flushed %d events (last 0x%x)\n",
                        __func__, count, wc.opcode);
}
/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
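/*
 * Note: rpcrdma_ia_open() silently downgrades the requested memory
 * registration mode when the HCA lacks support: FRMR falls back to
 * MTHCAFMR, and MTHCAFMR falls back to ALLPHYSICAL. The mode finally
 * chosen is recorded in ia->ri_memreg_strategy.
 */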
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct ib_device_attr *devattr = &ia->ri_devattr;
        int rc, mem_priv;

        ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
        if (IS_ERR(ia->ri_id)) {
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }

        ia->ri_pd = ib_alloc_pd(ia->ri_id->device);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                dprintk("RPC: %s: ib_alloc_pd() failed %i\n",
                        __func__, rc);
                goto out2;
        }

        rc = ib_query_device(ia->ri_id->device, devattr);
        if (rc) {
                dprintk("RPC: %s: ib_query_device failed %d\n",
                        __func__, rc);
                goto out3;
        }

        if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
                ia->ri_have_dma_lkey = 1;
                ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
        }

        if (memreg == RPCRDMA_FRMR) {
                /* Requires both frmr reg and local dma lkey */
                if (((devattr->device_cap_flags &
                     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
                    (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
                      (devattr->max_fast_reg_page_list_len == 0)) {
                        dprintk("RPC: %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
                } else {
                        /* Mind the ia limit on FRMR page list depth */
                        ia->ri_max_frmr_depth = min_t(unsigned int,
                                RPCRDMA_MAX_DATA_SEGS,
                                devattr->max_fast_reg_page_list_len);
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
                if (!ia->ri_id->device->alloc_fmr) {
                        dprintk("RPC: %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_ALLPHYSICAL;
                }
        }

        /*
         * Optionally obtain an underlying physical identity mapping in
         * order to do a memory window-based bind. This base registration
         * is protected from remote access - that is enabled only by binding
         * for the specific bytes targeted during each RPC operation, and
         * revoked after the corresponding completion similar to a storage
         * adapter.
         */
        switch (memreg) {
        case RPCRDMA_FRMR:
                break;
        case RPCRDMA_ALLPHYSICAL:
                mem_priv = IB_ACCESS_LOCAL_WRITE |
                                IB_ACCESS_REMOTE_WRITE |
                                IB_ACCESS_REMOTE_READ;
                goto register_setup;
        case RPCRDMA_MTHCAFMR:
                if (ia->ri_have_dma_lkey)
                        break;
                mem_priv = IB_ACCESS_LOCAL_WRITE;
        register_setup:
                ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
                if (IS_ERR(ia->ri_bind_mem)) {
                        printk(KERN_ALERT "%s: ib_get_dma_mr for "
                                "phys register failed with %lX\n",
                                __func__, PTR_ERR(ia->ri_bind_mem));
                        rc = -ENOMEM;
                        goto out3;
                }
                break;
        default:
                printk(KERN_ERR "RPC: Unsupported memory "
                                "registration mode: %d\n", memreg);
                rc = -ENOMEM;
                goto out3;
        }
        dprintk("RPC: %s: memory registration strategy is %d\n",
                __func__, memreg);

        /* Else will do memory reg/dereg for each chunk */
        ia->ri_memreg_strategy = memreg;

        rwlock_init(&ia->ri_qplock);
        return 0;

out3:
        ib_dealloc_pd(ia->ri_pd);
        ia->ri_pd = NULL;
out2:
        rdma_destroy_id(ia->ri_id);
        ia->ri_id = NULL;
out1:
        return rc;
}
/*
 * Clean up/close an IA.
 *   o if event handles and PD have been initialized, free them.
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
        int rc;

        dprintk("RPC: %s: entering\n", __func__);
        if (ia->ri_bind_mem != NULL) {
                rc = ib_dereg_mr(ia->ri_bind_mem);
                dprintk("RPC: %s: ib_dereg_mr returned %i\n",
                        __func__, rc);
        }
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
        if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
                rc = ib_dealloc_pd(ia->ri_pd);
                dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
                        __func__, rc);
        }
}
/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                                struct rpcrdma_create_data_internal *cdata)
{
        struct ib_device_attr *devattr = &ia->ri_devattr;
        struct ib_cq *sendcq, *recvcq;
        int rc, err;

        /* check provider's send/recv wr limits */
        if (cdata->max_requests > devattr->max_qp_wr)
                cdata->max_requests = devattr->max_qp_wr;

        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        /* send_cq and recv_cq initialized below */
        ep->rep_attr.srq = NULL;
        ep->rep_attr.cap.max_send_wr = cdata->max_requests;
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR: {
                int depth = 7;

                /* Add room for frmr register and invalidate WRs.
                 * 1. FRMR reg WR for head
                 * 2. FRMR invalidate WR for head
                 * 3. N FRMR reg WRs for pagelist
                 * 4. N FRMR invalidate WRs for pagelist
                 * 5. FRMR reg WR for tail
                 * 6. FRMR invalidate WR for tail
                 * 7. The RDMA_SEND WR
                 */

                /* Calculate N if the device max FRMR depth is smaller than
                 * RPCRDMA_MAX_DATA_SEGS.
                 */
                if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
                        int delta = RPCRDMA_MAX_DATA_SEGS -
                                    ia->ri_max_frmr_depth;

                        do {
                                depth += 2; /* FRMR reg + invalidate */
                                delta -= ia->ri_max_frmr_depth;
                        } while (delta > 0);
                }
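                /*
                 * For example, assuming RPCRDMA_MAX_DATA_SEGS is 64 and the
                 * device supports a fast-register page list depth of only 32,
                 * the loop above runs once and adds one extra reg/invalidate
                 * pair, growing depth from 7 to 9.
                 */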
                ep->rep_attr.cap.max_send_wr *= depth;
                if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
                        cdata->max_requests = devattr->max_qp_wr / depth;
                        if (!cdata->max_requests)
                                return -EINVAL;
                        ep->rep_attr.cap.max_send_wr = cdata->max_requests *
                                                       depth;
                }
                break;
        }
        default:
                break;
        }
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
        ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
        ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        ep->rep_attr.qp_type = IB_QPT_RC;
        ep->rep_attr.port_num = ~0;

        if (cdata->padding) {
                ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
                                                      GFP_KERNEL);
                if (IS_ERR(ep->rep_padbuf))
                        return PTR_ERR(ep->rep_padbuf);
        } else
                ep->rep_padbuf = NULL;

        dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
                "iovs: send %d recv %d\n",
                __func__,
                ep->rep_attr.cap.max_send_wr,
                ep->rep_attr.cap.max_recv_wr,
                ep->rep_attr.cap.max_send_sge,
                ep->rep_attr.cap.max_recv_sge);
        /* set trigger for requesting send completion */
        ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
        if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
                ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
        else if (ep->rep_cqinit <= 2)
                ep->rep_cqinit = 0;
        INIT_CQCOUNT(ep);
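        /*
         * Note: rep_cqinit is the number of SENDs that may be posted
         * unsignaled before a completion must be requested; see the
         * DECR_CQCOUNT() logic in rpcrdma_ep_post(). This keeps the send
         * CQ from overflowing without taking an interrupt per RPC.
         */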
        init_waitqueue_head(&ep->rep_connect_wait);
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

        sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
                              rpcrdma_cq_async_error_upcall, ep,
                              ep->rep_attr.cap.max_send_wr + 1, 0);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                dprintk("RPC: %s: failed to create send CQ: %i\n",
                        __func__, rc);
                goto out1;
        }

        rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
        if (rc) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                goto out2;
        }

        recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
                              rpcrdma_cq_async_error_upcall, ep,
                              ep->rep_attr.cap.max_recv_wr + 1, 0);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                dprintk("RPC: %s: failed to create recv CQ: %i\n",
                        __func__, rc);
                goto out2;
        }

        rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
        if (rc) {
                dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
                        __func__, rc);
                ib_destroy_cq(recvcq);
                goto out2;
        }

        ep->rep_attr.send_cq = sendcq;
        ep->rep_attr.recv_cq = recvcq;

        /* Initialize cma parameters */

        /* RPC/RDMA does not use private data */
        ep->rep_remote_cma.private_data = NULL;
        ep->rep_remote_cma.private_data_len = 0;

        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
        if (devattr->max_qp_rd_atom > 32)       /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources =
                                                devattr->max_qp_rd_atom;

        ep->rep_remote_cma.retry_count = 7;
        ep->rep_remote_cma.flow_control = 0;
        ep->rep_remote_cma.rnr_retry_count = 0;

        return 0;

out2:
        err = ib_destroy_cq(sendcq);
        if (err)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, err);
out1:
        rpcrdma_free_regbuf(ia, ep->rep_padbuf);
        return rc;
}
/*
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        int rc;

        dprintk("RPC: %s: entering, connected is %d\n",
                __func__, ep->rep_connected);

        cancel_delayed_work_sync(&ep->rep_connect_worker);

        if (ia->ri_id->qp) {
                rpcrdma_ep_disconnect(ep, ia);
                rdma_destroy_qp(ia->ri_id);
                ia->ri_id->qp = NULL;
        }

        rpcrdma_free_regbuf(ia, ep->rep_padbuf);

        rpcrdma_clean_cq(ep->rep_attr.recv_cq);
        rc = ib_destroy_cq(ep->rep_attr.recv_cq);
        if (rc)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, rc);

        rpcrdma_clean_cq(ep->rep_attr.send_cq);
        rc = ib_destroy_cq(ep->rep_attr.send_cq);
        if (rc)
                dprintk("RPC: %s: ib_destroy_cq returned %i\n",
                        __func__, rc);
}
/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        struct rdma_cm_id *id, *old;
        int rc = 0;
        int retry_count = 0;

        if (ep->rep_connected != 0) {
                struct rpcrdma_xprt *xprt;
retry:
                dprintk("RPC: %s: reconnecting...\n", __func__);

                rpcrdma_ep_disconnect(ep, ia);
                rpcrdma_flush_cqs(ep);

                switch (ia->ri_memreg_strategy) {
                case RPCRDMA_FRMR:
                        rpcrdma_reset_frmrs(ia);
                        break;
                case RPCRDMA_MTHCAFMR:
                        rpcrdma_reset_fmrs(ia);
                        break;
                case RPCRDMA_ALLPHYSICAL:
                        break;
                default:
                        rc = -EIO;
                        goto out;
                }

                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
                id = rpcrdma_create_id(xprt, ia,
                                (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
                        rc = -EHOSTUNREACH;
                        goto out;
                }
                /* TEMP TEMP TEMP - fail if new device:
                 * Deregister/remarshal *all* requests!
                 * Close and recreate adapter, pd, etc!
                 * Re-determine all attributes still sane!
                 * More stuff I haven't thought of!
                 */
                if (ia->ri_id->device != id->device) {
                        printk("RPC: %s: can't reconnect on "
                                "different device!\n", __func__);
                        rdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }
                rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        rdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }

                write_lock(&ia->ri_qplock);
                old = ia->ri_id;
                ia->ri_id = id;
                write_unlock(&ia->ri_qplock);

                rdma_destroy_qp(old);
                rdma_destroy_id(old);
        } else {
                dprintk("RPC: %s: connecting...\n", __func__);
                rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        /* do not update ep->rep_connected */
                        return -ENETUNREACH;
                }
        }

        ep->rep_connected = 0;

        rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
        if (rc) {
                dprintk("RPC: %s: rdma_connect() failed with %i\n",
                        __func__, rc);
                goto out;
        }

        wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

        /*
         * Check state. A non-peer reject indicates no listener
         * (ECONNREFUSED), which may be a transient state. All
         * others indicate a transport condition which has already
         * undergone a best-effort.
         */
        if (ep->rep_connected == -ECONNREFUSED &&
            ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
                dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
                goto retry;
        }
        if (ep->rep_connected <= 0) {
                /* Sometimes, the only way to reliably connect to remote
                 * CMs is to use same nonzero values for ORD and IRD. */
                if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
                    (ep->rep_remote_cma.responder_resources == 0 ||
                     ep->rep_remote_cma.initiator_depth !=
                                ep->rep_remote_cma.responder_resources)) {
                        if (ep->rep_remote_cma.responder_resources == 0)
                                ep->rep_remote_cma.responder_resources = 1;
                        ep->rep_remote_cma.initiator_depth =
                                ep->rep_remote_cma.responder_resources;
                        goto retry;
                }
                rc = ep->rep_connected;
        } else {
                dprintk("RPC: %s: connected\n", __func__);
        }

out:
        if (rc)
                ep->rep_connected = rc;
        return rc;
}
/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        int rc;

        rpcrdma_flush_cqs(ep);
        rc = rdma_disconnect(ia->ri_id);
        if (!rc) {
                /* returns without wait if not connected */
                wait_event_interruptible(ep->rep_connect_wait,
                                         ep->rep_connected != 1);
                dprintk("RPC: %s: after wait, %sconnected\n", __func__,
                        (ep->rep_connected == 1) ? "still " : "dis");
        } else {
                dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
                ep->rep_connected = rc;
        }
}
static struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_req *req;

        req = kzalloc(sizeof(*req), GFP_KERNEL);
        if (req == NULL)
                return ERR_PTR(-ENOMEM);

        req->rl_buffer = &r_xprt->rx_buf;
        return req;
}
static struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_rep *rep;
        int rc;

        rc = -ENOMEM;
        rep = kzalloc(sizeof(*rep), GFP_KERNEL);
        if (rep == NULL)
                goto out;

        rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize,
                                               GFP_KERNEL);
        if (IS_ERR(rep->rr_rdmabuf)) {
                rc = PTR_ERR(rep->rr_rdmabuf);
                goto out_free;
        }

        rep->rr_buffer = &r_xprt->rx_buf;
        return rep;

out_free:
        kfree(rep);
out:
        return ERR_PTR(rc);
}
static int
rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
        int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
        struct ib_fmr_attr fmr_attr = {
                .max_pages      = RPCRDMA_MAX_DATA_SEGS,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };
        struct rpcrdma_mw *r;
        int i, rc;

        i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
        dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (r == NULL)
                        return -ENOMEM;

                r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
                if (IS_ERR(r->r.fmr)) {
                        rc = PTR_ERR(r->r.fmr);
                        dprintk("RPC: %s: ib_alloc_fmr failed %i\n",
                                __func__, rc);
                        goto out_free;
                }

                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
        }
        return 0;

out_free:
        kfree(r);
        return rc;
}
static int
rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
{
        struct rpcrdma_frmr *f;
        struct rpcrdma_mw *r;
        int i, rc;

        i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
        dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i);

        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (r == NULL)
                        return -ENOMEM;

                f = &r->r.frmr;
                f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
                                                ia->ri_max_frmr_depth);
                if (IS_ERR(f->fr_mr)) {
                        rc = PTR_ERR(f->fr_mr);
                        dprintk("RPC: %s: ib_alloc_fast_reg_mr "
                                "failed %i\n", __func__, rc);
                        goto out_free;
                }

                f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
                                                        ia->ri_max_frmr_depth);
                if (IS_ERR(f->fr_pgl)) {
                        rc = PTR_ERR(f->fr_pgl);
                        dprintk("RPC: %s: ib_alloc_fast_reg_page_list "
                                "failed %i\n", __func__, rc);

                        ib_dereg_mr(f->fr_mr);
                        goto out_free;
                }

                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
        }
        return 0;

out_free:
        kfree(r);
        return rc;
}
int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        char *p;
        size_t len;
        int i, rc;

        buf->rb_max_requests = cdata->max_requests;
        spin_lock_init(&buf->rb_lock);

        /* Need to allocate:
         *   1. arrays for send and recv pointers
         *   2. arrays of struct rpcrdma_req to fill in pointers
         *   3. array of struct rpcrdma_rep for replies
         * Send/recv buffers in req/rep need to be registered
         */
        len = buf->rb_max_requests *
                (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));

        p = kzalloc(len, GFP_KERNEL);
        if (p == NULL) {
                dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n",
                        __func__, len);
                rc = -ENOMEM;
                goto out;
        }
        buf->rb_pool = p;       /* for freeing it later */

        buf->rb_send_bufs = (struct rpcrdma_req **) p;
        p = (char *) &buf->rb_send_bufs[buf->rb_max_requests];
        buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
        p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];

        INIT_LIST_HEAD(&buf->rb_mws);
        INIT_LIST_HEAD(&buf->rb_all);
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
                rc = rpcrdma_init_frmrs(ia, buf);
                if (rc)
                        goto out;
                break;
        case RPCRDMA_MTHCAFMR:
                rc = rpcrdma_init_fmrs(ia, buf);
                if (rc)
                        goto out;
                break;
        default:
                break;
        }

        for (i = 0; i < buf->rb_max_requests; i++) {
                struct rpcrdma_req *req;
                struct rpcrdma_rep *rep;

                req = rpcrdma_create_req(r_xprt);
                if (IS_ERR(req)) {
                        dprintk("RPC: %s: request buffer %d alloc"
                                " failed\n", __func__, i);
                        rc = PTR_ERR(req);
                        goto out;
                }
                buf->rb_send_bufs[i] = req;

                rep = rpcrdma_create_rep(r_xprt);
                if (IS_ERR(rep)) {
                        dprintk("RPC: %s: reply buffer %d alloc failed\n",
                                __func__, i);
                        rc = PTR_ERR(rep);
                        goto out;
                }
                buf->rb_recv_bufs[i] = rep;
        }

        return 0;
out:
        rpcrdma_buffer_destroy(buf);
        return rc;
}
static void
rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep)
{
        if (!rep)
                return;

        rpcrdma_free_regbuf(ia, rep->rr_rdmabuf);
        kfree(rep);
}

static void
rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
        if (!req)
                return;

        rpcrdma_free_regbuf(ia, req->rl_sendbuf);
        rpcrdma_free_regbuf(ia, req->rl_rdmabuf);
        kfree(req);
}
static void
rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_mw *r;
        int rc;

        while (!list_empty(&buf->rb_all)) {
                r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&r->mw_all);
                list_del(&r->mw_list);

                rc = ib_dealloc_fmr(r->r.fmr);
                if (rc)
                        dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
                                __func__, rc);

                kfree(r);
        }
}
static void
rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_mw *r;
        int rc;

        while (!list_empty(&buf->rb_all)) {
                r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&r->mw_all);
                list_del(&r->mw_list);

                rc = ib_dereg_mr(r->r.frmr.fr_mr);
                if (rc)
                        dprintk("RPC: %s: ib_dereg_mr failed %i\n",
                                __func__, rc);
                ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

                kfree(r);
        }
}
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_ia *ia = rdmab_to_ia(buf);
        int i;

        /* clean up in reverse order from create
         *   1.  recv mr memory (mr free, then kfree)
         *   2.  send mr memory (mr free, then kfree)
         */
        dprintk("RPC: %s: entering\n", __func__);

        for (i = 0; i < buf->rb_max_requests; i++) {
                if (buf->rb_recv_bufs)
                        rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]);
                if (buf->rb_send_bufs)
                        rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
        }

        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
                rpcrdma_destroy_frmrs(buf);
                break;
        case RPCRDMA_MTHCAFMR:
                rpcrdma_destroy_fmrs(buf);
                break;
        default:
                break;
        }

        kfree(buf->rb_pool);
}
/* After a disconnect, unmap all FMRs.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_fmr_external().
 */
static void
rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
{
        struct rpcrdma_xprt *r_xprt =
                                container_of(ia, struct rpcrdma_xprt, rx_ia);
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct list_head *pos;
        struct rpcrdma_mw *r;
        LIST_HEAD(l);
        int rc;

        list_for_each(pos, &buf->rb_all) {
                r = list_entry(pos, struct rpcrdma_mw, mw_all);

                INIT_LIST_HEAD(&l);
                list_add(&r->r.fmr->list, &l);
                rc = ib_unmap_fmr(&l);
                if (rc)
                        dprintk("RPC: %s: ib_unmap_fmr failed %i\n",
                                __func__, rc);
        }
}
/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
 * an unusable state. Find FRMRs in this state and dereg / reg
 * each. FRMRs that are VALID and attached to an rpcrdma_req are
 * also torn down.
 *
 * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
 *
 * This is invoked only in the transport connect worker in order
 * to serialize with rpcrdma_register_frmr_external().
 */
static void
rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
{
        struct rpcrdma_xprt *r_xprt =
                                container_of(ia, struct rpcrdma_xprt, rx_ia);
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct list_head *pos;
        struct rpcrdma_mw *r;
        int rc;

        list_for_each(pos, &buf->rb_all) {
                r = list_entry(pos, struct rpcrdma_mw, mw_all);

                if (r->r.frmr.fr_state == FRMR_IS_INVALID)
                        continue;

                rc = ib_dereg_mr(r->r.frmr.fr_mr);
                if (rc)
                        dprintk("RPC: %s: ib_dereg_mr failed %i\n",
                                __func__, rc);
                ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);

                r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
                                        ia->ri_max_frmr_depth);
                if (IS_ERR(r->r.frmr.fr_mr)) {
                        rc = PTR_ERR(r->r.frmr.fr_mr);
                        dprintk("RPC: %s: ib_alloc_fast_reg_mr"
                                " failed %i\n", __func__, rc);
                        continue;
                }
                r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
                                        ia->ri_id->device,
                                        ia->ri_max_frmr_depth);
                if (IS_ERR(r->r.frmr.fr_pgl)) {
                        rc = PTR_ERR(r->r.frmr.fr_pgl);
                        dprintk("RPC: %s: "
                                "ib_alloc_fast_reg_page_list "
                                "failed %i\n", __func__, rc);

                        ib_dereg_mr(r->r.frmr.fr_mr);
                        continue;
                }
                r->r.frmr.fr_state = FRMR_IS_INVALID;
        }
}
/* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
 * some req segments uninitialized.
 */
static void
rpcrdma_buffer_put_mr(struct rpcrdma_mw **mw, struct rpcrdma_buffer *buf)
{
        if (*mw) {
                list_add_tail(&(*mw)->mw_list, &buf->rb_mws);
                *mw = NULL;
        }
}
/* Cycle mw's back in reverse order, and "spin" them.
 * This delays and scrambles reuse as much as possible.
 */
static void
rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
        struct rpcrdma_mr_seg *seg = req->rl_segments;
        struct rpcrdma_mr_seg *seg1 = seg;
        int i;

        for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++)
                rpcrdma_buffer_put_mr(&seg->rl_mw, buf);
        rpcrdma_buffer_put_mr(&seg1->rl_mw, buf);
}
static void
rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
        buf->rb_send_bufs[--buf->rb_send_index] = req;
        req->rl_niovs = 0;
        if (req->rl_reply) {
                buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply;
                req->rl_reply->rr_func = NULL;
                req->rl_reply = NULL;
        }
}
/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
 * Redo only the ib_post_send().
 */
static void
rpcrdma_retry_local_inv(struct rpcrdma_mw *r, struct rpcrdma_ia *ia)
{
        struct rpcrdma_xprt *r_xprt =
                                container_of(ia, struct rpcrdma_xprt, rx_ia);
        struct ib_send_wr invalidate_wr, *bad_wr;
        int rc;

        dprintk("RPC: %s: FRMR %p is stale\n", __func__, r);

        /* When this FRMR is re-inserted into rb_mws, it is no longer stale */
        r->r.frmr.fr_state = FRMR_IS_INVALID;

        memset(&invalidate_wr, 0, sizeof(invalidate_wr));
        invalidate_wr.wr_id = (unsigned long)(void *)r;
        invalidate_wr.opcode = IB_WR_LOCAL_INV;
        invalidate_wr.ex.invalidate_rkey = r->r.frmr.fr_mr->rkey;
        DECR_CQCOUNT(&r_xprt->rx_ep);

        dprintk("RPC: %s: frmr %p invalidating rkey %08x\n",
                __func__, r, r->r.frmr.fr_mr->rkey);

        read_lock(&ia->ri_qplock);
        rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
        read_unlock(&ia->ri_qplock);
        if (rc) {
                /* Force rpcrdma_buffer_get() to retry */
                r->r.frmr.fr_state = FRMR_IS_STALE;
                dprintk("RPC: %s: ib_post_send failed, %i\n",
                        __func__, rc);
        }
}
static void
rpcrdma_retry_flushed_linv(struct list_head *stale,
                           struct rpcrdma_buffer *buf)
{
        struct rpcrdma_ia *ia = rdmab_to_ia(buf);
        struct list_head *pos;
        struct rpcrdma_mw *r;
        unsigned long flags;

        list_for_each(pos, stale) {
                r = list_entry(pos, struct rpcrdma_mw, mw_list);
                rpcrdma_retry_local_inv(r, ia);
        }

        spin_lock_irqsave(&buf->rb_lock, flags);
        list_splice_tail(stale, &buf->rb_mws);
        spin_unlock_irqrestore(&buf->rb_lock, flags);
}
static struct rpcrdma_req *
rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf,
                         struct list_head *stale)
{
        struct rpcrdma_mw *r;
        int i;

        i = RPCRDMA_MAX_SEGS - 1;
        while (!list_empty(&buf->rb_mws)) {
                r = list_entry(buf->rb_mws.next,
                               struct rpcrdma_mw, mw_list);
                list_del(&r->mw_list);
                if (r->r.frmr.fr_state == FRMR_IS_STALE) {
                        list_add(&r->mw_list, stale);
                        continue;
                }
                req->rl_segments[i].rl_mw = r;
                if (unlikely(i-- == 0))
                        return req;     /* Success */
        }

        /* Not enough entries on rb_mws for this req */
        rpcrdma_buffer_put_sendbuf(req, buf);
        rpcrdma_buffer_put_mrs(req, buf);
        return NULL;
}
static struct rpcrdma_req *
rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
{
        struct rpcrdma_mw *r;
        int i;

        i = RPCRDMA_MAX_SEGS - 1;
        while (!list_empty(&buf->rb_mws)) {
                r = list_entry(buf->rb_mws.next,
                               struct rpcrdma_mw, mw_list);
                list_del(&r->mw_list);
                req->rl_segments[i].rl_mw = r;
                if (unlikely(i-- == 0))
                        return req;     /* Success */
        }

        /* Not enough entries on rb_mws for this req */
        rpcrdma_buffer_put_sendbuf(req, buf);
        rpcrdma_buffer_put_mrs(req, buf);
        return NULL;
}
/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if needed) is attached to send buffer upon return.
 *
 * rb_send_index and rb_recv_index MUST always be pointing to the
 * *next* available buffer (non-NULL). They are incremented after
 * removing buffers, and decremented *before* returning them.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
        struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
        struct list_head stale;
        struct rpcrdma_req *req;
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        if (buffers->rb_send_index == buffers->rb_max_requests) {
                spin_unlock_irqrestore(&buffers->rb_lock, flags);
                dprintk("RPC: %s: out of request buffers\n", __func__);
                return ((struct rpcrdma_req *)NULL);
        }

        req = buffers->rb_send_bufs[buffers->rb_send_index];
        if (buffers->rb_send_index < buffers->rb_recv_index) {
                dprintk("RPC: %s: %d extra receives outstanding (ok)\n",
                        __func__,
                        buffers->rb_recv_index - buffers->rb_send_index);
                req->rl_reply = NULL;
        } else {
                req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
                buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
        }
        buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;

        INIT_LIST_HEAD(&stale);
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
                req = rpcrdma_buffer_get_frmrs(req, buffers, &stale);
                break;
        case RPCRDMA_MTHCAFMR:
                req = rpcrdma_buffer_get_fmrs(req, buffers);
                break;
        default:
                break;
        }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
        if (!list_empty(&stale))
                rpcrdma_retry_flushed_linv(&stale, buffers);
        return req;
}
/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;
        struct rpcrdma_ia *ia = rdmab_to_ia(buffers);
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        rpcrdma_buffer_put_sendbuf(req, buffers);
        switch (ia->ri_memreg_strategy) {
        case RPCRDMA_FRMR:
        case RPCRDMA_MTHCAFMR:
                rpcrdma_buffer_put_mrs(req, buffers);
                break;
        default:
                break;
        }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
/*
 * Recover reply buffers from pool.
 * This happens when recovering from error conditions.
 * Post-increment counter/array index.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;
        unsigned long flags;

        spin_lock_irqsave(&buffers->rb_lock, flags);
        if (buffers->rb_recv_index < buffers->rb_max_requests) {
                req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index];
                buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL;
        }
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
        struct rpcrdma_buffer *buffers = rep->rr_buffer;
        unsigned long flags;

        rep->rr_func = NULL;
        spin_lock_irqsave(&buffers->rb_lock, flags);
        buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep;
        spin_unlock_irqrestore(&buffers->rb_lock, flags);
}
/*
 * Wrappers for internal-use kmalloc memory registration, used by buffer code.
 */

static int
rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
                                struct ib_mr **mrp, struct ib_sge *iov)
{
        struct ib_phys_buf ipb;
        struct ib_mr *mr;
        int rc;

        /*
         * All memory passed here was kmalloc'ed, therefore phys-contiguous.
         */
        iov->addr = ib_dma_map_single(ia->ri_id->device,
                        va, len, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(ia->ri_id->device, iov->addr))
                return -ENOMEM;

        iov->length = len;

        if (ia->ri_have_dma_lkey) {
                *mrp = NULL;
                iov->lkey = ia->ri_dma_lkey;
                return 0;
        } else if (ia->ri_bind_mem != NULL) {
                *mrp = NULL;
                iov->lkey = ia->ri_bind_mem->lkey;
                return 0;
        }

        ipb.addr = iov->addr;
        ipb.size = iov->length;
        mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
                        IB_ACCESS_LOCAL_WRITE, &iov->addr);

        dprintk("RPC: %s: phys convert: 0x%llx "
                        "registered 0x%llx length %d\n",
                        __func__, (unsigned long long)ipb.addr,
                        (unsigned long long)iov->addr, len);

        if (IS_ERR(mr)) {
                *mrp = NULL;
                rc = PTR_ERR(mr);
                dprintk("RPC: %s: failed with %i\n", __func__, rc);
        } else {
                *mrp = mr;
                iov->lkey = mr->lkey;
                rc = 0;
        }

        return rc;
}
static int
rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
                                struct ib_mr *mr, struct ib_sge *iov)
{
        int rc;

        ib_dma_unmap_single(ia->ri_id->device,
                        iov->addr, iov->length, DMA_BIDIRECTIONAL);

        if (NULL == mr)
                return 0;

        rc = ib_dereg_mr(mr);
        if (rc)
                dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc);
        return rc;
}
/**
 * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
 * @ia: controlling rpcrdma_ia
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns pointer to private header of an area of internally
 * registered memory, or an ERR_PTR. The registered buffer follows
 * the end of the private header.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. regbufs are not
 * used for RDMA READ/WRITE operations, thus are registered only for
 * LOCAL access.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
{
        struct rpcrdma_regbuf *rb;
        int rc;

        rc = -ENOMEM;
        rb = kmalloc(sizeof(*rb) + size, flags);
        if (rb == NULL)
                goto out;

        rb->rg_size = size;
        rb->rg_owner = NULL;
        rc = rpcrdma_register_internal(ia, rb->rg_base, size,
                                       &rb->rg_mr, &rb->rg_iov);
        if (rc)
                goto out_free;

        return rb;

out_free:
        kfree(rb);
out:
        return ERR_PTR(rc);
}
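/*
 * Note: the struct rpcrdma_regbuf header and its data area come from the
 * single kmalloc above, so rg_base (the storage that follows the header)
 * is the start of the registered buffer described by rg_iov.
 */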
/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
        if (rb) {
                rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
                kfree(rb);
        }
}
/*
 * Wrappers for chunk registration, shared by read/write chunk code.
 */

static void
rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
{
        seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
        seg->mr_dmalen = seg->mr_len;
        if (seg->mr_page)
                seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
                                seg->mr_page, offset_in_page(seg->mr_offset),
                                seg->mr_dmalen, seg->mr_dir);
        else
                seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
                                seg->mr_offset,
                                seg->mr_dmalen, seg->mr_dir);
        if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
                dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
                        __func__,
                        (unsigned long long)seg->mr_dma,
                        seg->mr_offset, seg->mr_dmalen);
        }
}
static void
rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
{
        if (seg->mr_page)
                ib_dma_unmap_page(ia->ri_id->device,
                                seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
        else
                ib_dma_unmap_single(ia->ri_id->device,
                                seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
                        int *nsegs, int writing, struct rpcrdma_ia *ia,
                        struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        struct rpcrdma_mw *mw = seg1->rl_mw;
        struct rpcrdma_frmr *frmr = &mw->r.frmr;
        struct ib_mr *mr = frmr->fr_mr;
        struct ib_send_wr fastreg_wr, *bad_wr;
        u8 key;
        int len, pageoff;
        int i, rc;
        int seg_len;
        u64 pa;
        int page_no;

        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (*nsegs > ia->ri_max_frmr_depth)
                *nsegs = ia->ri_max_frmr_depth;
        for (page_no = i = 0; i < *nsegs;) {
                rpcrdma_map_one(ia, seg, writing);
                pa = seg->mr_dma;
                for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
                        frmr->fr_pgl->page_list[page_no++] = pa;
                        pa += PAGE_SIZE;
                }
                len += seg->mr_len;
                ++seg;
                ++i;
                /* Check for holes */
                if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
        dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n",
                __func__, mw, i, len);

        frmr->fr_state = FRMR_IS_VALID;

        memset(&fastreg_wr, 0, sizeof(fastreg_wr));
        fastreg_wr.wr_id = (unsigned long)(void *)mw;
        fastreg_wr.opcode = IB_WR_FAST_REG_MR;
        fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
        fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
        fastreg_wr.wr.fast_reg.page_list_len = page_no;
        fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
        fastreg_wr.wr.fast_reg.length = len;

        key = (u8)(mr->rkey & 0x000000FF);
        ib_update_fast_reg_key(mr, ++key);

        fastreg_wr.wr.fast_reg.access_flags = (writing ?
                                IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
                                IB_ACCESS_REMOTE_READ);
        fastreg_wr.wr.fast_reg.rkey = mr->rkey;
        DECR_CQCOUNT(&r_xprt->rx_ep);

        rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
        if (rc) {
                dprintk("RPC: %s: failed ib_post_send for register,"
                        " status %i\n", __func__, rc);
                ib_update_fast_reg_key(mr, --key);
                goto out_err;
        } else {
                seg1->mr_rkey = mr->rkey;
                seg1->mr_base = seg1->mr_dma + pageoff;
                seg1->mr_nsegs = i;
                seg1->mr_len = len;
        }
        *nsegs = i;
        return 0;
out_err:
        frmr->fr_state = FRMR_IS_INVALID;
        while (i--)
                rpcrdma_unmap_one(ia, --seg);
        return rc;
}
static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
                        struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        struct ib_send_wr invalidate_wr, *bad_wr;
        int rc;

        seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;

        memset(&invalidate_wr, 0, sizeof invalidate_wr);
        invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
        invalidate_wr.opcode = IB_WR_LOCAL_INV;
        invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
        DECR_CQCOUNT(&r_xprt->rx_ep);

        read_lock(&ia->ri_qplock);
        while (seg1->mr_nsegs--)
                rpcrdma_unmap_one(ia, seg++);
        rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
        read_unlock(&ia->ri_qplock);
        if (rc) {
                /* Force rpcrdma_buffer_get() to retry */
                seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
                dprintk("RPC: %s: failed ib_post_send for invalidate,"
                        " status %i\n", __func__, rc);
        }
        return rc;
}
static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
                        int *nsegs, int writing, struct rpcrdma_ia *ia)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
        int len, pageoff, i, rc;

        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
                *nsegs = RPCRDMA_MAX_DATA_SEGS;
        for (i = 0; i < *nsegs;) {
                rpcrdma_map_one(ia, seg, writing);
                physaddrs[i] = seg->mr_dma;
                len += seg->mr_len;
                ++seg;
                ++i;
                /* Check for holes */
                if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
        rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
        if (rc) {
                dprintk("RPC: %s: failed ib_map_phys_fmr "
                        "%u@0x%llx+%i (%d)... status %i\n", __func__,
                        len, (unsigned long long)seg1->mr_dma,
                        pageoff, i, rc);
                while (i--)
                        rpcrdma_unmap_one(ia, --seg);
        } else {
                seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
                seg1->mr_base = seg1->mr_dma + pageoff;
                seg1->mr_nsegs = i;
                seg1->mr_len = len;
        }
        *nsegs = i;
        return rc;
}
static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
                        struct rpcrdma_ia *ia)
{
        struct rpcrdma_mr_seg *seg1 = seg;
        LIST_HEAD(l);
        int rc;

        list_add(&seg1->rl_mw->r.fmr->list, &l);
        rc = ib_unmap_fmr(&l);
        read_lock(&ia->ri_qplock);
        while (seg1->mr_nsegs--)
                rpcrdma_unmap_one(ia, seg++);
        read_unlock(&ia->ri_qplock);
        if (rc)
                dprintk("RPC: %s: failed ib_unmap_fmr,"
                        " status %i\n", __func__, rc);
        return rc;
}
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
                        int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        int rc = 0;

        switch (ia->ri_memreg_strategy) {

        case RPCRDMA_ALLPHYSICAL:
                rpcrdma_map_one(ia, seg, writing);
                seg->mr_rkey = ia->ri_bind_mem->rkey;
                seg->mr_base = seg->mr_dma;
                seg->mr_nsegs = 1;
                nsegs = 1;
                break;

        /* Registration using frmr registration */
        case RPCRDMA_FRMR:
                rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
                break;

        /* Registration using fmr memory registration */
        case RPCRDMA_MTHCAFMR:
                rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
                break;

        default:
                return -EIO;
        }
        if (rc)
                return rc;

        return nsegs;
}
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
                        struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        int nsegs = seg->mr_nsegs, rc;

        switch (ia->ri_memreg_strategy) {

        case RPCRDMA_ALLPHYSICAL:
                read_lock(&ia->ri_qplock);
                rpcrdma_unmap_one(ia, seg);
                read_unlock(&ia->ri_qplock);
                break;

        case RPCRDMA_FRMR:
                rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
                break;

        case RPCRDMA_MTHCAFMR:
                rc = rpcrdma_deregister_fmr_external(seg, ia);
                break;

        default:
                break;
        }
        return nsegs;
}
/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
                struct rpcrdma_ep *ep,
                struct rpcrdma_req *req)
{
        struct ib_send_wr send_wr, *send_wr_fail;
        struct rpcrdma_rep *rep = req->rl_reply;
        int rc;

        if (rep) {
                rc = rpcrdma_ep_post_recv(ia, ep, rep);
                if (rc)
                        goto out;
                req->rl_reply = NULL;
        }

        send_wr.next = NULL;
        send_wr.wr_id = 0ULL;   /* no send cookie */
        send_wr.sg_list = req->rl_send_iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
        if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
                ib_dma_sync_single_for_device(ia->ri_id->device,
                        req->rl_send_iov[3].addr, req->rl_send_iov[3].length,
                        DMA_TO_DEVICE);
        ib_dma_sync_single_for_device(ia->ri_id->device,
                req->rl_send_iov[1].addr, req->rl_send_iov[1].length,
                DMA_TO_DEVICE);
        ib_dma_sync_single_for_device(ia->ri_id->device,
                req->rl_send_iov[0].addr, req->rl_send_iov[0].length,
                DMA_TO_DEVICE);

        if (DECR_CQCOUNT(ep) > 0)
                send_wr.send_flags = 0;
        else { /* Provider must take a send completion every now and then */
                INIT_CQCOUNT(ep);
                send_wr.send_flags = IB_SEND_SIGNALED;
        }
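        /*
         * Note: this unsignaled-send accounting pairs with ep->rep_cqinit,
         * set up in rpcrdma_ep_create(): at most rep_cqinit SENDs are
         * posted unsignaled before one completion is requested, which
         * prevents the send CQ from silently wrapping.
         */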
        rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
        if (rc)
                dprintk("RPC: %s: ib_post_send returned %i\n", __func__,
                        rc);
out:
        return rc;
}
/*
 * (Re)post a receive buffer.
 */
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
                     struct rpcrdma_ep *ep,
                     struct rpcrdma_rep *rep)
{
        struct ib_recv_wr recv_wr, *recv_wr_fail;
        int rc;

        recv_wr.next = NULL;
        recv_wr.wr_id = (u64) (unsigned long) rep;
        recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        recv_wr.num_sge = 1;

        ib_dma_sync_single_for_cpu(ia->ri_id->device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rdmab_length(rep->rr_rdmabuf),
                                   DMA_BIDIRECTIONAL);

        rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);

        if (rc)
                dprintk("RPC: %s: ib_post_recv returned %i\n", __func__,
                        rc);
        return rc;
}
/* Physical mapping means one Read/Write list entry per-page.
 * All list entries must fit within an inline buffer.
 *
 * NB: The server must return a Write list for NFS READ,
 *     which has the same constraint. Factor in the inline
 *     buffer size.
 */
static size_t
rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        unsigned int inline_size, pages;

        inline_size = min_t(unsigned int,
                            cdata->inline_wsize, cdata->inline_rsize);
        inline_size -= RPCRDMA_HDRLEN_MIN;
        pages = inline_size / sizeof(struct rpcrdma_segment);
        return pages << PAGE_SHIFT;
}
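/*
 * For example, assuming 1024-byte inline buffers, the 28-byte minimal
 * RPC-over-RDMA header, and 16-byte chunk segments, the calculation
 * above yields (1024 - 28) / 16 = 62 list entries, or about 248 KB of
 * payload with 4 KB pages.
 */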
static size_t
rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
{
        return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
}
size_t
rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
{
        size_t result;

        switch (r_xprt->rx_ia.ri_memreg_strategy) {
        case RPCRDMA_ALLPHYSICAL:
                result = rpcrdma_physical_max_payload(r_xprt);
                break;
        default:
                result = rpcrdma_mr_max_payload(r_xprt);
        }
        return result;
}