Commit | Line | Data |
---|---|---|
5d252f90 CL |
1 | /* |
2 | * Copyright (c) 2015 Oracle. All rights reserved. | |
3 | * | |
4 | * Support for backward direction RPCs on RPC/RDMA (server-side). | |
5 | */ | |
6 | ||
7 | #include <linux/sunrpc/svc_rdma.h> | |
8 | #include "xprt_rdma.h" | |
9 | ||
10 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | |
11 | ||
12 | #undef SVCRDMA_BACKCHANNEL_DEBUG | |
13 | ||
14 | int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, | |
15 | struct xdr_buf *rcvbuf) | |
16 | { | |
17 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
18 | struct kvec *dst, *src = &rcvbuf->head[0]; | |
19 | struct rpc_rqst *req; | |
20 | unsigned long cwnd; | |
21 | u32 credits; | |
22 | size_t len; | |
23 | __be32 xid; | |
24 | __be32 *p; | |
25 | int ret; | |
26 | ||
27 | p = (__be32 *)src->iov_base; | |
28 | len = src->iov_len; | |
29 | xid = rmsgp->rm_xid; | |
30 | ||
31 | #ifdef SVCRDMA_BACKCHANNEL_DEBUG | |
32 | pr_info("%s: xid=%08x, length=%zu\n", | |
33 | __func__, be32_to_cpu(xid), len); | |
34 | pr_info("%s: RPC/RDMA: %*ph\n", | |
35 | __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp); | |
36 | pr_info("%s: RPC: %*ph\n", | |
37 | __func__, (int)len, p); | |
38 | #endif | |
39 | ||
40 | ret = -EAGAIN; | |
41 | if (src->iov_len < 24) | |
42 | goto out_shortreply; | |
43 | ||
44 | spin_lock_bh(&xprt->transport_lock); | |
45 | req = xprt_lookup_rqst(xprt, xid); | |
46 | if (!req) | |
47 | goto out_notfound; | |
48 | ||
49 | dst = &req->rq_private_buf.head[0]; | |
50 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); | |
51 | if (dst->iov_len < len) | |
52 | goto out_unlock; | |
53 | memcpy(dst->iov_base, p, len); | |
54 | ||
55 | credits = be32_to_cpu(rmsgp->rm_credit); | |
56 | if (credits == 0) | |
57 | credits = 1; /* don't deadlock */ | |
58 | else if (credits > r_xprt->rx_buf.rb_bc_max_requests) | |
59 | credits = r_xprt->rx_buf.rb_bc_max_requests; | |
60 | ||
61 | cwnd = xprt->cwnd; | |
62 | xprt->cwnd = credits << RPC_CWNDSHIFT; | |
63 | if (xprt->cwnd > cwnd) | |
64 | xprt_release_rqst_cong(req->rq_task); | |
65 | ||
66 | ret = 0; | |
67 | xprt_complete_rqst(req->rq_task, rcvbuf->len); | |
68 | rcvbuf->len = 0; | |
69 | ||
70 | out_unlock: | |
71 | spin_unlock_bh(&xprt->transport_lock); | |
72 | out: | |
73 | return ret; | |
74 | ||
75 | out_shortreply: | |
76 | dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", | |
77 | xprt, src->iov_len); | |
78 | goto out; | |
79 | ||
80 | out_notfound: | |
81 | dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", | |
82 | xprt, be32_to_cpu(xid)); | |
83 | ||
84 | goto out_unlock; | |
85 | } | |
86 | ||
87 | /* Send a backwards direction RPC call. | |
88 | * | |
89 | * Caller holds the connection's mutex and has already marshaled | |
90 | * the RPC/RDMA request. | |
91 | * | |
92 | * This is similar to svc_rdma_reply, but takes an rpc_rqst | |
93 | * instead, does not support chunks, and avoids blocking memory | |
94 | * allocation. | |
95 | * | |
96 | * XXX: There is still an opportunity to block in svc_rdma_send() | |
97 | * if there are no SQ entries to post the Send. This may occur if | |
98 | * the adapter has a small maximum SQ depth. | |
99 | */ | |
100 | static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, | |
101 | struct rpc_rqst *rqst) | |
102 | { | |
103 | struct xdr_buf *sndbuf = &rqst->rq_snd_buf; | |
104 | struct svc_rdma_op_ctxt *ctxt; | |
105 | struct svc_rdma_req_map *vec; | |
106 | struct ib_send_wr send_wr; | |
107 | int ret; | |
108 | ||
109 | vec = svc_rdma_get_req_map(rdma); | |
f6763c29 | 110 | ret = svc_rdma_map_xdr(rdma, sndbuf, vec, false); |
5d252f90 CL |
111 | if (ret) |
112 | goto out_err; | |
113 | ||
114 | /* Post a recv buffer to handle the reply for this request. */ | |
115 | ret = svc_rdma_post_recv(rdma, GFP_NOIO); | |
116 | if (ret) { | |
117 | pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n", | |
118 | ret); | |
119 | pr_err("svcrdma: closing transport %p.\n", rdma); | |
120 | set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); | |
121 | ret = -ENOTCONN; | |
122 | goto out_err; | |
123 | } | |
124 | ||
125 | ctxt = svc_rdma_get_context(rdma); | |
126 | ctxt->pages[0] = virt_to_page(rqst->rq_buffer); | |
127 | ctxt->count = 1; | |
128 | ||
129 | ctxt->wr_op = IB_WR_SEND; | |
130 | ctxt->direction = DMA_TO_DEVICE; | |
5fe1043d | 131 | ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; |
5d252f90 CL |
132 | ctxt->sge[0].length = sndbuf->len; |
133 | ctxt->sge[0].addr = | |
134 | ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, | |
135 | sndbuf->len, DMA_TO_DEVICE); | |
136 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { | |
137 | ret = -EIO; | |
138 | goto out_unmap; | |
139 | } | |
140 | atomic_inc(&rdma->sc_dma_used); | |
141 | ||
142 | memset(&send_wr, 0, sizeof(send_wr)); | |
143 | send_wr.wr_id = (unsigned long)ctxt; | |
144 | send_wr.sg_list = ctxt->sge; | |
145 | send_wr.num_sge = 1; | |
146 | send_wr.opcode = IB_WR_SEND; | |
147 | send_wr.send_flags = IB_SEND_SIGNALED; | |
148 | ||
149 | ret = svc_rdma_send(rdma, &send_wr); | |
150 | if (ret) { | |
151 | ret = -EIO; | |
152 | goto out_unmap; | |
153 | } | |
154 | ||
155 | out_err: | |
156 | svc_rdma_put_req_map(rdma, vec); | |
157 | dprintk("svcrdma: %s returns %d\n", __func__, ret); | |
158 | return ret; | |
159 | ||
160 | out_unmap: | |
161 | svc_rdma_unmap_dma(ctxt); | |
162 | svc_rdma_put_context(ctxt, 1); | |
163 | goto out_err; | |
164 | } | |
165 | ||
166 | /* Server-side transport endpoint wants a whole page for its send | |
167 | * buffer. The client RPC code constructs the RPC header in this | |
168 | * buffer before it invokes ->send_request. | |
169 | * | |
170 | * Returns NULL if there was a temporary allocation failure. | |
171 | */ | |
172 | static void * | |
173 | xprt_rdma_bc_allocate(struct rpc_task *task, size_t size) | |
174 | { | |
175 | struct rpc_rqst *rqst = task->tk_rqstp; | |
176 | struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; | |
177 | struct svcxprt_rdma *rdma; | |
178 | struct page *page; | |
179 | ||
180 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); | |
181 | ||
182 | /* Prevent an infinite loop: try to make this case work */ | |
183 | if (size > PAGE_SIZE) | |
184 | WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", | |
185 | size); | |
186 | ||
187 | page = alloc_page(RPCRDMA_DEF_GFP); | |
188 | if (!page) | |
189 | return NULL; | |
190 | ||
191 | return page_address(page); | |
192 | } | |
193 | ||
/* ->buf_free method for the backchannel transport.
 *
 * Intentionally does nothing: by the time this is called the ctxt
 * and the page behind @buffer have already been freed.
 */
static void
xprt_rdma_bc_free(void *buffer)
{
}
199 | ||
200 | static int | |
201 | rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) | |
202 | { | |
203 | struct rpc_xprt *xprt = rqst->rq_xprt; | |
204 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | |
205 | struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; | |
206 | int rc; | |
207 | ||
208 | /* Space in the send buffer for an RPC/RDMA header is reserved | |
209 | * via xprt->tsh_size. | |
210 | */ | |
211 | headerp->rm_xid = rqst->rq_xid; | |
212 | headerp->rm_vers = rpcrdma_version; | |
213 | headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); | |
214 | headerp->rm_type = rdma_msg; | |
215 | headerp->rm_body.rm_chunks[0] = xdr_zero; | |
216 | headerp->rm_body.rm_chunks[1] = xdr_zero; | |
217 | headerp->rm_body.rm_chunks[2] = xdr_zero; | |
218 | ||
219 | #ifdef SVCRDMA_BACKCHANNEL_DEBUG | |
220 | pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); | |
221 | #endif | |
222 | ||
223 | rc = svc_rdma_bc_sendto(rdma, rqst); | |
224 | if (rc) | |
225 | goto drop_connection; | |
226 | return rc; | |
227 | ||
228 | drop_connection: | |
229 | dprintk("svcrdma: failed to send bc call\n"); | |
230 | xprt_disconnect_done(xprt); | |
231 | return -ENOTCONN; | |
232 | } | |
233 | ||
234 | /* Send an RPC call on the passive end of a transport | |
235 | * connection. | |
236 | */ | |
237 | static int | |
238 | xprt_rdma_bc_send_request(struct rpc_task *task) | |
239 | { | |
240 | struct rpc_rqst *rqst = task->tk_rqstp; | |
241 | struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; | |
242 | struct svcxprt_rdma *rdma; | |
243 | int ret; | |
244 | ||
245 | dprintk("svcrdma: sending bc call with xid: %08x\n", | |
246 | be32_to_cpu(rqst->rq_xid)); | |
247 | ||
248 | if (!mutex_trylock(&sxprt->xpt_mutex)) { | |
249 | rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL); | |
250 | if (!mutex_trylock(&sxprt->xpt_mutex)) | |
251 | return -EAGAIN; | |
252 | rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task); | |
253 | } | |
254 | ||
255 | ret = -ENOTCONN; | |
256 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); | |
257 | if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) | |
258 | ret = rpcrdma_bc_send_request(rdma, rqst); | |
259 | ||
260 | mutex_unlock(&sxprt->xpt_mutex); | |
261 | ||
262 | if (ret < 0) | |
263 | return ret; | |
264 | return 0; | |
265 | } | |
266 | ||
/* ->close method for the backchannel transport: only emits a debug
 * message identifying @xprt.
 */
static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
	dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
}
272 | ||
273 | static void | |
274 | xprt_rdma_bc_put(struct rpc_xprt *xprt) | |
275 | { | |
276 | dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); | |
277 | ||
278 | xprt_free(xprt); | |
279 | module_put(THIS_MODULE); | |
280 | } | |
281 | ||
282 | static struct rpc_xprt_ops xprt_rdma_bc_procs = { | |
283 | .reserve_xprt = xprt_reserve_xprt_cong, | |
284 | .release_xprt = xprt_release_xprt_cong, | |
285 | .alloc_slot = xprt_alloc_slot, | |
286 | .release_request = xprt_release_rqst_cong, | |
287 | .buf_alloc = xprt_rdma_bc_allocate, | |
288 | .buf_free = xprt_rdma_bc_free, | |
289 | .send_request = xprt_rdma_bc_send_request, | |
290 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | |
291 | .close = xprt_rdma_bc_close, | |
292 | .destroy = xprt_rdma_bc_put, | |
293 | .print_stats = xprt_rdma_print_stats | |
294 | }; | |
295 | ||
296 | static const struct rpc_timeout xprt_rdma_bc_timeout = { | |
297 | .to_initval = 60 * HZ, | |
298 | .to_maxval = 60 * HZ, | |
299 | }; | |
300 | ||
301 | /* It shouldn't matter if the number of backchannel session slots | |
302 | * doesn't match the number of RPC/RDMA credits. That just means | |
303 | * one or the other will have extra slots that aren't used. | |
304 | */ | |
305 | static struct rpc_xprt * | |
306 | xprt_setup_rdma_bc(struct xprt_create *args) | |
307 | { | |
308 | struct rpc_xprt *xprt; | |
309 | struct rpcrdma_xprt *new_xprt; | |
310 | ||
311 | if (args->addrlen > sizeof(xprt->addr)) { | |
312 | dprintk("RPC: %s: address too large\n", __func__); | |
313 | return ERR_PTR(-EBADF); | |
314 | } | |
315 | ||
316 | xprt = xprt_alloc(args->net, sizeof(*new_xprt), | |
317 | RPCRDMA_MAX_BC_REQUESTS, | |
318 | RPCRDMA_MAX_BC_REQUESTS); | |
319 | if (!xprt) { | |
320 | dprintk("RPC: %s: couldn't allocate rpc_xprt\n", | |
321 | __func__); | |
322 | return ERR_PTR(-ENOMEM); | |
323 | } | |
324 | ||
325 | xprt->timeout = &xprt_rdma_bc_timeout; | |
326 | xprt_set_bound(xprt); | |
327 | xprt_set_connected(xprt); | |
328 | xprt->bind_timeout = RPCRDMA_BIND_TO; | |
329 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | |
330 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; | |
331 | ||
332 | xprt->prot = XPRT_TRANSPORT_BC_RDMA; | |
333 | xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); | |
334 | xprt->ops = &xprt_rdma_bc_procs; | |
335 | ||
336 | memcpy(&xprt->addr, args->dstaddr, args->addrlen); | |
337 | xprt->addrlen = args->addrlen; | |
338 | xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr); | |
339 | xprt->resvport = 0; | |
340 | ||
341 | xprt->max_payload = xprt_rdma_max_inline_read; | |
342 | ||
343 | new_xprt = rpcx_to_rdmax(xprt); | |
344 | new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs; | |
345 | ||
346 | xprt_get(xprt); | |
347 | args->bc_xprt->xpt_bc_xprt = xprt; | |
348 | xprt->bc_xprt = args->bc_xprt; | |
349 | ||
350 | if (!try_module_get(THIS_MODULE)) | |
351 | goto out_fail; | |
352 | ||
353 | /* Final put for backchannel xprt is in __svc_rdma_free */ | |
354 | xprt_get(xprt); | |
355 | return xprt; | |
356 | ||
357 | out_fail: | |
358 | xprt_rdma_free_addresses(xprt); | |
359 | args->bc_xprt->xpt_bc_xprt = NULL; | |
360 | xprt_put(xprt); | |
361 | xprt_free(xprt); | |
362 | return ERR_PTR(-EINVAL); | |
363 | } | |
364 | ||
365 | struct xprt_class xprt_rdma_bc = { | |
366 | .list = LIST_HEAD_INIT(xprt_rdma_bc.list), | |
367 | .name = "rdma backchannel", | |
368 | .owner = THIS_MODULE, | |
369 | .ident = XPRT_TRANSPORT_BC_RDMA, | |
370 | .setup = xprt_setup_rdma_bc, | |
371 | }; |