IB/iser: Make fastreg pool cache friendly
drivers/infiniband/ulp/iser/iser_memory.c
/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>

#include "iscsi_iser.h"

#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */

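/*
 * iser_reg_desc_get - take a fast registration descriptor from the
 * connection pool (ib_conn->fastreg.pool), under ib_conn->lock.
 * No empty-list check is made here, so the caller is expected to
 * guarantee that a descriptor is available.
 */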
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn)
{
        struct fast_reg_descriptor *desc;
        unsigned long flags;

        spin_lock_irqsave(&ib_conn->lock, flags);
        desc = list_first_entry(&ib_conn->fastreg.pool,
                                struct fast_reg_descriptor, list);
        list_del(&desc->list);
        spin_unlock_irqrestore(&ib_conn->lock, flags);

        return desc;
}

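/*
 * iser_reg_desc_put - return a descriptor to the connection pool.
 * The descriptor is added at the head of the list, so the most recently
 * released descriptor is handed out again first; presumably this is the
 * "cache friendly" reuse the patch title refers to.
 */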
void
iser_reg_desc_put(struct ib_conn *ib_conn,
                  struct fast_reg_descriptor *desc)
{
        unsigned long flags;

        spin_lock_irqsave(&ib_conn->lock, flags);
        list_add(&desc->list, &ib_conn->fastreg.pool);
        spin_unlock_irqrestore(&ib_conn->lock, flags);
}

/**
 * iser_start_rdma_unaligned_sg - allocate a contiguous bounce buffer for
 * an unaligned scatterlist, copy the data into it for writes, and DMA-map
 * it in place of the original sg
 */
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                        struct iser_data_buf *data,
                                        enum iser_data_dir cmd_dir)
{
        struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
        struct scatterlist *sgl = data->sg;
        struct scatterlist *sg;
        char *mem = NULL;
        unsigned long cmd_data_len = data->data_len;
        int dma_nents, i;

        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                mem = (void *)__get_free_pages(GFP_ATOMIC,
                      ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
        else
                mem = kmalloc(cmd_data_len, GFP_ATOMIC);

        if (mem == NULL) {
                iser_err("Failed to allocate mem size %d %d for copying sglist\n",
                         data->size, (int)cmd_data_len);
                return -ENOMEM;
        }

        if (cmd_dir == ISER_DIR_OUT) {
                /* copy the unaligned sg into the buffer which is used for RDMA */
                char *p, *from;

                sgl = data->sg;
                p = mem;
                for_each_sg(sgl, sg, data->size, i) {
                        from = kmap_atomic(sg_page(sg));
                        memcpy(p,
                               from + sg->offset,
                               sg->length);
                        kunmap_atomic(from);
                        p += sg->length;
                }
        }

        sg_init_one(&data->sg_single, mem, cmd_data_len);
        data->orig_sg = data->sg;
        data->sg = &data->sg_single;
        data->copy_buf = mem;
        dma_nents = ib_dma_map_sg(dev, data->sg, 1,
                                  (cmd_dir == ISER_DIR_OUT) ?
                                  DMA_TO_DEVICE : DMA_FROM_DEVICE);
        BUG_ON(dma_nents == 0);

        data->dma_nents = dma_nents;

        return 0;
}

/**
 * iser_finalize_rdma_unaligned_sg - unmap the bounce buffer, copy read data
 * back to the unaligned scatterlist, and free the buffer
 */
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     struct iser_data_buf *data,
                                     enum iser_data_dir cmd_dir)
{
        struct ib_device *dev;
        unsigned long cmd_data_len;

        dev = iser_task->iser_conn->ib_conn.device->ib_device;

        ib_dma_unmap_sg(dev, data->sg, 1,
                        (cmd_dir == ISER_DIR_OUT) ?
                        DMA_TO_DEVICE : DMA_FROM_DEVICE);

        if (cmd_dir == ISER_DIR_IN) {
                char *mem;
                struct scatterlist *sgl, *sg;
                unsigned char *p, *to;
                unsigned int sg_size;
                int i;

                /* copy back read RDMA to unaligned sg */
                mem = data->copy_buf;

                sgl = data->sg;
                sg_size = data->size;

                p = mem;
                for_each_sg(sgl, sg, sg_size, i) {
                        to = kmap_atomic(sg_page(sg));
                        memcpy(to + sg->offset,
                               p,
                               sg->length);
                        kunmap_atomic(to);
                        p += sg->length;
                }
        }

        cmd_data_len = data->data_len;

        if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
                free_pages((unsigned long)data->copy_buf,
                           ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
        else
                kfree(data->copy_buf);

        data->copy_buf = NULL;
}

#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)

/**
 * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
 * and returns the length of the resulting physical address array (may be less
 * than the original due to possible compaction).
 *
 * We build a "page vec" under the assumption that the SG meets the RDMA
 * alignment requirements. Other than the first and last SG elements, all
 * the "internal" elements can be compacted into a list whose elements are
 * dma addresses of physical pages. The code also supports the weird case
 * where a few fragments of the same page are present in the SG as
 * consecutive elements, and it handles a single-entry SG as well.
 */

static int iser_sg_to_page_vec(struct iser_data_buf *data,
                               struct ib_device *ibdev, u64 *pages,
                               int *offset, int *data_size)
{
        struct scatterlist *sg, *sgl = data->sg;
        u64 start_addr, end_addr, page, chunk_start = 0;
        unsigned long total_sz = 0;
        unsigned int dma_len;
        int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;

        /* compute the offset of first element */
        *offset = (u64) sgl[0].offset & ~MASK_4K;

        new_chunk = 1;
        cur_page = 0;
        for_each_sg(sgl, sg, data->dma_nents, i) {
                start_addr = ib_sg_dma_address(ibdev, sg);
                if (new_chunk)
                        chunk_start = start_addr;
                dma_len = ib_sg_dma_len(ibdev, sg);
                end_addr = start_addr + dma_len;
                total_sz += dma_len;

                /* collect page fragments until aligned or end of SG list */
                if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
                        new_chunk = 0;
                        continue;
                }
                new_chunk = 1;

                /* address of the first page in the contiguous chunk;
                   masking relevant for the very first SG entry,
                   which might be unaligned */
                page = chunk_start & MASK_4K;
                do {
                        pages[cur_page++] = page;
                        page += SIZE_4K;
                } while (page < end_addr);
        }

        *data_size = total_sz;
        iser_dbg("page_vec->data_size:%d cur_page %d\n",
                 *data_size, cur_page);
        return cur_page;
}
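/*
 * Illustrative example (hypothetical addresses): two SG elements that are
 * fragments of the same 4K page, 0x10000-0x107ff and 0x10800-0x10fff, are
 * collected into a single chunk by the loop above and compacted into one
 * page entry (0x10000), so the function returns 1 and reports a data size
 * of 0x1000.
 */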
/**
 * iser_data_buf_aligned_len - Tries to determine the maximal sub-list of a
 * scatter-gather list of memory buffers that is correctly aligned for RDMA,
 * and returns the number of correctly aligned entries. Supports the case
 * where consecutive SG elements are actually fragments of the same physical
 * page.
 */
static int iser_data_buf_aligned_len(struct iser_data_buf *data,
                                     struct ib_device *ibdev)
{
        struct scatterlist *sg, *sgl, *next_sg = NULL;
        u64 start_addr, end_addr;
        int i, ret_len, start_check = 0;

        if (data->dma_nents == 1)
                return 1;

        sgl = data->sg;
        start_addr = ib_sg_dma_address(ibdev, sgl);

        for_each_sg(sgl, sg, data->dma_nents, i) {
                if (start_check && !IS_4K_ALIGNED(start_addr))
                        break;

                next_sg = sg_next(sg);
                if (!next_sg)
                        break;

                end_addr = start_addr + ib_sg_dma_len(ibdev, sg);
                start_addr = ib_sg_dma_address(ibdev, next_sg);

                if (end_addr == start_addr) {
                        start_check = 0;
                        continue;
                } else
                        start_check = 1;

                if (!IS_4K_ALIGNED(end_addr))
                        break;
        }
        ret_len = (next_sg) ? i : i+1;
        iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n",
                 ret_len, data->dma_nents, data);
        return ret_len;
}

static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
{
        struct scatterlist *sg;
        int i;

        for_each_sg(data->sg, sg, data->dma_nents, i)
                iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
                         "off:0x%x sz:0x%x dma_len:0x%x\n",
                         i, (unsigned long)ib_sg_dma_address(ibdev, sg),
                         sg_page(sg), sg->offset,
                         sg->length, ib_sg_dma_len(ibdev, sg));
}

static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
        int i;

        iser_err("page vec length %d data size %d\n",
                 page_vec->length, page_vec->data_size);
        for (i = 0; i < page_vec->length; i++)
                iser_err("%d %lx\n", i, (unsigned long)page_vec->pages[i]);
}

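/*
 * iser_dma_map_task_data - mark the direction as in use, DMA-map the task's
 * scatterlist and record the number of mapped entries in data->dma_nents.
 * Returns 0 on success or -EINVAL if the mapping produced no entries.
 */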
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
                           struct iser_data_buf *data,
                           enum iser_data_dir iser_dir,
                           enum dma_data_direction dma_dir)
{
        struct ib_device *dev;

        iser_task->dir[iser_dir] = 1;
        dev = iser_task->iser_conn->ib_conn.device->ib_device;

        data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
        if (data->dma_nents == 0) {
                iser_err("dma_map_sg failed!!!\n");
                return -EINVAL;
        }
        return 0;
}

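/*
 * iser_dma_unmap_task_data - undo iser_dma_map_task_data() for the given
 * direction.
 */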
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
                              struct iser_data_buf *data,
                              enum dma_data_direction dir)
{
        struct ib_device *dev;

        dev = iser_task->iser_conn->ib_conn.device->ib_device;
        ib_dma_unmap_sg(dev, data->sg, data->size, dir);
}

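/*
 * fall_to_bounce_buf - handle an RDMA alignment violation by falling back
 * to a contiguous bounce buffer: the original scatterlist is unmapped and
 * iser_start_rdma_unaligned_sg() allocates a copy buffer (filled for
 * writes) and DMA-maps it in its place.
 */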
static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
                              struct iser_data_buf *mem,
                              enum iser_data_dir cmd_dir,
                              int aligned_len)
{
        struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
        struct iser_device *device = iser_task->iser_conn->ib_conn.device;

        iscsi_conn->fmr_unalign_cnt++;
        iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
                  aligned_len, mem->size);

        if (iser_debug_level > 0)
                iser_data_buf_dump(mem, device->ib_device);

        /* unmap the command data before accessing it */
        iser_dma_unmap_task_data(iser_task, mem,
                                 (cmd_dir == ISER_DIR_OUT) ?
                                 DMA_TO_DEVICE : DMA_FROM_DEVICE);

        /* allocate a copy buffer; if we are writing, copy the
         * unaligned scatterlist into it, then dma map the copy */
        if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
                return -ENOMEM;

        return 0;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
static
int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
                      struct iser_data_buf *mem,
                      struct iser_page_vec *page_vec,
                      struct iser_mem_reg *mem_reg)
{
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_pool_fmr *fmr;
        int ret, plen;

        plen = iser_sg_to_page_vec(mem, device->ib_device,
                                   page_vec->pages,
                                   &page_vec->offset,
                                   &page_vec->data_size);
        page_vec->length = plen;
        if (plen * SIZE_4K < page_vec->data_size) {
                iser_err("page vec too short to hold this SG\n");
                iser_data_buf_dump(mem, device->ib_device);
                iser_dump_page_vec(page_vec);
                return -EINVAL;
        }

        fmr = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
                                   page_vec->pages,
                                   page_vec->length,
                                   page_vec->pages[0]);
        if (IS_ERR(fmr)) {
                ret = PTR_ERR(fmr);
                iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
                return ret;
        }

        mem_reg->lkey = fmr->fmr->lkey;
        mem_reg->rkey = fmr->fmr->rkey;
        mem_reg->va = page_vec->pages[0] + page_vec->offset;
        mem_reg->len = page_vec->data_size;
        mem_reg->mem_h = fmr;

        return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If the memory is not FMR-registered, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir)
{
        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
        int ret;

        if (!reg->mem_h)
                return;

        iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

        ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
        if (ret)
                iser_err("ib_fmr_pool_unmap failed %d\n", ret);

        reg->mem_h = NULL;
}

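/*
 * iser_unreg_mem_fastreg - return the fast registration descriptor that was
 * used for this task's RDMA registration (if any) back to the connection
 * pool.
 */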
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
                            enum iser_data_dir cmd_dir)
{
        struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];

        if (!reg->mem_h)
                return;

        iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
                          reg->mem_h);
        reg->mem_h = NULL;
}

/**
 * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
 * using FMR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
                          enum iser_data_dir cmd_dir)
{
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
        struct iser_mem_reg *mem_reg;
        int aligned_len;
        int err;
        int i;
        struct scatterlist *sg;

        mem_reg = &iser_task->rdma_reg[cmd_dir];

        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
                err = fall_to_bounce_buf(iser_task, mem,
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
                        return err;
                }
        }

        /* if there is a single dma entry, FMR is not needed */
        if (mem->dma_nents == 1) {
                sg = mem->sg;

                mem_reg->lkey = device->mr->lkey;
                mem_reg->rkey = device->mr->rkey;
                mem_reg->len = ib_sg_dma_len(ibdev, &sg[0]);
                mem_reg->va = ib_sg_dma_address(ibdev, &sg[0]);

                iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
                         "va: 0x%08lX sz: %ld\n",
                         (unsigned int)mem_reg->lkey,
                         (unsigned int)mem_reg->rkey,
                         (unsigned long)mem_reg->va,
                         (unsigned long)mem_reg->len);
        } else { /* use FMR for multiple dma entries */
                err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
                                        mem_reg);
                if (err && err != -EAGAIN) {
                        iser_data_buf_dump(mem, ibdev);
                        iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
                                 mem->dma_nents,
                                 ntoh24(iser_task->desc.iscsi_header.dlength));
                        iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
                                 ib_conn->fmr.page_vec->data_size,
                                 ib_conn->fmr.page_vec->length,
                                 ib_conn->fmr.page_vec->offset);
                        for (i = 0; i < ib_conn->fmr.page_vec->length; i++)
                                iser_err("page_vec[%d] = 0x%llx\n", i,
                                         (unsigned long long)ib_conn->fmr.page_vec->pages[i]);
                }
                if (err)
                        return err;
        }
        return 0;
}

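/*
 * iser_set_dif_domain - fill one T10-DIF signature domain (wire or memory)
 * from the SCSI command's protection parameters.
 */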
static void
iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
                    struct ib_sig_domain *domain)
{
        domain->sig_type = IB_SIG_TYPE_T10_DIF;
        domain->sig.dif.pi_interval = scsi_prot_interval(sc);
        domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
        /*
         * At the moment we hard code those, but in the future
         * we will take them from sc.
         */
        domain->sig.dif.apptag_check_mask = 0xffff;
        domain->sig.dif.app_escape = true;
        domain->sig.dif.ref_escape = true;
        if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
                domain->sig.dif.ref_remap = true;
}

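/*
 * iser_set_sig_attrs - translate the SCSI protection operation into
 * signature attributes: INSERT/STRIP operations leave one domain as
 * IB_SIG_TYPE_NONE, while PASS operations configure both the wire and
 * memory domains.
 */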
static int
iser_set_sig_attrs(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs)
{
        switch (scsi_get_prot_op(sc)) {
        case SCSI_PROT_WRITE_INSERT:
        case SCSI_PROT_READ_STRIP:
                sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
                iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
                break;
        case SCSI_PROT_READ_INSERT:
        case SCSI_PROT_WRITE_STRIP:
                sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
                iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
                sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
                                                 IB_T10DIF_CSUM : IB_T10DIF_CRC;
                break;
        case SCSI_PROT_READ_PASS:
        case SCSI_PROT_WRITE_PASS:
                iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
                sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
                iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
                sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
                                                 IB_T10DIF_CSUM : IB_T10DIF_CRC;
                break;
        default:
                iser_err("Unsupported PI operation %d\n",
                         scsi_get_prot_op(sc));
                return -EINVAL;
        }

        return 0;
}

static inline void
iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
        *mask = 0;
        if (sc->prot_flags & SCSI_PROT_REF_CHECK)
                *mask |= ISER_CHECK_REFTAG;
        if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
                *mask |= ISER_CHECK_GUARD;
}

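/*
 * iser_inv_rkey - prepare a local invalidate work request for @mr and
 * advance its rkey so the next registration uses a fresh key.
 */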
static void
iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
{
        u32 rkey;

        memset(inv_wr, 0, sizeof(*inv_wr));
        inv_wr->opcode = IB_WR_LOCAL_INV;
        inv_wr->wr_id = ISER_FASTREG_LI_WRID;
        inv_wr->ex.invalidate_rkey = mr->rkey;

        rkey = ib_inc_rkey(mr->rkey);
        ib_update_fast_reg_key(mr, rkey);
}

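/*
 * iser_reg_sig_mr - post a signature handover (IB_WR_REG_SIG_MR) work
 * request that binds the data (and, when present, protection)
 * registrations to the signature MR, chaining a local invalidate first
 * when the signature key is no longer marked valid. On success sig_sge
 * describes the signature-protected region.
 */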
static int
iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
                struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
                struct ib_sge *prot_sge, struct ib_sge *sig_sge)
{
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_pi_context *pi_ctx = desc->pi_ctx;
        struct ib_send_wr sig_wr, inv_wr;
        struct ib_send_wr *bad_wr, *wr = NULL;
        struct ib_sig_attrs sig_attrs;
        int ret;

        memset(&sig_attrs, 0, sizeof(sig_attrs));
        ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
        if (ret)
                goto err;

        iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);

        if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
                iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
                wr = &inv_wr;
        }

        memset(&sig_wr, 0, sizeof(sig_wr));
        sig_wr.opcode = IB_WR_REG_SIG_MR;
        sig_wr.wr_id = ISER_FASTREG_LI_WRID;
        sig_wr.sg_list = data_sge;
        sig_wr.num_sge = 1;
        sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
        sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
        if (scsi_prot_sg_count(iser_task->sc))
                sig_wr.wr.sig_handover.prot = prot_sge;
        sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
                                              IB_ACCESS_REMOTE_READ |
                                              IB_ACCESS_REMOTE_WRITE;

        if (!wr)
                wr = &sig_wr;
        else
                wr->next = &sig_wr;

        ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
        if (ret) {
                iser_err("reg_sig_mr failed, ret:%d\n", ret);
                goto err;
        }
        desc->reg_indicators &= ~ISER_SIG_KEY_VALID;

        sig_sge->lkey = pi_ctx->sig_mr->lkey;
        sig_sge->addr = 0;
        sig_sge->length = scsi_transfer_length(iser_task->sc);

        iser_dbg("sig_sge: addr: 0x%llx length: %u lkey: 0x%x\n",
                 sig_sge->addr, sig_sge->length,
                 sig_sge->lkey);
err:
        return ret;
}

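/*
 * iser_fast_reg_mr - register @mem with a fast registration work request.
 * A single DMA entry is served directly by the device's DMA MR; otherwise
 * the scatterlist is translated into the descriptor's page list and an
 * IB_WR_FAST_REG_MR is posted, chained after a local invalidate when the
 * corresponding key is no longer marked valid.
 */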
static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_mem_reg *mem_reg,
                            struct iser_data_buf *mem,
                            enum iser_reg_indicator ind,
                            struct ib_sge *sge)
{
        struct fast_reg_descriptor *desc = mem_reg->mem_h;
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct ib_mr *mr;
        struct ib_fast_reg_page_list *frpl;
        struct ib_send_wr fastreg_wr, inv_wr;
        struct ib_send_wr *bad_wr, *wr = NULL;
        int ret, offset, size, plen;

        /* if there is a single dma entry, the dma mr suffices */
        if (mem->dma_nents == 1) {
                struct scatterlist *sg = mem->sg;

                sge->lkey = device->mr->lkey;
                sge->addr = ib_sg_dma_address(ibdev, &sg[0]);
                sge->length = ib_sg_dma_len(ibdev, &sg[0]);

                iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
                         sge->lkey, sge->addr, sge->length);
                return 0;
        }

        if (ind == ISER_DATA_KEY_VALID) {
                mr = desc->data_mr;
                frpl = desc->data_frpl;
        } else {
                mr = desc->pi_ctx->prot_mr;
                frpl = desc->pi_ctx->prot_frpl;
        }

        plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
                                   &offset, &size);
        if (plen * SIZE_4K < size) {
                iser_err("fast reg page_list too short to hold this SG\n");
                return -EINVAL;
        }

        if (!(desc->reg_indicators & ind)) {
                iser_inv_rkey(&inv_wr, mr);
                wr = &inv_wr;
        }

        /* Prepare FASTREG WR */
        memset(&fastreg_wr, 0, sizeof(fastreg_wr));
        fastreg_wr.wr_id = ISER_FASTREG_LI_WRID;
        fastreg_wr.opcode = IB_WR_FAST_REG_MR;
        fastreg_wr.wr.fast_reg.iova_start = frpl->page_list[0] + offset;
        fastreg_wr.wr.fast_reg.page_list = frpl;
        fastreg_wr.wr.fast_reg.page_list_len = plen;
        fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
        fastreg_wr.wr.fast_reg.length = size;
        fastreg_wr.wr.fast_reg.rkey = mr->rkey;
        fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
                                               IB_ACCESS_REMOTE_WRITE |
                                               IB_ACCESS_REMOTE_READ);

        if (!wr)
                wr = &fastreg_wr;
        else
                wr->next = &fastreg_wr;

        ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
        if (ret) {
                iser_err("fast registration failed, ret:%d\n", ret);
                return ret;
        }
        desc->reg_indicators &= ~ind;

        sge->lkey = mr->lkey;
        sge->addr = frpl->page_list[0] + offset;
        sge->length = size;

        return ret;
}

/**
 * iser_reg_rdma_mem_fastreg - Registers memory intended for RDMA,
 * using Fast Registration WR (if possible) obtaining rkey and va
 *
 * returns 0 on success, errno code on failure
 */
int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
                              enum iser_data_dir cmd_dir)
{
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
        struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
        struct fast_reg_descriptor *desc = NULL;
        struct ib_sge data_sge;
        int err, aligned_len;

        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
                err = fall_to_bounce_buf(iser_task, mem,
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
                        return err;
                }
        }

        if (mem->dma_nents != 1 ||
            scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
                desc = iser_reg_desc_get(ib_conn);
                mem_reg->mem_h = desc;
        }

        err = iser_fast_reg_mr(iser_task, mem_reg, mem,
                               ISER_DATA_KEY_VALID, &data_sge);
        if (err)
                goto err_reg;

        if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
                struct ib_sge prot_sge, sig_sge;

                memset(&prot_sge, 0, sizeof(prot_sge));
                if (scsi_prot_sg_count(iser_task->sc)) {
                        mem = &iser_task->prot[cmd_dir];
                        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
                        if (aligned_len != mem->dma_nents) {
                                err = fall_to_bounce_buf(iser_task, mem,
                                                         cmd_dir, aligned_len);
                                if (err) {
                                        iser_err("failed to allocate bounce buffer\n");
                                        return err;
                                }
                        }

                        err = iser_fast_reg_mr(iser_task, mem_reg, mem,
                                               ISER_PROT_KEY_VALID, &prot_sge);
                        if (err)
                                goto err_reg;
                }

                err = iser_reg_sig_mr(iser_task, desc, &data_sge,
                                      &prot_sge, &sig_sge);
                if (err) {
                        iser_err("Failed to register signature mr\n");
                        return err;
                }
                desc->reg_indicators |= ISER_FASTREG_PROTECTED;

                mem_reg->lkey = sig_sge.lkey;
                mem_reg->rkey = desc->pi_ctx->sig_mr->rkey;
                mem_reg->va = sig_sge.addr;
                mem_reg->len = sig_sge.length;
        } else {
                if (desc)
                        mem_reg->rkey = desc->data_mr->rkey;
                else
                        mem_reg->rkey = device->mr->rkey;

                mem_reg->lkey = data_sge.lkey;
                mem_reg->va = data_sge.addr;
                mem_reg->len = data_sge.length;
        }

        return 0;
err_reg:
        if (desc)
                iser_reg_desc_put(ib_conn, desc);

        return err;
}