/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

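/*
 * This file implements the mlx5 memory region (MR) path: a per-order cache
 * of pre-created mkeys, refilled asynchronously from a workqueue, and the
 * UMR (user-mode memory registration) send path that binds user pages to a
 * cached mkey without issuing a blocking firmware command.
 */
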
enum {
	MAX_PENDING_REG_MR = 8,
};

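/*
 * Cache entries are indexed by page-count order, with ent[0] holding the
 * smallest supported order; order2idx() translates an MR order into that
 * index, clamping anything below the first entry to index 0.
 */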
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

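/*
 * Completion callback for an asynchronous mkey creation issued by add_keys().
 * On success the new MR is given its key, added to the cache entry's free
 * list and inserted into the device mkey radix tree; on failure the fill
 * path is throttled via delay_timer before the cache worker retries.
 */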
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
				&mr->mmr);
	if (err)
		pr_err("Error inserting to mr tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

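/*
 * Pre-populate cache entry @c with @num MRs of that entry's order. The mkeys
 * are created asynchronously (reg_mr_callback() finishes the job), and at
 * most MAX_PENDING_REG_MR creations may be outstanding per entry, in which
 * case -EAGAIN is returned so the caller can retry later.
 */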
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

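/* Destroy up to @num cached MRs from entry @c, starting at the list head. */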
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

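/*
 * Background resizing: the cache worker tops an entry up towards twice its
 * limit, and shrinks it again only when no entry is still below its limit
 * and nothing has been added to the cache for a long while (300 * HZ).
 */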
static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

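/*
 * Take an MR from the cache, preferring the exact order but falling back to
 * larger entries; every entry touched gets its worker kicked so it refills,
 * and a complete miss is accounted in ent->miss.
 */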
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

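/*
 * Return a UMR-allocated MR to its cache entry; if the entry has grown past
 * twice its limit, schedule the worker so it can shrink the entry again.
 */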
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

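/*
 * ib_verbs entry points follow. mlx5_ib_get_dma_mr() creates a
 * physical-address (PA) mkey covering the whole address space
 * (MLX5_MKEY_LEN64) with a synchronous firmware command.
 */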
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

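/*
 * UMR work requests: registration is done by posting an MLX5_IB_WR_UMR WQE
 * on the driver's internal QP. prep_umr_reg_wqe() points the WQE at the
 * DMA-mapped page-address array and fills the mkey context (page shift,
 * virtual address, length, access flags); prep_umr_unreg_wqe() builds the
 * corresponding invalidation request.
 */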
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

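/*
 * reg_umr(): take an mkey from the cache (refilling once on a miss), build
 * the page-address array, DMA-map it and post a UMR WQE, then sleep on a
 * completion that mlx5_umr_cq_handler() signals. Returns ERR_PTR(-EAGAIN)
 * when the cache cannot supply an MR so the caller can fall back to
 * reg_create().
 */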
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	__be64 *pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more.
	 */
	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr_pas) {
		err = -ENOMEM;
		goto free_mr;
	}

	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, size - npages * sizeof(u64));

	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		err = -ENOMEM;
		goto free_pas;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
			 virt_addr, len, access_flags);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	}
	wait_for_completion(&umr_context.done);
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto unmap_dma;
	}

	mr->mmr.iova = virt_addr;
	mr->mmr.size = len;
	mr->mmr.pd = to_mpd(pd)->pdn;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

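/*
 * reg_create(): the non-cached fallback; builds a full CREATE_MKEY mailbox
 * with an inline page list and registers the region with a synchronous
 * firmware command.
 */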
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(dev->mdev->caps.gen.flags &
			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

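/*
 * mlx5_ib_reg_user_mr(): pin the user buffer with ib_umem_get(), compute the
 * usable page size and order, then register via the UMR fast path when
 * use_umr() allows it, falling back to reg_create() otherwise.
 */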
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev->priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&umr_context.done);
	up(&umrc->sem);
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev->priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

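/*
 * mlx5_ib_create_mr(): allocate an MR suitable for later (re)registration;
 * when IB_MR_SIGNATURE_EN is requested it also allocates memory and wire
 * PSVs and switches the mkey to KLM access mode so signature offload can be
 * armed by a later UMR.
 */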
struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
				struct ib_mr_init_attr *mr_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	access_mode = MLX5_ACCESS_MODE_MTT;

	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
							   MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

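/*
 * mlx5_ib_check_mr_status(): report (and then clear) a pending signature
 * error on a signature-enabled MR; the stored error item is copied out when
 * it matches the MR's lkey, otherwise a guard error at offset 0 is reported.
 */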
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key) {
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		} else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}