IB/mlx5: Refactoring register MR code
deliverable/linux.git: drivers/infiniband/hw/mlx5/mr.c
e126ba97 1/*
6cf0a15f 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33
34#include <linux/kref.h>
35#include <linux/random.h>
36#include <linux/debugfs.h>
37#include <linux/export.h>
746b5583 38#include <linux/delay.h>
e126ba97 39#include <rdma/ib_umem.h>
b4cfe447 40#include <rdma/ib_umem_odp.h>
968e78dd 41#include <rdma/ib_verbs.h>
42#include "mlx5_ib.h"
43
44enum {
746b5583 45 MAX_PENDING_REG_MR = 8,
46};
47
48#define MLX5_UMR_ALIGN 2048
49#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
50static __be64 mlx5_ib_update_mtt_emergency_buffer[
51 MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
52 __aligned(MLX5_UMR_ALIGN);
53static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
54#endif
fe45f827 55
56static int clean_mr(struct mlx5_ib_mr *mr);
57
58static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
59{
60 int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
61
62#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
63 /* Wait until all page fault handlers using the mr complete. */
64 synchronize_srcu(&dev->mr_srcu);
65#endif
66
67 return err;
68}
69
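/*
 * Cache bucket i holds MRs that can map 1 << (i + 2) pages
 * (mlx5_mr_cache_init() below sets ent->order = i + 2), so order2idx()
 * simply rebases the requested order onto the ent[] array: with
 * ent[0].order == 2, an order-5 request maps to index 3, and any order
 * below the smallest bucket falls back to index 0.
 */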
70static int order2idx(struct mlx5_ib_dev *dev, int order)
71{
72 struct mlx5_mr_cache *cache = &dev->cache;
73
74 if (order < cache->ent[0].order)
75 return 0;
76 else
77 return order - cache->ent[0].order;
78}
79
80#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
81static void update_odp_mr(struct mlx5_ib_mr *mr)
82{
83 if (mr->umem->odp_data) {
84 /*
85 * This barrier prevents the compiler from moving the
86 * setting of umem->odp_data->private to point to our
 87 * MR before reg_umr has finished, to ensure that the MR
 88 * initialization has finished before we start to
 89 * handle invalidations.
90 */
91 smp_wmb();
92 mr->umem->odp_data->private = mr;
93 /*
94 * Make sure we will see the new
95 * umem->odp_data->private value in the invalidation
96 * routines, before we can get page faults on the
97 * MR. Page faults can happen once we put the MR in
98 * the tree, below this line. Without the barrier,
99 * there can be a fault handling and an invalidation
100 * before umem->odp_data->private == mr is visible to
101 * the invalidation handler.
102 */
103 smp_wmb();
104 }
105}
106#endif
107
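/*
 * Completion callback for the asynchronous mlx5_core_create_mkey()
 * calls issued by add_keys(): on success the new MR gets a fresh
 * variable-key byte, is appended to its cache bucket and inserted into
 * the mkey radix tree; on failure the MR is freed and fill_delay
 * throttles further cache filling for roughly one second.
 */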
108static void reg_mr_callback(int status, void *context)
109{
110 struct mlx5_ib_mr *mr = context;
111 struct mlx5_ib_dev *dev = mr->dev;
112 struct mlx5_mr_cache *cache = &dev->cache;
113 int c = order2idx(dev, mr->order);
114 struct mlx5_cache_ent *ent = &cache->ent[c];
115 u8 key;
746b5583 116 unsigned long flags;
9603b61d 117 struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
8605933a 118 int err;
746b5583 119
120 spin_lock_irqsave(&ent->lock, flags);
121 ent->pending--;
122 spin_unlock_irqrestore(&ent->lock, flags);
123 if (status) {
124 mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
125 kfree(mr);
126 dev->fill_delay = 1;
127 mod_timer(&dev->delay_timer, jiffies + HZ);
128 return;
129 }
130
131 if (mr->out.hdr.status) {
 132 mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
133 mr->out.hdr.status,
134 be32_to_cpu(mr->out.hdr.syndrome));
135 kfree(mr);
136 dev->fill_delay = 1;
137 mod_timer(&dev->delay_timer, jiffies + HZ);
138 return;
139 }
140
141 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
142 key = dev->mdev->priv.mkey_key++;
143 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
144 mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
145
146 cache->last_add = jiffies;
147
148 spin_lock_irqsave(&ent->lock, flags);
149 list_add_tail(&mr->list, &ent->head);
150 ent->cur++;
151 ent->size++;
152 spin_unlock_irqrestore(&ent->lock, flags);
153
154 write_lock_irqsave(&table->lock, flags);
155 err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
156 &mr->mmr);
157 if (err)
158 pr_err("Error inserting to mr tree. 0x%x\n", -err);
159 write_unlock_irqrestore(&table->lock, flags);
160}
161
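/*
 * add_keys() feeds cache bucket c by firing up to 'num' asynchronous
 * mkey-creation commands. At most MAX_PENDING_REG_MR commands may be
 * outstanding per bucket, so callers treat -EAGAIN as "try again
 * later" rather than as a hard failure.
 */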
162static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
163{
164 struct mlx5_mr_cache *cache = &dev->cache;
165 struct mlx5_cache_ent *ent = &cache->ent[c];
166 struct mlx5_create_mkey_mbox_in *in;
167 struct mlx5_ib_mr *mr;
168 int npages = 1 << ent->order;
169 int err = 0;
170 int i;
171
172 in = kzalloc(sizeof(*in), GFP_KERNEL);
173 if (!in)
174 return -ENOMEM;
175
176 for (i = 0; i < num; i++) {
177 if (ent->pending >= MAX_PENDING_REG_MR) {
178 err = -EAGAIN;
179 break;
180 }
181
182 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
183 if (!mr) {
184 err = -ENOMEM;
746b5583 185 break;
186 }
187 mr->order = ent->order;
188 mr->umred = 1;
746b5583 189 mr->dev = dev;
968e78dd 190 in->seg.status = MLX5_MKEY_STATUS_FREE;
191 in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
192 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
193 in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
194 in->seg.log2_page_size = 12;
195
196 spin_lock_irq(&ent->lock);
197 ent->pending++;
198 spin_unlock_irq(&ent->lock);
9603b61d 199 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
200 sizeof(*in), reg_mr_callback,
201 mr, &mr->out);
e126ba97 202 if (err) {
203 spin_lock_irq(&ent->lock);
204 ent->pending--;
205 spin_unlock_irq(&ent->lock);
e126ba97 206 mlx5_ib_warn(dev, "create mkey failed %d\n", err);
e126ba97 207 kfree(mr);
746b5583 208 break;
e126ba97 209 }
210 }
211
212 kfree(in);
213 return err;
214}
215
216static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
217{
218 struct mlx5_mr_cache *cache = &dev->cache;
219 struct mlx5_cache_ent *ent = &cache->ent[c];
220 struct mlx5_ib_mr *mr;
221 int err;
222 int i;
223
224 for (i = 0; i < num; i++) {
746b5583 225 spin_lock_irq(&ent->lock);
e126ba97 226 if (list_empty(&ent->head)) {
746b5583 227 spin_unlock_irq(&ent->lock);
228 return;
229 }
230 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
231 list_del(&mr->list);
232 ent->cur--;
233 ent->size--;
746b5583 234 spin_unlock_irq(&ent->lock);
b4cfe447 235 err = destroy_mkey(dev, mr);
203099fd 236 if (err)
e126ba97 237 mlx5_ib_warn(dev, "failed destroy mkey\n");
203099fd 238 else
e126ba97 239 kfree(mr);
240 }
241}
242
243static ssize_t size_write(struct file *filp, const char __user *buf,
244 size_t count, loff_t *pos)
245{
246 struct mlx5_cache_ent *ent = filp->private_data;
247 struct mlx5_ib_dev *dev = ent->dev;
248 char lbuf[20];
249 u32 var;
250 int err;
251 int c;
252
253 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
5e631a03 254 return -EFAULT;
255
256 c = order2idx(dev, ent->order);
257 lbuf[sizeof(lbuf) - 1] = 0;
258
259 if (sscanf(lbuf, "%u", &var) != 1)
260 return -EINVAL;
261
262 if (var < ent->limit)
263 return -EINVAL;
264
265 if (var > ent->size) {
266 do {
267 err = add_keys(dev, c, var - ent->size);
268 if (err && err != -EAGAIN)
269 return err;
270
271 usleep_range(3000, 5000);
272 } while (err);
273 } else if (var < ent->size) {
274 remove_keys(dev, c, ent->size - var);
275 }
276
277 return count;
278}
279
280static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
281 loff_t *pos)
282{
283 struct mlx5_cache_ent *ent = filp->private_data;
284 char lbuf[20];
285 int err;
286
287 if (*pos)
288 return 0;
289
290 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
291 if (err < 0)
292 return err;
293
294 if (copy_to_user(buf, lbuf, err))
5e631a03 295 return -EFAULT;
296
297 *pos += err;
298
299 return err;
300}
301
302static const struct file_operations size_fops = {
303 .owner = THIS_MODULE,
304 .open = simple_open,
305 .write = size_write,
306 .read = size_read,
307};
308
309static ssize_t limit_write(struct file *filp, const char __user *buf,
310 size_t count, loff_t *pos)
311{
312 struct mlx5_cache_ent *ent = filp->private_data;
313 struct mlx5_ib_dev *dev = ent->dev;
314 char lbuf[20];
315 u32 var;
316 int err;
317 int c;
318
319 if (copy_from_user(lbuf, buf, sizeof(lbuf)))
5e631a03 320 return -EFAULT;
321
322 c = order2idx(dev, ent->order);
323 lbuf[sizeof(lbuf) - 1] = 0;
324
325 if (sscanf(lbuf, "%u", &var) != 1)
326 return -EINVAL;
327
328 if (var > ent->size)
329 return -EINVAL;
330
331 ent->limit = var;
332
333 if (ent->cur < ent->limit) {
334 err = add_keys(dev, c, 2 * ent->limit - ent->cur);
335 if (err)
336 return err;
337 }
338
339 return count;
340}
341
342static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
343 loff_t *pos)
344{
345 struct mlx5_cache_ent *ent = filp->private_data;
346 char lbuf[20];
347 int err;
348
349 if (*pos)
350 return 0;
351
352 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
353 if (err < 0)
354 return err;
355
356 if (copy_to_user(buf, lbuf, err))
5e631a03 357 return -EFAULT;
358
359 *pos += err;
360
361 return err;
362}
363
364static const struct file_operations limit_fops = {
365 .owner = THIS_MODULE,
366 .open = simple_open,
367 .write = limit_write,
368 .read = limit_read,
369};
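/*
 * The "size" and "limit" files above are created per cache bucket by
 * mlx5_mr_cache_debugfs_init() under mr_cache/<order>/ in the mlx5
 * debugfs tree. A sketch of how they might be poked from the shell
 * (the mount point and device directory name are illustrative only,
 * not taken from this file):
 *
 *   cat /sys/kernel/debug/mlx5/0000:08:00.0/mr_cache/5/size
 *   echo 64 > /sys/kernel/debug/mlx5/0000:08:00.0/mr_cache/5/limit
 *
 * size_write() rejects values below the bucket's limit, and
 * limit_write() rejects values above the bucket's current size.
 */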
370
371static int someone_adding(struct mlx5_mr_cache *cache)
372{
373 int i;
374
375 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
376 if (cache->ent[i].cur < cache->ent[i].limit)
377 return 1;
378 }
379
380 return 0;
381}
382
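/*
 * Per-bucket maintenance: keep each bucket filled to about 2 * limit
 * entries, backing off for 3ms (-EAGAIN) or 1s (other errors) when the
 * firmware pushes back, and trim surplus entries only after the bucket
 * has been idle for 300 seconds, nothing else is refilling, and
 * need_resched() says the CPU is not wanted elsewhere.
 */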
383static void __cache_work_func(struct mlx5_cache_ent *ent)
384{
385 struct mlx5_ib_dev *dev = ent->dev;
386 struct mlx5_mr_cache *cache = &dev->cache;
387 int i = order2idx(dev, ent->order);
746b5583 388 int err;
389
390 if (cache->stopped)
391 return;
392
393 ent = &dev->cache.ent[i];
394 if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
395 err = add_keys(dev, i, 1);
396 if (ent->cur < 2 * ent->limit) {
397 if (err == -EAGAIN) {
398 mlx5_ib_dbg(dev, "returned eagain, order %d\n",
399 i + 2);
400 queue_delayed_work(cache->wq, &ent->dwork,
401 msecs_to_jiffies(3));
402 } else if (err) {
403 mlx5_ib_warn(dev, "command failed order %d, err %d\n",
404 i + 2, err);
405 queue_delayed_work(cache->wq, &ent->dwork,
406 msecs_to_jiffies(1000));
407 } else {
408 queue_work(cache->wq, &ent->work);
409 }
410 }
e126ba97 411 } else if (ent->cur > 2 * ent->limit) {
412 /*
 413 * The remove_keys() logic is performed as a garbage-collection
 414 * task. Such a task is intended to run only when no other active
 415 * processes are running.
 416 *
 417 * need_resched() returns TRUE if there are user tasks to be
 418 * activated in the near future.
 419 *
 420 * In that case, we do not execute remove_keys() and postpone
 421 * the garbage-collection work to the next cycle, in order to
 422 * free CPU resources for other tasks.
423 */
424 if (!need_resched() && !someone_adding(cache) &&
746b5583 425 time_after(jiffies, cache->last_add + 300 * HZ)) {
426 remove_keys(dev, i, 1);
427 if (ent->cur > ent->limit)
428 queue_work(cache->wq, &ent->work);
429 } else {
746b5583 430 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
431 }
432 }
433}
434
435static void delayed_cache_work_func(struct work_struct *work)
436{
437 struct mlx5_cache_ent *ent;
438
439 ent = container_of(work, struct mlx5_cache_ent, dwork.work);
440 __cache_work_func(ent);
441}
442
443static void cache_work_func(struct work_struct *work)
444{
445 struct mlx5_cache_ent *ent;
446
447 ent = container_of(work, struct mlx5_cache_ent, work);
448 __cache_work_func(ent);
449}
450
451static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
452{
453 struct mlx5_mr_cache *cache = &dev->cache;
454 struct mlx5_ib_mr *mr = NULL;
455 struct mlx5_cache_ent *ent;
456 int c;
457 int i;
458
459 c = order2idx(dev, order);
460 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
461 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
462 return NULL;
463 }
464
465 for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
466 ent = &cache->ent[i];
467
468 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
469
746b5583 470 spin_lock_irq(&ent->lock);
471 if (!list_empty(&ent->head)) {
472 mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
473 list);
474 list_del(&mr->list);
475 ent->cur--;
746b5583 476 spin_unlock_irq(&ent->lock);
477 if (ent->cur < ent->limit)
478 queue_work(cache->wq, &ent->work);
479 break;
480 }
746b5583 481 spin_unlock_irq(&ent->lock);
482
483 queue_work(cache->wq, &ent->work);
484 }
485
486 if (!mr)
487 cache->ent[c].miss++;
488
489 return mr;
490}
491
492static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
493{
494 struct mlx5_mr_cache *cache = &dev->cache;
495 struct mlx5_cache_ent *ent;
496 int shrink = 0;
497 int c;
498
499 c = order2idx(dev, mr->order);
500 if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
501 mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
502 return;
503 }
504 ent = &cache->ent[c];
746b5583 505 spin_lock_irq(&ent->lock);
506 list_add_tail(&mr->list, &ent->head);
507 ent->cur++;
508 if (ent->cur > 2 * ent->limit)
509 shrink = 1;
746b5583 510 spin_unlock_irq(&ent->lock);
511
512 if (shrink)
513 queue_work(cache->wq, &ent->work);
514}
515
516static void clean_keys(struct mlx5_ib_dev *dev, int c)
517{
518 struct mlx5_mr_cache *cache = &dev->cache;
519 struct mlx5_cache_ent *ent = &cache->ent[c];
520 struct mlx5_ib_mr *mr;
521 int err;
522
3c461911 523 cancel_delayed_work(&ent->dwork);
e126ba97 524 while (1) {
746b5583 525 spin_lock_irq(&ent->lock);
e126ba97 526 if (list_empty(&ent->head)) {
746b5583 527 spin_unlock_irq(&ent->lock);
528 return;
529 }
530 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
531 list_del(&mr->list);
532 ent->cur--;
533 ent->size--;
746b5583 534 spin_unlock_irq(&ent->lock);
b4cfe447 535 err = destroy_mkey(dev, mr);
203099fd 536 if (err)
e126ba97 537 mlx5_ib_warn(dev, "failed destroy mkey\n");
203099fd 538 else
e126ba97 539 kfree(mr);
540 }
541}
542
543static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
544{
545 struct mlx5_mr_cache *cache = &dev->cache;
546 struct mlx5_cache_ent *ent;
547 int i;
548
549 if (!mlx5_debugfs_root)
550 return 0;
551
9603b61d 552 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
553 if (!cache->root)
554 return -ENOMEM;
555
556 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
557 ent = &cache->ent[i];
558 sprintf(ent->name, "%d", ent->order);
559 ent->dir = debugfs_create_dir(ent->name, cache->root);
560 if (!ent->dir)
561 return -ENOMEM;
562
563 ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
564 &size_fops);
565 if (!ent->fsize)
566 return -ENOMEM;
567
568 ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
569 &limit_fops);
570 if (!ent->flimit)
571 return -ENOMEM;
572
573 ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
574 &ent->cur);
575 if (!ent->fcur)
576 return -ENOMEM;
577
578 ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
579 &ent->miss);
580 if (!ent->fmiss)
581 return -ENOMEM;
582 }
583
584 return 0;
585}
586
587static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
588{
589 if (!mlx5_debugfs_root)
590 return;
591
592 debugfs_remove_recursive(dev->cache.root);
593}
594
595static void delay_time_func(unsigned long ctx)
596{
597 struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;
598
599 dev->fill_delay = 0;
600}
601
602int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
603{
604 struct mlx5_mr_cache *cache = &dev->cache;
605 struct mlx5_cache_ent *ent;
606 int limit;
607 int err;
608 int i;
609
610 cache->wq = create_singlethread_workqueue("mkey_cache");
611 if (!cache->wq) {
612 mlx5_ib_warn(dev, "failed to create work queue\n");
613 return -ENOMEM;
614 }
615
746b5583 616 setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
617 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
618 INIT_LIST_HEAD(&cache->ent[i].head);
619 spin_lock_init(&cache->ent[i].lock);
620
621 ent = &cache->ent[i];
622 INIT_LIST_HEAD(&ent->head);
623 spin_lock_init(&ent->lock);
624 ent->order = i + 2;
625 ent->dev = dev;
626
627 if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
628 limit = dev->mdev->profile->mr_cache[i].limit;
2d036fad 629 else
e126ba97 630 limit = 0;
2d036fad 631
632 INIT_WORK(&ent->work, cache_work_func);
633 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
634 ent->limit = limit;
635 queue_work(cache->wq, &ent->work);
636 }
637
638 err = mlx5_mr_cache_debugfs_init(dev);
639 if (err)
640 mlx5_ib_warn(dev, "cache debugfs failure\n");
641
642 return 0;
643}
644
645int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
646{
647 int i;
648
649 dev->cache.stopped = 1;
3c461911 650 flush_workqueue(dev->cache.wq);
651
652 mlx5_mr_cache_debugfs_cleanup(dev);
653
654 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
655 clean_keys(dev, i);
656
3c461911 657 destroy_workqueue(dev->cache.wq);
746b5583 658 del_timer_sync(&dev->delay_timer);
3c461911 659
660 return 0;
661}
662
663struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
664{
665 struct mlx5_ib_dev *dev = to_mdev(pd->device);
9603b61d 666 struct mlx5_core_dev *mdev = dev->mdev;
667 struct mlx5_create_mkey_mbox_in *in;
668 struct mlx5_mkey_seg *seg;
669 struct mlx5_ib_mr *mr;
670 int err;
671
672 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
673 if (!mr)
674 return ERR_PTR(-ENOMEM);
675
676 in = kzalloc(sizeof(*in), GFP_KERNEL);
677 if (!in) {
678 err = -ENOMEM;
679 goto err_free;
680 }
681
682 seg = &in->seg;
683 seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
684 seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
685 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
686 seg->start_addr = 0;
687
688 err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
689 NULL);
690 if (err)
691 goto err_in;
692
693 kfree(in);
694 mr->ibmr.lkey = mr->mmr.key;
695 mr->ibmr.rkey = mr->mmr.key;
696 mr->umem = NULL;
697
698 return &mr->ibmr;
699
700err_in:
701 kfree(in);
702
703err_free:
704 kfree(mr);
705
706 return ERR_PTR(err);
707}
708
709static int get_octo_len(u64 addr, u64 len, int page_size)
710{
711 u64 offset;
712 int npages;
713
714 offset = addr & (page_size - 1);
715 npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
716 return (npages + 1) / 2;
717}
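/*
 * "Octo" length: the hardware counts translation entries in 16-byte
 * units, i.e. two 8-byte MTT entries per unit, hence the
 * (npages + 1) / 2 rounding above. A region spanning 5 pages, for
 * example, needs 5 MTT entries and therefore 3 such units.
 */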
718
719static int use_umr(int order)
720{
cc149f75 721 return order <= MLX5_MAX_UMR_SHIFT;
722}
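/*
 * Registration path split: regions small enough to be described by a
 * UMR work request (order <= MLX5_MAX_UMR_SHIFT) take a pre-created
 * mkey from the cache and program it through reg_umr(); larger regions
 * fall back to reg_create(), which builds the mkey with a blocking
 * firmware command (see mlx5_ib_reg_user_mr() below).
 */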
723
724static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
725 int npages, int page_shift, int *size,
726 __be64 **mr_pas, dma_addr_t *dma)
727{
728 __be64 *pas;
729 struct device *ddev = dev->ib_dev.dma_device;
730
731 /*
732 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
733 * To avoid copying garbage after the pas array, we allocate
734 * a little more.
735 */
736 *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
737 *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
738 if (!(*mr_pas))
739 return -ENOMEM;
740
741 pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
742 mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
743 /* Clear padding after the actual pages. */
744 memset(pas + npages, 0, *size - npages * sizeof(u64));
745
746 *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
747 if (dma_mapping_error(ddev, *dma)) {
748 kfree(*mr_pas);
749 return -ENOMEM;
750 }
751
752 return 0;
753}
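/*
 * The over-allocation above (MLX5_UMR_ALIGN - 1 extra bytes) lets
 * PTR_ALIGN() place the page list on a 2KB boundary as Connect-IB
 * requires, while the zeroed tail guarantees that the UMR, which
 * copies whole MLX5_UMR_MTT_ALIGNMENT chunks, never transfers stale
 * data past the real pages.
 */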
754
755static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
756 struct ib_sge *sg, u64 dma, int n, u32 key,
757 int page_shift)
758{
759 struct mlx5_ib_dev *dev = to_mdev(pd->device);
e622f2f4 760 struct mlx5_umr_wr *umrwr = umr_wr(wr);
761
762 sg->addr = dma;
763 sg->length = ALIGN(sizeof(u64) * n, 64);
b37c788f 764 sg->lkey = dev->umrc.pd->local_dma_lkey;
765
766 wr->next = NULL;
767 wr->sg_list = sg;
768 if (n)
769 wr->num_sge = 1;
770 else
771 wr->num_sge = 0;
772
773 wr->opcode = MLX5_IB_WR_UMR;
774
775 umrwr->npages = n;
776 umrwr->page_shift = page_shift;
777 umrwr->mkey = key;
778}
779
780static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
781 struct ib_sge *sg, u64 dma, int n, u32 key,
782 int page_shift, u64 virt_addr, u64 len,
783 int access_flags)
784{
785 struct mlx5_umr_wr *umrwr = umr_wr(wr);
786
787 prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
788
789 wr->send_flags = 0;
790
791 umrwr->target.virt_addr = virt_addr;
792 umrwr->length = len;
793 umrwr->access_flags = access_flags;
794 umrwr->pd = pd;
795}
796
797static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
798 struct ib_send_wr *wr, u32 key)
799{
e622f2f4 800 struct mlx5_umr_wr *umrwr = umr_wr(wr);
801
802 wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
e126ba97 803 wr->opcode = MLX5_IB_WR_UMR;
968e78dd 804 umrwr->mkey = key;
805}
806
807static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
808 int access_flags, int *npages,
809 int *page_shift, int *ncont, int *order)
810{
811 struct mlx5_ib_dev *dev = to_mdev(pd->device);
812 struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
813 access_flags, 0);
814 if (IS_ERR(umem)) {
815 mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
816 return (void *)umem;
817 }
818
819 mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
820 if (!*npages) {
821 mlx5_ib_warn(dev, "avoid zero region\n");
822 ib_umem_release(umem);
823 return ERR_PTR(-EINVAL);
824 }
825
826 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
827 *npages, *ncont, *order, *page_shift);
828
829 return umem;
830}
831
832void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
833{
a74d2416 834 struct mlx5_ib_umr_context *context;
835 struct ib_wc wc;
836 int err;
837
838 while (1) {
839 err = ib_poll_cq(cq, 1, &wc);
840 if (err < 0) {
841 pr_warn("poll cq error %d\n", err);
842 return;
843 }
844 if (err == 0)
845 break;
846
6c9b5d9b 847 context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
848 context->status = wc.status;
849 complete(&context->done);
850 }
851 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
852}
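/*
 * UMR completions are matched back to their initiators through wr_id:
 * every poster stashes a pointer to an on-stack mlx5_ib_umr_context in
 * the work request, and the handler above copies the completion status
 * into it and wakes the waiter. The umrc->sem semaphore bounds how many
 * such UMRs are outstanding at any time.
 */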
853
854static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
855 u64 virt_addr, u64 len, int npages,
856 int page_shift, int order, int access_flags)
857{
858 struct mlx5_ib_dev *dev = to_mdev(pd->device);
203099fd 859 struct device *ddev = dev->ib_dev.dma_device;
e126ba97 860 struct umr_common *umrc = &dev->umrc;
a74d2416 861 struct mlx5_ib_umr_context umr_context;
862 struct mlx5_umr_wr umrwr;
863 struct ib_send_wr *bad;
864 struct mlx5_ib_mr *mr;
865 struct ib_sge sg;
cc149f75 866 int size;
867 __be64 *mr_pas;
868 dma_addr_t dma;
096f7e72 869 int err = 0;
870 int i;
871
746b5583 872 for (i = 0; i < 1; i++) {
873 mr = alloc_cached_mr(dev, order);
874 if (mr)
875 break;
876
877 err = add_keys(dev, order2idx(dev, order), 1);
878 if (err && err != -EAGAIN) {
879 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
880 break;
881 }
882 }
883
884 if (!mr)
885 return ERR_PTR(-EAGAIN);
886
887 err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
888 &dma);
889 if (err)
096f7e72 890 goto free_mr;
203099fd 891
892 memset(&umrwr, 0, sizeof(umrwr));
893 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
894 prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
895 page_shift, virt_addr, len, access_flags);
e126ba97 896
a74d2416 897 mlx5_ib_init_umr_context(&umr_context);
e126ba97 898 down(&umrc->sem);
e622f2f4 899 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
900 if (err) {
901 mlx5_ib_warn(dev, "post send failed, err %d\n", err);
096f7e72 902 goto unmap_dma;
903 } else {
904 wait_for_completion(&umr_context.done);
905 if (umr_context.status != IB_WC_SUCCESS) {
906 mlx5_ib_warn(dev, "reg umr failed\n");
907 err = -EFAULT;
908 }
096f7e72 909 }
e126ba97 910
911 mr->mmr.iova = virt_addr;
912 mr->mmr.size = len;
913 mr->mmr.pd = to_mpd(pd)->pdn;
914
915 mr->live = 1;
916
917unmap_dma:
918 up(&umrc->sem);
21af2c3e 919 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
096f7e72 920
21af2c3e 921 kfree(mr_pas);
203099fd 922
923free_mr:
924 if (err) {
925 free_cached_mr(dev, mr);
926 return ERR_PTR(err);
927 }
928
929 return mr;
930}
931
932#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
933int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
934 int zap)
935{
936 struct mlx5_ib_dev *dev = mr->dev;
937 struct device *ddev = dev->ib_dev.dma_device;
938 struct umr_common *umrc = &dev->umrc;
939 struct mlx5_ib_umr_context umr_context;
940 struct ib_umem *umem = mr->umem;
941 int size;
942 __be64 *pas;
943 dma_addr_t dma;
944 struct ib_send_wr *bad;
945 struct mlx5_umr_wr wr;
946 struct ib_sge sg;
947 int err = 0;
948 const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
949 const int page_index_mask = page_index_alignment - 1;
950 size_t pages_mapped = 0;
951 size_t pages_to_map = 0;
952 size_t pages_iter = 0;
953 int use_emergency_buf = 0;
954
955 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
956 * so we need to align the offset and length accordingly */
957 if (start_page_index & page_index_mask) {
958 npages += start_page_index & page_index_mask;
959 start_page_index &= ~page_index_mask;
960 }
961
962 pages_to_map = ALIGN(npages, page_index_alignment);
963
964 if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
965 return -EINVAL;
966
967 size = sizeof(u64) * pages_to_map;
968 size = min_t(int, PAGE_SIZE, size);
969 /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
970 * code, when we are called from an invalidation. The pas buffer must
971 * be 2k-aligned for Connect-IB. */
972 pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
973 if (!pas) {
974 mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
975 pas = mlx5_ib_update_mtt_emergency_buffer;
976 size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
977 use_emergency_buf = 1;
978 mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
979 memset(pas, 0, size);
980 }
981 pages_iter = size / sizeof(u64);
982 dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
983 if (dma_mapping_error(ddev, dma)) {
984 mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
985 err = -ENOMEM;
986 goto free_pas;
987 }
988
989 for (pages_mapped = 0;
990 pages_mapped < pages_to_map && !err;
991 pages_mapped += pages_iter, start_page_index += pages_iter) {
992 dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
993
994 npages = min_t(size_t,
995 pages_iter,
996 ib_umem_num_pages(umem) - start_page_index);
997
998 if (!zap) {
999 __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
1000 start_page_index, npages, pas,
1001 MLX5_IB_MTT_PRESENT);
1002 /* Clear padding after the pages brought from the
1003 * umem. */
1004 memset(pas + npages, 0, size - npages * sizeof(u64));
1005 }
1006
1007 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1008
1009 memset(&wr, 0, sizeof(wr));
e622f2f4 1010 wr.wr.wr_id = (u64)(unsigned long)&umr_context;
1011
1012 sg.addr = dma;
1013 sg.length = ALIGN(npages * sizeof(u64),
1014 MLX5_UMR_MTT_ALIGNMENT);
b37c788f 1015 sg.lkey = dev->umrc.pd->local_dma_lkey;
832a6b06 1016
e622f2f4 1017 wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
832a6b06 1018 MLX5_IB_SEND_UMR_UPDATE_MTT;
1019 wr.wr.sg_list = &sg;
1020 wr.wr.num_sge = 1;
1021 wr.wr.opcode = MLX5_IB_WR_UMR;
1022 wr.npages = sg.length / sizeof(u64);
1023 wr.page_shift = PAGE_SHIFT;
1024 wr.mkey = mr->mmr.key;
1025 wr.target.offset = start_page_index;
1026
1027 mlx5_ib_init_umr_context(&umr_context);
1028 down(&umrc->sem);
e622f2f4 1029 err = ib_post_send(umrc->qp, &wr.wr, &bad);
1030 if (err) {
1031 mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
1032 } else {
1033 wait_for_completion(&umr_context.done);
1034 if (umr_context.status != IB_WC_SUCCESS) {
1035 mlx5_ib_err(dev, "UMR completion failed, code %d\n",
1036 umr_context.status);
1037 err = -EFAULT;
1038 }
1039 }
1040 up(&umrc->sem);
1041 }
1042 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1043
1044free_pas:
1045 if (!use_emergency_buf)
1046 free_page((unsigned long)pas);
1047 else
1048 mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
1049
1050 return err;
1051}
1052#endif
1053
1054/*
1055 * If ibmr is NULL it will be allocated by reg_create.
1056 * Else, the given ibmr will be used.
1057 */
1058static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1059 u64 virt_addr, u64 length,
1060 struct ib_umem *umem, int npages,
1061 int page_shift, int access_flags)
1062{
1063 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1064 struct mlx5_create_mkey_mbox_in *in;
1065 struct mlx5_ib_mr *mr;
1066 int inlen;
1067 int err;
938fe83c 1068 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
e126ba97 1069
395a8e4c 1070 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1071 if (!mr)
1072 return ERR_PTR(-ENOMEM);
1073
1074 inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
1075 in = mlx5_vzalloc(inlen);
1076 if (!in) {
1077 err = -ENOMEM;
1078 goto err_1;
1079 }
1080 mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
1081 pg_cap ? MLX5_IB_MTT_PRESENT : 0);
e126ba97 1082
1083 /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
1084 * in the page list submitted with the command. */
1085 in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
1086 in->seg.flags = convert_access(access_flags) |
1087 MLX5_ACCESS_MODE_MTT;
1088 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
1089 in->seg.start_addr = cpu_to_be64(virt_addr);
1090 in->seg.len = cpu_to_be64(length);
1091 in->seg.bsfs_octo_size = 0;
1092 in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
1093 in->seg.log2_page_size = page_shift;
1094 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1095 in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
1096 1 << page_shift));
9603b61d 1097 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
746b5583 1098 NULL, NULL);
1099 if (err) {
1100 mlx5_ib_warn(dev, "create mkey failed\n");
1101 goto err_2;
1102 }
1103 mr->umem = umem;
7eae20db 1104 mr->dev = dev;
b4cfe447 1105 mr->live = 1;
479163f4 1106 kvfree(in);
1107
1108 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
1109
1110 return mr;
1111
1112err_2:
479163f4 1113 kvfree(in);
1114
1115err_1:
1116 if (!ibmr)
1117 kfree(mr);
1118
1119 return ERR_PTR(err);
1120}
1121
 1122static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1123 int npages, u64 length, int access_flags)
1124{
1125 mr->npages = npages;
1126 atomic_add(npages, &dev->mdev->priv.reg_pages);
1127 mr->ibmr.lkey = mr->mmr.key;
1128 mr->ibmr.rkey = mr->mmr.key;
1129 mr->ibmr.length = length;
1130}
1131
1132struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1133 u64 virt_addr, int access_flags,
1134 struct ib_udata *udata)
1135{
1136 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1137 struct mlx5_ib_mr *mr = NULL;
1138 struct ib_umem *umem;
1139 int page_shift;
1140 int npages;
1141 int ncont;
1142 int order;
1143 int err;
1144
1145 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1146 start, virt_addr, length, access_flags);
1147 umem = mr_umem_get(pd, start, length, access_flags, &npages,
1148 &page_shift, &ncont, &order);
e126ba97 1149
1150 if (IS_ERR(umem))
1151 return (void *)umem;
1152
1153 if (use_umr(order)) {
1154 mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
1155 order, access_flags);
1156 if (PTR_ERR(mr) == -EAGAIN) {
1157 mlx5_ib_dbg(dev, "cache empty for order %d", order);
1158 mr = NULL;
1159 }
1160 } else if (access_flags & IB_ACCESS_ON_DEMAND) {
1161 err = -EINVAL;
 1162 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1163 goto error;
1164 }
1165
1166 if (!mr)
1167 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1168 page_shift, access_flags);
1169
1170 if (IS_ERR(mr)) {
1171 err = PTR_ERR(mr);
1172 goto error;
1173 }
1174
1175 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
1176
1177 mr->umem = umem;
395a8e4c 1178 set_mr_fields(dev, mr, npages, length, access_flags);
e126ba97 1179
b4cfe447 1180#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
395a8e4c 1181 update_odp_mr(mr);
1182#endif
1183
1184 return &mr->ibmr;
1185
1186error:
1187 ib_umem_release(umem);
1188 return ERR_PTR(err);
1189}
1190
1191static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1192{
1193 struct umr_common *umrc = &dev->umrc;
a74d2416 1194 struct mlx5_ib_umr_context umr_context;
1195 struct mlx5_umr_wr umrwr;
1196 struct ib_send_wr *bad;
1197 int err;
1198
1199 memset(&umrwr.wr, 0, sizeof(umrwr));
1200 umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
1201 prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
e126ba97 1202
a74d2416 1203 mlx5_ib_init_umr_context(&umr_context);
e126ba97 1204 down(&umrc->sem);
e622f2f4 1205 err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
1206 if (err) {
1207 up(&umrc->sem);
1208 mlx5_ib_dbg(dev, "err %d\n", err);
1209 goto error;
1210 } else {
1211 wait_for_completion(&umr_context.done);
1212 up(&umrc->sem);
e126ba97 1213 }
a74d2416 1214 if (umr_context.status != IB_WC_SUCCESS) {
1215 mlx5_ib_warn(dev, "unreg umr failed\n");
1216 err = -EFAULT;
1217 goto error;
1218 }
1219 return 0;
1220
1221error:
1222 return err;
1223}
1224
1225static int
1226mlx5_alloc_priv_descs(struct ib_device *device,
1227 struct mlx5_ib_mr *mr,
1228 int ndescs,
1229 int desc_size)
1230{
1231 int size = ndescs * desc_size;
1232 int add_size;
1233 int ret;
1234
1235 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1236
1237 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1238 if (!mr->descs_alloc)
1239 return -ENOMEM;
1240
1241 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1242
1243 mr->desc_map = dma_map_single(device->dma_device, mr->descs,
1244 size, DMA_TO_DEVICE);
1245 if (dma_mapping_error(device->dma_device, mr->desc_map)) {
1246 ret = -ENOMEM;
1247 goto err;
1248 }
1249
1250 return 0;
1251err:
1252 kfree(mr->descs_alloc);
1253
1254 return ret;
1255}
1256
1257static void
1258mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1259{
1260 if (mr->descs) {
1261 struct ib_device *device = mr->ibmr.device;
1262 int size = mr->max_descs * mr->desc_size;
1263
1264 dma_unmap_single(device->dma_device, mr->desc_map,
1265 size, DMA_TO_DEVICE);
1266 kfree(mr->descs_alloc);
1267 mr->descs = NULL;
1268 }
1269}
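/*
 * The private descriptor array stays DMA-mapped for the lifetime of
 * the MR; the extra MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN bytes in
 * the allocation let PTR_ALIGN() put mr->descs on a 2KB boundary while
 * mr->descs_alloc keeps the original pointer for kfree().
 */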
1270
6aec21f6 1271static int clean_mr(struct mlx5_ib_mr *mr)
e126ba97 1272{
6aec21f6 1273 struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1274 int umred = mr->umred;
1275 int err;
1276
1277 if (mr->sig) {
1278 if (mlx5_core_destroy_psv(dev->mdev,
1279 mr->sig->psv_memory.psv_idx))
1280 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1281 mr->sig->psv_memory.psv_idx);
1282 if (mlx5_core_destroy_psv(dev->mdev,
1283 mr->sig->psv_wire.psv_idx))
1284 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1285 mr->sig->psv_wire.psv_idx);
1286 kfree(mr->sig);
1287 mr->sig = NULL;
1288 }
1289
1290 mlx5_free_priv_descs(mr);
1291
e126ba97 1292 if (!umred) {
b4cfe447 1293 err = destroy_mkey(dev, mr);
1294 if (err) {
1295 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1296 mr->mmr.key, err);
1297 return err;
1298 }
1299 } else {
1300 err = unreg_umr(dev, mr);
1301 if (err) {
1302 mlx5_ib_warn(dev, "failed unregister\n");
1303 return err;
1304 }
1305 free_cached_mr(dev, mr);
1306 }
1307
1308 if (!umred)
1309 kfree(mr);
1310
1311 return 0;
1312}
1313
1314int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1315{
1316 struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1317 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1318 int npages = mr->npages;
1319 struct ib_umem *umem = mr->umem;
1320
1321#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1322 if (umem && umem->odp_data) {
1323 /* Prevent new page faults from succeeding */
1324 mr->live = 0;
1325 /* Wait for all running page-fault handlers to finish. */
1326 synchronize_srcu(&dev->mr_srcu);
1327 /* Destroy all page mappings */
1328 mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1329 ib_umem_end(umem));
1330 /*
1331 * We kill the umem before the MR for ODP,
1332 * so that there will not be any invalidations in
1333 * flight, looking at the *mr struct.
1334 */
1335 ib_umem_release(umem);
1336 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1337
1338 /* Avoid double-freeing the umem. */
1339 umem = NULL;
1340 }
1341#endif
1342
1343 clean_mr(mr);
1344
1345 if (umem) {
1346 ib_umem_release(umem);
6aec21f6 1347 atomic_sub(npages, &dev->mdev->priv.reg_pages);
1348 }
1349
1350 return 0;
1351}
1352
1353struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1354 enum ib_mr_type mr_type,
1355 u32 max_num_sg)
1356{
1357 struct mlx5_ib_dev *dev = to_mdev(pd->device);
1358 struct mlx5_create_mkey_mbox_in *in;
1359 struct mlx5_ib_mr *mr;
1360 int access_mode, err;
9bee178b 1361 int ndescs = roundup(max_num_sg, 4);
1362
1363 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1364 if (!mr)
1365 return ERR_PTR(-ENOMEM);
1366
1367 in = kzalloc(sizeof(*in), GFP_KERNEL);
1368 if (!in) {
1369 err = -ENOMEM;
1370 goto err_free;
1371 }
1372
968e78dd 1373 in->seg.status = MLX5_MKEY_STATUS_FREE;
1374 in->seg.xlt_oct_size = cpu_to_be32(ndescs);
1375 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1376 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
3121e3c4 1377
1378 if (mr_type == IB_MR_TYPE_MEM_REG) {
1379 access_mode = MLX5_ACCESS_MODE_MTT;
1380 in->seg.log2_page_size = PAGE_SHIFT;
1381
1382 err = mlx5_alloc_priv_descs(pd->device, mr,
1383 ndescs, sizeof(u64));
1384 if (err)
1385 goto err_free_in;
1386
1387 mr->desc_size = sizeof(u64);
1388 mr->max_descs = ndescs;
9bee178b 1389 } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1390 u32 psv_index[2];
1391
1392 in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
1393 MLX5_MKEY_BSF_EN);
1394 in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
1395 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1396 if (!mr->sig) {
1397 err = -ENOMEM;
1398 goto err_free_in;
1399 }
1400
1401 /* create mem & wire PSVs */
9603b61d 1402 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1403 2, psv_index);
1404 if (err)
1405 goto err_free_sig;
1406
1407 access_mode = MLX5_ACCESS_MODE_KLM;
1408 mr->sig->psv_memory.psv_idx = psv_index[0];
1409 mr->sig->psv_wire.psv_idx = psv_index[1];
1410
1411 mr->sig->sig_status_checked = true;
1412 mr->sig->sig_err_exists = false;
1413 /* Next UMR, Arm SIGERR */
1414 ++mr->sig->sigerr_count;
1415 } else {
1416 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1417 err = -EINVAL;
1418 goto err_free_in;
1419 }
1420
1421 in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
9603b61d 1422 err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
1423 NULL, NULL, NULL);
1424 if (err)
1425 goto err_destroy_psv;
1426
1427 mr->ibmr.lkey = mr->mmr.key;
1428 mr->ibmr.rkey = mr->mmr.key;
1429 mr->umem = NULL;
1430 kfree(in);
1431
1432 return &mr->ibmr;
1433
1434err_destroy_psv:
1435 if (mr->sig) {
9603b61d 1436 if (mlx5_core_destroy_psv(dev->mdev,
1437 mr->sig->psv_memory.psv_idx))
1438 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1439 mr->sig->psv_memory.psv_idx);
9603b61d 1440 if (mlx5_core_destroy_psv(dev->mdev,
1441 mr->sig->psv_wire.psv_idx))
1442 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1443 mr->sig->psv_wire.psv_idx);
1444 }
8a187ee5 1445 mlx5_free_priv_descs(mr);
1446err_free_sig:
1447 kfree(mr->sig);
1448err_free_in:
1449 kfree(in);
1450err_free:
1451 kfree(mr);
1452 return ERR_PTR(err);
1453}
1454
1455int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1456 struct ib_mr_status *mr_status)
1457{
1458 struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1459 int ret = 0;
1460
1461 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1462 pr_err("Invalid status check mask\n");
1463 ret = -EINVAL;
1464 goto done;
1465 }
1466
1467 mr_status->fail_status = 0;
1468 if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1469 if (!mmr->sig) {
1470 ret = -EINVAL;
1471 pr_err("signature status check requested on a non-signature enabled MR\n");
1472 goto done;
1473 }
1474
1475 mmr->sig->sig_status_checked = true;
1476 if (!mmr->sig->sig_err_exists)
1477 goto done;
1478
1479 if (ibmr->lkey == mmr->sig->err_item.key)
1480 memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1481 sizeof(mr_status->sig_err));
1482 else {
1483 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1484 mr_status->sig_err.sig_err_offset = 0;
1485 mr_status->sig_err.key = mmr->sig->err_item.key;
1486 }
1487
1488 mmr->sig->sig_err_exists = false;
1489 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1490 }
1491
1492done:
1493 return ret;
1494}
1495
1496static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1497{
1498 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1499 __be64 *descs;
1500
1501 if (unlikely(mr->ndescs == mr->max_descs))
1502 return -ENOMEM;
1503
1504 descs = mr->descs;
1505 descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1506
1507 return 0;
1508}
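/*
 * Each descriptor written by mlx5_set_page() is a 64-bit page address
 * with the read/write enable bits (MLX5_EN_RD/MLX5_EN_WR) or'ed into
 * the low bits, matching the MTT layout the device expects.
 */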
1509
1510int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
1511 struct scatterlist *sg,
1512 int sg_nents)
1513{
1514 struct mlx5_ib_mr *mr = to_mmr(ibmr);
1515 int n;
1516
1517 mr->ndescs = 0;
1518
1519 ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1520 mr->desc_size * mr->max_descs,
1521 DMA_TO_DEVICE);
1522
1523 n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
1524
1525 ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1526 mr->desc_size * mr->max_descs,
1527 DMA_TO_DEVICE);
1528
1529 return n;
1530}