/* drivers/infiniband/hw/mlx5/mr.c @ commit 49fc3ca735a4de957ac173ce653367522d5f83cd */
/*
 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
        MAX_PENDING_REG_MR = 8,
};

enum {
        MLX5_UMR_ALIGN = 2048
};

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;

        if (order < cache->ent[0].order)
                return 0;
        else
                return order - cache->ent[0].order;
}

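/*
 * Completion handler for the asynchronous mkey creation started in
 * add_keys(). It drops the entry's pending count; on any failure it
 * frees the MR and arms delay_timer so the cache work stops refilling
 * for about a second. On success it combines the firmware mkey index
 * with a locally generated key byte, adds the MR to its cache bucket
 * and registers it in the device-wide mkey radix tree.
 */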
static void reg_mr_callback(int status, void *context)
{
        struct mlx5_ib_mr *mr = context;
        struct mlx5_ib_dev *dev = mr->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int c = order2idx(dev, mr->order);
        struct mlx5_cache_ent *ent = &cache->ent[c];
        u8 key;
        unsigned long flags;
        struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
        int err;

        spin_lock_irqsave(&ent->lock, flags);
        ent->pending--;
        spin_unlock_irqrestore(&ent->lock, flags);
        if (status) {
                mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        if (mr->out.hdr.status) {
                mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
                             mr->out.hdr.status,
                             be32_to_cpu(mr->out.hdr.syndrome));
                kfree(mr);
                dev->fill_delay = 1;
                mod_timer(&dev->delay_timer, jiffies + HZ);
                return;
        }

        spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
        key = dev->mdev->priv.mkey_key++;
        spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
        mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

        cache->last_add = jiffies;

        spin_lock_irqsave(&ent->lock, flags);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        ent->size++;
        spin_unlock_irqrestore(&ent->lock, flags);

        write_lock_irqsave(&table->lock, flags);
        err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
                                &mr->mmr);
        if (err)
                pr_err("Error inserting to mr tree. 0x%x\n", -err);
        write_unlock_irqrestore(&table->lock, flags);
}

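/*
 * Asynchronously create up to @num cached MRs for cache entry @c. Each
 * MR covers 1 << ent->order pages of 4KB (log2_page_size = 12) and is
 * created in the FREE state with UMR enabled, so it can later be bound
 * to a user registration through a UMR WQE. At most MAX_PENDING_REG_MR
 * creations may be outstanding per entry; once that limit is reached
 * the function stops and returns -EAGAIN so the caller can retry.
 */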
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int npages = 1 << ent->order;
        int err = 0;
        int i;

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        for (i = 0; i < num; i++) {
                if (ent->pending >= MAX_PENDING_REG_MR) {
                        err = -EAGAIN;
                        break;
                }

                mr = kzalloc(sizeof(*mr), GFP_KERNEL);
                if (!mr) {
                        err = -ENOMEM;
                        break;
                }
                mr->order = ent->order;
                mr->umred = 1;
                mr->dev = dev;
                in->seg.status = MLX5_MKEY_STATUS_FREE;
                in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
                in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
                in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
                in->seg.log2_page_size = 12;

                spin_lock_irq(&ent->lock);
                ent->pending++;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
                                            sizeof(*in), reg_mr_callback,
                                            mr, &mr->out);
                if (err) {
                        spin_lock_irq(&ent->lock);
                        ent->pending--;
                        spin_unlock_irq(&ent->lock);
                        mlx5_ib_warn(dev, "create mkey failed %d\n", err);
                        kfree(mr);
                        break;
                }
        }

        kfree(in);
        return err;
}

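/*
 * Destroy up to @num MRs from the head of cache entry @c, stopping
 * early if the entry runs empty. Used by the debugfs "size" file and by
 * the cache work function when an entry has grown past twice its limit.
 */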
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;
        int i;

        for (i = 0; i < num; i++) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}

static ssize_t size_write(struct file *filp, const char __user *buf,
                          size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var < ent->limit)
                return -EINVAL;

        if (var > ent->size) {
                do {
                        err = add_keys(dev, c, var - ent->size);
                        if (err && err != -EAGAIN)
                                return err;

                        usleep_range(3000, 5000);
                } while (err);
        } else if (var < ent->size) {
                remove_keys(dev, c, ent->size - var);
        }

        return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
                         loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations size_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = size_write,
        .read   = size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
                           size_t count, loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        struct mlx5_ib_dev *dev = ent->dev;
        char lbuf[20];
        u32 var;
        int err;
        int c;

        if (copy_from_user(lbuf, buf, sizeof(lbuf)))
                return -EFAULT;

        c = order2idx(dev, ent->order);
        lbuf[sizeof(lbuf) - 1] = 0;

        if (sscanf(lbuf, "%u", &var) != 1)
                return -EINVAL;

        if (var > ent->size)
                return -EINVAL;

        ent->limit = var;

        if (ent->cur < ent->limit) {
                err = add_keys(dev, c, 2 * ent->limit - ent->cur);
                if (err)
                        return err;
        }

        return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
                          loff_t *pos)
{
        struct mlx5_cache_ent *ent = filp->private_data;
        char lbuf[20];
        int err;

        if (*pos)
                return 0;

        err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
        if (err < 0)
                return err;

        if (copy_to_user(buf, lbuf, err))
                return -EFAULT;

        *pos += err;

        return err;
}

static const struct file_operations limit_fops = {
        .owner  = THIS_MODULE,
        .open   = simple_open,
        .write  = limit_write,
        .read   = limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
        int i;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                if (cache->ent[i].cur < cache->ent[i].limit)
                        return 1;
        }

        return 0;
}

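/*
 * Background maintenance for one cache entry. While the cache is
 * running and fill_delay is clear, the entry is topped up towards twice
 * its limit one mkey at a time, backing off for a few milliseconds on
 * -EAGAIN and for a second on hard errors. Once the entry holds more
 * than twice its limit it is trimmed one mkey at a time, but only when
 * no entry is below its limit and nothing has been added for 300
 * seconds; otherwise trimming is re-evaluated 300 seconds later.
 */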
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
        struct mlx5_ib_dev *dev = ent->dev;
        struct mlx5_mr_cache *cache = &dev->cache;
        int i = order2idx(dev, ent->order);
        int err;

        if (cache->stopped)
                return;

        ent = &dev->cache.ent[i];
        if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
                err = add_keys(dev, i, 1);
                if (ent->cur < 2 * ent->limit) {
                        if (err == -EAGAIN) {
                                mlx5_ib_dbg(dev, "returned eagain, order %d\n",
                                            i + 2);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(3));
                        } else if (err) {
                                mlx5_ib_warn(dev, "command failed order %d, err %d\n",
                                             i + 2, err);
                                queue_delayed_work(cache->wq, &ent->dwork,
                                                   msecs_to_jiffies(1000));
                        } else {
                                queue_work(cache->wq, &ent->work);
                        }
                }
        } else if (ent->cur > 2 * ent->limit) {
                if (!someone_adding(cache) &&
                    time_after(jiffies, cache->last_add + 300 * HZ)) {
                        remove_keys(dev, i, 1);
                        if (ent->cur > ent->limit)
                                queue_work(cache->wq, &ent->work);
                } else {
                        queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
                }
        }
}

static void delayed_cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, dwork.work);
        __cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
        struct mlx5_cache_ent *ent;

        ent = container_of(work, struct mlx5_cache_ent, work);
        __cache_work_func(ent);
}

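/*
 * Take an MR from the cache for a registration of the given order. The
 * search starts at the matching bucket and falls back to larger orders;
 * buckets found empty are queued for a background refill, as is the
 * bucket an MR is taken from once it drops below its limit. Returns
 * NULL (and counts a miss) when nothing suitable is cached.
 */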
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_ib_mr *mr = NULL;
        struct mlx5_cache_ent *ent;
        int c;
        int i;

        c = order2idx(dev, order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }

        for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];

                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

                spin_lock_irq(&ent->lock);
                if (!list_empty(&ent->head)) {
                        mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
                                              list);
                        list_del(&mr->list);
                        ent->cur--;
                        spin_unlock_irq(&ent->lock);
                        if (ent->cur < ent->limit)
                                queue_work(cache->wq, &ent->work);
                        break;
                }
                spin_unlock_irq(&ent->lock);

                queue_work(cache->wq, &ent->work);

                if (mr)
                        break;
        }

        if (!mr)
                cache->ent[c].miss++;

        return mr;
}

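/*
 * Return a cache-originated MR to its bucket. If the bucket then holds
 * more than twice its limit, the work function is kicked so it can
 * shrink the bucket.
 */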
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int shrink = 0;
        int c;

        c = order2idx(dev, mr->order);
        if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
        ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
        ent->cur++;
        if (ent->cur > 2 * ent->limit)
                shrink = 1;
        spin_unlock_irq(&ent->lock);

        if (shrink)
                queue_work(cache->wq, &ent->work);
}

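/*
 * Drain one cache entry completely during teardown: cancel its delayed
 * work and destroy every mkey still sitting on its free list.
 */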
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent = &cache->ent[c];
        struct mlx5_ib_mr *mr;
        int err;

        cancel_delayed_work(&ent->dwork);
        while (1) {
                spin_lock_irq(&ent->lock);
                if (list_empty(&ent->head)) {
                        spin_unlock_irq(&ent->lock);
                        return;
                }
                mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
                list_del(&mr->list);
                ent->cur--;
                ent->size--;
                spin_unlock_irq(&ent->lock);
                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
                if (err)
                        mlx5_ib_warn(dev, "failed destroy mkey\n");
                else
                        kfree(mr);
        }
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int i;

        if (!mlx5_debugfs_root)
                return 0;

        cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
        if (!cache->root)
                return -ENOMEM;

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                ent = &cache->ent[i];
                sprintf(ent->name, "%d", ent->order);
                ent->dir = debugfs_create_dir(ent->name, cache->root);
                if (!ent->dir)
                        return -ENOMEM;

                ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
                                                 &size_fops);
                if (!ent->fsize)
                        return -ENOMEM;

                ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
                                                  &limit_fops);
                if (!ent->flimit)
                        return -ENOMEM;

                ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
                                               &ent->cur);
                if (!ent->fcur)
                        return -ENOMEM;

                ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
                                                &ent->miss);
                if (!ent->fmiss)
                        return -ENOMEM;
        }

        return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
        if (!mlx5_debugfs_root)
                return;

        debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
        struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

        dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
        int limit;
        int err;
        int i;

        cache->wq = create_singlethread_workqueue("mkey_cache");
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
                return -ENOMEM;
        }

        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
                INIT_LIST_HEAD(&cache->ent[i].head);
                spin_lock_init(&cache->ent[i].lock);

                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;

                if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
                        limit = dev->mdev->profile->mr_cache[i].limit;
                else
                        limit = 0;

                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
                ent->limit = limit;
                queue_work(cache->wq, &ent->work);
        }

        err = mlx5_mr_cache_debugfs_init(dev);
        if (err)
                mlx5_ib_warn(dev, "cache debugfs failure\n");

        return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
        int i;

        dev->cache.stopped = 1;
        flush_workqueue(dev->cache.wq);

        mlx5_mr_cache_debugfs_cleanup(dev);

        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
                clean_keys(dev, i);

        destroy_workqueue(dev->cache.wq);
        del_timer_sync(&dev->delay_timer);

        return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_mkey_seg *seg;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        seg = &in->seg;
        seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
        seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
        seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        seg->start_addr = 0;

        err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
                                    NULL);
        if (err)
                goto err_in;

        kfree(in);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_in:
        kfree(in);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
        u64 offset;
        int npages;

        offset = addr & (page_size - 1);
        npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
        return (npages + 1) / 2;
}

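/*
 * A registration can go through the UMR path (and reuse a cached mkey)
 * only when its page-count order fits within MLX5_MAX_UMR_SHIFT; larger
 * regions fall back to a direct firmware create_mkey via reg_create().
 */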
static int use_umr(int order)
{
        return order <= MLX5_MAX_UMR_SHIFT;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             struct ib_sge *sg, u64 dma, int n, u32 key,
                             int page_shift, u64 virt_addr, u64 len,
                             int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct ib_mr *mr = dev->umrc.mr;
        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
        sg->lkey = mr->lkey;

        wr->next = NULL;
        wr->send_flags = 0;
        wr->sg_list = sg;
        if (n)
                wr->num_sge = 1;
        else
                wr->num_sge = 0;

        wr->opcode = MLX5_IB_WR_UMR;

        umrwr->npages = n;
        umrwr->page_shift = page_shift;
        umrwr->mkey = key;
        umrwr->target.virt_addr = virt_addr;
        umrwr->length = len;
        umrwr->access_flags = access_flags;
        umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
{
        struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

        wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
        wr->opcode = MLX5_IB_WR_UMR;
        umrwr->mkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
        struct mlx5_ib_umr_context *context;
        struct ib_wc wc;
        int err;

        while (1) {
                err = ib_poll_cq(cq, 1, &wc);
                if (err < 0) {
                        pr_warn("poll cq error %d\n", err);
                        return;
                }
                if (err == 0)
                        break;

                context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
                context->status = wc.status;
                complete(&context->done);
        }
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

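/*
 * Register a user memory region through the UMR path: take a cached
 * mkey of the right order (if the bucket is empty, creation of a key is
 * kicked off and -EAGAIN is returned so the caller can fall back),
 * build a page list aligned to MLX5_UMR_ALIGN, DMA-map it, post a UMR
 * WQE on the dedicated QP and wait for its completion. On failure the
 * MR is returned to the cache and an ERR_PTR is handed back.
 */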
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                  u64 virt_addr, u64 len, int npages,
                                  int page_shift, int order, int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
        struct mlx5_ib_umr_context umr_context;
        struct ib_send_wr wr, *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int size;
        __be64 *mr_pas;
        __be64 *pas;
        dma_addr_t dma;
        int err = 0;
        int i;

        for (i = 0; i < 1; i++) {
                mr = alloc_cached_mr(dev, order);
                if (mr)
                        break;

                err = add_keys(dev, order2idx(dev, order), 1);
                if (err && err != -EAGAIN) {
                        mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
                        break;
                }
        }

        if (!mr)
                return ERR_PTR(-EAGAIN);

        /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
         * To avoid copying garbage after the pas array, we allocate
         * a little more. */
        size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
        mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
        if (!mr_pas) {
                err = -ENOMEM;
                goto free_mr;
        }

        pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
        mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
        /* Clear padding after the actual pages. */
        memset(pas + npages, 0, size - npages * sizeof(u64));

        dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
        if (dma_mapping_error(ddev, dma)) {
                err = -ENOMEM;
                goto free_pas;
        }

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)&umr_context;
        prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
                         virt_addr, len, access_flags);

        mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                goto unmap_dma;
        } else {
                wait_for_completion(&umr_context.done);
                if (umr_context.status != IB_WC_SUCCESS) {
                        mlx5_ib_warn(dev, "reg umr failed\n");
                        err = -EFAULT;
                }
        }

        mr->mmr.iova = virt_addr;
        mr->mmr.size = len;
        mr->mmr.pd = to_mpd(pd)->pdn;

unmap_dma:
        up(&umrc->sem);
        dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
        kfree(mr_pas);

free_mr:
        if (err) {
                free_cached_mr(dev, mr);
                return ERR_PTR(err);
        }

        return mr;
}

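/*
 * Slow-path registration used when the UMR path is not applicable or
 * the cache is empty: build a full create_mkey mailbox, including the
 * page list, and have firmware create the mkey directly. When the
 * device supports on-demand paging, MLX5_MKEY_INBOX_PG_ACCESS is set so
 * the per-page access bits in the list are honoured.
 */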
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
                                     u64 length, struct ib_umem *umem,
                                     int npages, int page_shift,
                                     int access_flags)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int inlen;
        int err;
        bool pg_cap = !!(dev->mdev->caps.gen.flags &
                         MLX5_DEV_CAP_FLAG_ON_DMND_PG);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
        in = mlx5_vzalloc(inlen);
        if (!in) {
                err = -ENOMEM;
                goto err_1;
        }
        mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
                             pg_cap ? MLX5_IB_MTT_PRESENT : 0);

        /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
         * in the page list submitted with the command. */
        in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
        in->seg.flags = convert_access(access_flags) |
                MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        in->seg.start_addr = cpu_to_be64(virt_addr);
        in->seg.len = cpu_to_be64(length);
        in->seg.bsfs_octo_size = 0;
        in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
        in->seg.log2_page_size = page_shift;
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
                                                         1 << page_shift));
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
                                    NULL, NULL);
        if (err) {
                mlx5_ib_warn(dev, "create mkey failed\n");
                goto err_2;
        }
        mr->umem = umem;
        kvfree(in);

        mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

        return mr;

err_2:
        kvfree(in);

err_1:
        kfree(mr);

        return ERR_PTR(err);
}

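/*
 * reg_user_mr verb: pin the user pages, work out how they can be laid
 * out (npages, ncont, page_shift, order), try the cached UMR path first
 * and fall back to a direct firmware registration when the region is
 * too large for UMR or the cache is empty. The pinned-page count is
 * accounted in mdev->priv.reg_pages under dev->mr_lock.
 */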
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_ib_mr *mr = NULL;
        struct ib_umem *umem;
        int page_shift;
        int npages;
        int ncont;
        int order;
        int err;

        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
                    start, virt_addr, length, access_flags);
        umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
                           0);
        if (IS_ERR(umem)) {
                mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
                return (void *)umem;
        }

        mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
        if (!npages) {
                mlx5_ib_warn(dev, "avoid zero region\n");
                err = -EINVAL;
                goto error;
        }

        mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
                    npages, ncont, order, page_shift);

        if (use_umr(order)) {
                mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
                             order, access_flags);
                if (PTR_ERR(mr) == -EAGAIN) {
                        mlx5_ib_dbg(dev, "cache empty for order %d", order);
                        mr = NULL;
                }
        }

        if (!mr)
                mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
                                access_flags);

        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
                goto error;
        }

        mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

        mr->umem = umem;
        mr->npages = npages;
        spin_lock(&dev->mr_lock);
        dev->mdev->priv.reg_pages += npages;
        spin_unlock(&dev->mr_lock);
        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;

        return &mr->ibmr;

error:
        ib_umem_release(umem);
        return ERR_PTR(err);
}

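/*
 * Invalidate a cache-originated mkey by posting a UMR unregister WQE
 * (UNREG | FAIL_IF_FREE) on the dedicated QP and waiting for its
 * completion, so the mkey can go back to the cache in the free state.
 */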
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
        struct umr_common *umrc = &dev->umrc;
        struct mlx5_ib_umr_context umr_context;
        struct ib_send_wr wr, *bad;
        int err;

        memset(&wr, 0, sizeof(wr));
        wr.wr_id = (u64)(unsigned long)&umr_context;
        prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

        mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
        err = ib_post_send(umrc->qp, &wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
                goto error;
        } else {
                wait_for_completion(&umr_context.done);
                up(&umrc->sem);
        }
        if (umr_context.status != IB_WC_SUCCESS) {
                mlx5_ib_warn(dev, "unreg umr failed\n");
                err = -EFAULT;
                goto error;
        }
        return 0;

error:
        return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        struct ib_umem *umem = mr->umem;
        int npages = mr->npages;
        int umred = mr->umred;
        int err;

        if (!umred) {
                err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
                if (err) {
                        mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                                     mr->mmr.key, err);
                        return err;
                }
        } else {
                err = unreg_umr(dev, mr);
                if (err) {
                        mlx5_ib_warn(dev, "failed unregister\n");
                        return err;
                }
                free_cached_mr(dev, mr);
        }

        if (umem) {
                ib_umem_release(umem);
                spin_lock(&dev->mr_lock);
                dev->mdev->priv.reg_pages -= npages;
                spin_unlock(&dev->mr_lock);
        }

        if (!umred)
                kfree(mr);

        return 0;
}

struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
                                struct ib_mr_init_attr *mr_init_attr)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int access_mode, err;
        int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = MLX5_MKEY_STATUS_FREE;
        in->seg.xlt_oct_size = cpu_to_be32(ndescs);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        access_mode = MLX5_ACCESS_MODE_MTT;

        if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
                u32 psv_index[2];

                in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
                                               MLX5_MKEY_BSF_EN);
                in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
                mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
                if (!mr->sig) {
                        err = -ENOMEM;
                        goto err_free_in;
                }

                /* create mem & wire PSVs */
                err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
                                           2, psv_index);
                if (err)
                        goto err_free_sig;

                access_mode = MLX5_ACCESS_MODE_KLM;
                mr->sig->psv_memory.psv_idx = psv_index[0];
                mr->sig->psv_wire.psv_idx = psv_index[1];

                mr->sig->sig_status_checked = true;
                mr->sig->sig_err_exists = false;
                /* Next UMR, Arm SIGERR */
                ++mr->sig->sigerr_count;
        }

        in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
                                    NULL, NULL, NULL);
        if (err)
                goto err_destroy_psv;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;
        kfree(in);

        return &mr->ibmr;

err_destroy_psv:
        if (mr->sig) {
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_memory.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
                                     mr->sig->psv_memory.psv_idx);
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_wire.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
                                     mr->sig->psv_wire.psv_idx);
        }
err_free_sig:
        kfree(mr->sig);
err_free_in:
        kfree(in);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
        struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
        struct mlx5_ib_mr *mr = to_mmr(ibmr);
        int err;

        if (mr->sig) {
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_memory.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
                                     mr->sig->psv_memory.psv_idx);
                if (mlx5_core_destroy_psv(dev->mdev,
                                          mr->sig->psv_wire.psv_idx))
                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
                                     mr->sig->psv_wire.psv_idx);
                kfree(mr->sig);
        }

        err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
        if (err) {
                mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
                             mr->mmr.key, err);
                return err;
        }

        kfree(mr);

        return err;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
                                        int max_page_list_len)
{
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_create_mkey_mbox_in *in;
        struct mlx5_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        in = kzalloc(sizeof(*in), GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_free;
        }

        in->seg.status = MLX5_MKEY_STATUS_FREE;
        in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
        in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
        in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
        in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
        /*
         * TBD not needed - issue 197292
         */
        in->seg.log2_page_size = PAGE_SHIFT;

        err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
                                    NULL, NULL);
        kfree(in);
        if (err)
                goto err_free;

        mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.rkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free:
        kfree(mr);
        return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
                                                               int page_list_len)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl;
        int size = page_list_len * sizeof(u64);

        mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
        if (!mfrpl)
                return ERR_PTR(-ENOMEM);

        mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
        if (!mfrpl->ibfrpl.page_list)
                goto err_free;

        mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
                                                     size, &mfrpl->map,
                                                     GFP_KERNEL);
        if (!mfrpl->mapped_page_list)
                goto err_free;

        WARN_ON(mfrpl->map & 0x3f);

        return &mfrpl->ibfrpl;

err_free:
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
        return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
        struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
        struct mlx5_ib_dev *dev = to_mdev(page_list->device);
        int size = page_list->max_page_list_len * sizeof(u64);

        dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
                          mfrpl->map);
        kfree(mfrpl->ibfrpl.page_list);
        kfree(mfrpl);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
                            struct ib_mr_status *mr_status)
{
        struct mlx5_ib_mr *mmr = to_mmr(ibmr);
        int ret = 0;

        if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
                pr_err("Invalid status check mask\n");
                ret = -EINVAL;
                goto done;
        }

        mr_status->fail_status = 0;
        if (check_mask & IB_MR_CHECK_SIG_STATUS) {
                if (!mmr->sig) {
                        ret = -EINVAL;
                        pr_err("signature status check requested on a non-signature enabled MR\n");
                        goto done;
                }

                mmr->sig->sig_status_checked = true;
                if (!mmr->sig->sig_err_exists)
                        goto done;

                if (ibmr->lkey == mmr->sig->err_item.key)
                        memcpy(&mr_status->sig_err, &mmr->sig->err_item,
                               sizeof(mr_status->sig_err));
                else {
                        mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
                        mr_status->sig_err.sig_err_offset = 0;
                        mr_status->sig_err.key = mmr->sig->err_item.key;
                }

                mmr->sig->sig_err_exists = false;
                mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
        }

done:
        return ret;
}