powerpc/iommu/ioda2: Add get_table_size() to calculate the size of future table
[deliverable/linux.git] drivers/vfio/vfio_iommu_spapr_tce.c
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

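/*
 * Accounts @npages of pinned memory against the current process's
 * RLIMIT_MEMLOCK; fails with -ENOMEM if the limit would be exceeded and
 * the caller does not have CAP_IPC_LOCK.
 */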
static long try_increment_locked_vm(long npages)
{
	long ret = 0, locked, lock_limit;

	if (!current || !current->mm)
		return -ESRCH; /* process exited */

	if (!npages)
		return 0;

	down_write(&current->mm->mmap_sem);
	locked = current->mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		current->mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

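/*
 * Undoes the accounting done by try_increment_locked_vm(); the decrement is
 * clamped so that locked_vm never goes negative.
 */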
static void decrement_locked_vm(long npages)
{
	if (!current || !current->mm || !npages)
		return; /* process exited */

	down_write(&current->mm->mmap_sem);
	if (WARN_ON_ONCE(npages > current->mm->locked_vm))
		npages = current->mm->locked_vm;
	current->mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&current->mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

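/*
 * The expected userspace flow is roughly: open a container fd and check
 * VFIO_SPAPR_TCE_IOMMU via VFIO_CHECK_EXTENSION, let the VFIO core attach
 * a single IOMMU group (tce_iommu_attach_group()), query the 32-bit DMA
 * window with VFIO_IOMMU_SPAPR_TCE_GET_INFO, enable the container with
 * VFIO_IOMMU_ENABLE and then map/unmap buffers with VFIO_IOMMU_MAP_DMA
 * and VFIO_IOMMU_UNMAP_DMA; all of these are handled in tce_iommu_ioctl().
 */
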
/*
 * The container descriptor supports only a single group per container.
 * This is required by the API, as the container is not supplied with
 * an IOMMU group at initialization time.
 */
struct tce_container {
	struct mutex lock;
	struct iommu_group *grp;
	bool enabled;
	unsigned long locked_pages;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

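/*
 * Finds the IOMMU table which covers the given bus address (ioba) and
 * returns its index in the group's table array, or -1 if no table in the
 * attached group contains that address.
 */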
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;
	struct iommu_table_group *table_group;

	table_group = iommu_group_get_iommudata(container->grp);
	if (!table_group)
		return -1;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;

	if (!container->grp)
		return -ENXIO;

	if (!current->mm)
		return -ESRCH; /* process exited */

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult, if not
	 * impossible, to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits;
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the maximum that the
	 * guest can map.
	 *
	 * Unfortunately, at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
	 * each with a 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell here the amount of RAM used by the guest
	 * as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 *
	 * So we do not allow enabling a container without a group attached
	 * as there is no way to know how much we should increment
	 * the locked_vm counter.
	 */
	table_group = iommu_group_get_iommudata(container->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	if (!current->mm)
		return;

	decrement_locked_vm(container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if (arg != VFIO_SPAPR_TCE_IOMMU) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);

	return container;
}

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;

	WARN_ON(container->grp);

	if (container->grp)
		tce_iommu_detach_group(iommu_data, container->grp);

	tce_iommu_disable(container);
	mutex_destroy(&container->lock);

	kfree(container);
}

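/*
 * Drops the page reference taken by tce_iommu_use_page() for the page
 * backing the given host physical address.
 */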
static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

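/*
 * Clears @pages TCE entries starting at @entry: each entry is exchanged
 * with an empty one (DMA_NONE) and, if it was in use, the reference on
 * the backing page is dropped.
 */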
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;

	for ( ; pages; --pages, ++entry) {
		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

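/*
 * Pins the userspace page behind @tce with get_user_pages_fast() and
 * returns the corresponding host physical address via @hpa.
 */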
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

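/*
 * Pins @pages userspace pages starting at @tce and programs them into the
 * IOMMU table at @entry with the requested DMA @direction; on failure the
 * entries created so far are cleared again.
 */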
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

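/*
 * Container fd ioctl handler: reports the 32-bit DMA window
 * (VFIO_IOMMU_SPAPR_TCE_GET_INFO), enables/disables the container,
 * maps and unmaps user memory and forwards EEH operations.
 */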
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;

	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct iommu_table_group *table_group;

		if (WARN_ON(!container->grp))
			return -ENXIO;

		table_group = iommu_group_get_iommudata(container->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		ret = tce_iommu_build(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.vaddr,
				param.size >> tbl->it_page_shift,
				direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flags are supported for now */
		if (param.flags)
			return -EINVAL;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;


	case VFIO_IOMMU_DISABLE:
		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;
	case VFIO_EEH_PE_OP:
		if (!container->grp)
			return -ENODEV;

		return vfio_spapr_iommu_eeh_ioctl(container->grp,
				cmd, arg);
	}

	return -ENOTTY;
}

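/*
 * Returns ownership of all tables in the group back to the platform code:
 * remaining TCE entries are cleared and the tables are released.
 */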
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);
	}
}

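/*
 * Takes exclusive ownership of every table in the group for VFIO use;
 * on failure, ownership of the tables acquired so far is rolled back.
 */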
static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	table_group->ops->take_ownership(table_group);

	return 0;
}

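/*
 * Called by the VFIO core when a group is added to the container: only one
 * group per container is allowed, and ownership of its tables is taken
 * either directly or via the platform's take_ownership() callback when
 * table_group->ops provides one.
 */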
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	if (container->grp) {
		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
				iommu_group_id(container->grp),
				iommu_group_id(iommu_group));
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (container->enabled) {
		pr_err("tce_vfio: attaching group #%u to enabled container\n",
				iommu_group_id(iommu_group));
		ret = -EBUSY;
		goto unlock_exit;
	}

	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENXIO;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)
		ret = tce_iommu_take_ownership(container, table_group);
	else
		ret = tce_iommu_take_ownership_ddw(container, table_group);

	if (!ret)
		container->grp = iommu_group;

unlock_exit:
	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;

	mutex_lock(&container->lock);
	if (iommu_group != container->grp) {
		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
				iommu_group_id(iommu_group),
				iommu_group_id(container->grp));
		goto unlock_exit;
	}

	if (container->enabled) {
		pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
				iommu_group_id(container->grp));
		tce_iommu_disable(container);
	}

	/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	container->grp = NULL;

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);