vfio: powerpc/spapr: Moving pinning/unpinning to helpers
drivers/vfio/vfio_iommu_spapr_tce.c
/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

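/*
 * Account @npages of memory as locked by the current process against
 * RLIMIT_MEMLOCK. Fails with -ENOMEM if the limit would be exceeded and
 * the task does not have CAP_IPC_LOCK.
 */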
static long try_increment_locked_vm(long npages)
{
	long ret = 0, locked, lock_limit;

	if (!current || !current->mm)
		return -ESRCH; /* process exited */

	if (!npages)
		return 0;

	down_write(&current->mm->mmap_sem);
	locked = current->mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		current->mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&current->mm->mmap_sem);

	return ret;
}

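/*
 * Return @npages previously accounted by try_increment_locked_vm().
 * Clamps at the currently accounted amount (with a warning) so the
 * counter can never go negative.
 */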
static void decrement_locked_vm(long npages)
{
	if (!current || !current->mm || !npages)
		return; /* process exited */

	down_write(&current->mm->mmap_sem);
	if (WARN_ON_ONCE(npages > current->mm->locked_vm))
		npages = current->mm->locked_vm;
	current->mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			current->mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&current->mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	struct iommu_table *tbl;
	bool enabled;
	unsigned long locked_pages;
};

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

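/*
 * Enable the container: account the worst-case locked memory (the entire
 * DMA window of the attached table) against RLIMIT_MEMLOCK and mark the
 * container ready to accept mapping requests.
 */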
static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table *tbl = container->tbl;

	if (!container->tbl)
		return -ENXIO;

	if (!current->mm)
		return -ESRCH; /* process exited */

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult or impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits,
	 * as that would effectively kill the guest at random points; it is
	 * much better to enforce the limit based on the max that the guest
	 * can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
	 * each with a 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell here the amount of RAM used by the guest
	 * as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 */
	locked = (tbl->it_size << tbl->it_page_shift) >> PAGE_SHIFT;
	ret = try_increment_locked_vm(locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	if (!current->mm)
		return;

	decrement_locked_vm(container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if (arg != VFIO_SPAPR_TCE_IOMMU) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);

	return container;
}

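/*
 * Release callback for the container fd: detach the group if one is still
 * attached, drop the locked-memory accounting via tce_iommu_disable() and
 * free the container.
 */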
static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;

	WARN_ON(container->tbl && !container->tbl->it_group);

	if (container->tbl && container->tbl->it_group)
		tce_iommu_detach_group(iommu_data, container->tbl->it_group);

	tce_iommu_disable(container);
	mutex_destroy(&container->lock);

	kfree(container);
}

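/*
 * Undo the pinning done by tce_iommu_use_page(): if the old TCE allowed
 * DMA writes, mark the page dirty, then drop the page reference.
 */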
static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long oldtce)
{
	struct page *page;

	if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
		return;

	page = pfn_to_page(oldtce >> PAGE_SHIFT);

	if (oldtce & TCE_PCI_WRITE)
		SetPageDirty(page);

	put_page(page);
}

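/*
 * Clear @pages TCE entries starting at @entry and release the user pages
 * they referenced.
 */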
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldtce;

	for ( ; pages; --pages, ++entry) {
		oldtce = iommu_clear_tce(tbl, entry);
		if (!oldtce)
			continue;

		tce_iommu_unuse_page(container, oldtce);
	}

	return 0;
}

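/*
 * Pin the user page backing @tce with get_user_pages_fast() and return its
 * host physical address via @hpa. The reference is dropped later by
 * tce_iommu_unuse_page().
 */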
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

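/*
 * Pin user memory and program @pages TCE entries starting at @entry.
 * On failure, the entries created so far are cleared and their pages
 * released.
 */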
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		ret = iommu_tce_build(tbl, entry + i, (unsigned long) __va(hpa),
				direction);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}
		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

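/*
 * ioctl handler for the container fd. An illustrative (not exhaustive)
 * userspace sequence against this driver looks roughly like:
 *
 *	ioctl(container_fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container_fd);
 *	ioctl(container_fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *	ioctl(container_fd, VFIO_IOMMU_ENABLE);
 *	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
 *	...
 *	ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
 *	ioctl(container_fd, VFIO_IOMMU_DISABLE);
 */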
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;

	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = tbl->it_offset << tbl->it_page_shift;
		info.dma32_window_size = tbl->it_size << tbl->it_page_shift;
		info.flags = 0;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = container->tbl;
		unsigned long tce;

		if (!container->enabled)
			return -EPERM;

		if (!tbl)
			return -ENXIO;

		BUG_ON(!tbl->it_group);

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		tce = param.vaddr;
		if (param.flags & VFIO_DMA_MAP_FLAG_READ)
			tce |= TCE_PCI_READ;
		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
			tce |= TCE_PCI_WRITE;

		ret = iommu_tce_put_param_check(tbl, param.iova, tce);
		if (ret)
			return ret;

		ret = tce_iommu_build(container, tbl,
				param.iova >> tbl->it_page_shift,
				tce, param.size >> tbl->it_page_shift);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = container->tbl;

		if (!container->enabled)
			return -EPERM;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;
	case VFIO_EEH_PE_OP:
		if (!container->tbl || !container->tbl->it_group)
			return -ENODEV;

		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
				cmd, arg);
	}

	return -ENOTTY;
}

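/*
 * Attach an IOMMU group to the container. Only one group per container is
 * supported; ownership of the group's TCE table is taken from the kernel
 * so that userspace controls the mappings exclusively.
 */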
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	if (container->tbl) {
		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
				iommu_group_id(container->tbl->it_group),
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else if (container->enabled) {
		pr_err("tce_vfio: attaching group #%u to enabled container\n",
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else {
		ret = iommu_take_ownership(tbl);
		if (!ret)
			container->tbl = tbl;
	}

	mutex_unlock(&container->lock);

	return ret;
}

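/*
 * Detach the group from the container: disable the container if it is
 * still enabled, clear and release all TCE entries, and hand ownership of
 * the table back to the kernel.
 */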
static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);
	if (tbl != container->tbl) {
		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
				iommu_group_id(iommu_group),
				iommu_group_id(tbl->it_group));
	} else {
		if (container->enabled) {
			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
					iommu_group_id(tbl->it_group));
			tce_iommu_disable(container);
		}

		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
				iommu_group_id(iommu_group), iommu_group); */
		container->tbl = NULL;
		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		iommu_release_ownership(tbl);
	}
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);