1 /*
2 * VFIO core
3 *
4 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
5 * Author: Alex Williamson <alex.williamson@redhat.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * Derived from original vfio:
12 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
13 * Author: Tom Lyon, pugs@cisco.com
14 */
15
16 #include <linux/cdev.h>
17 #include <linux/compat.h>
18 #include <linux/device.h>
19 #include <linux/file.h>
20 #include <linux/anon_inodes.h>
21 #include <linux/fs.h>
22 #include <linux/idr.h>
23 #include <linux/iommu.h>
24 #include <linux/list.h>
25 #include <linux/miscdevice.h>
26 #include <linux/module.h>
27 #include <linux/mutex.h>
28 #include <linux/pci.h>
29 #include <linux/rwsem.h>
30 #include <linux/sched.h>
31 #include <linux/slab.h>
32 #include <linux/stat.h>
33 #include <linux/string.h>
34 #include <linux/uaccess.h>
35 #include <linux/vfio.h>
36 #include <linux/wait.h>
37
38 #define DRIVER_VERSION "0.3"
39 #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
40 #define DRIVER_DESC "VFIO - User Level meta-driver"
41
42 static struct vfio {
43 struct class *class;
44 struct list_head iommu_drivers_list;
45 struct mutex iommu_drivers_lock;
46 struct list_head group_list;
47 struct idr group_idr;
48 struct mutex group_lock;
49 struct cdev group_cdev;
50 dev_t group_devt;
51 wait_queue_head_t release_q;
52 } vfio;
53
54 struct vfio_iommu_driver {
55 const struct vfio_iommu_driver_ops *ops;
56 struct list_head vfio_next;
57 };
58
59 struct vfio_container {
60 struct kref kref;
61 struct list_head group_list;
62 struct rw_semaphore group_lock;
63 struct vfio_iommu_driver *iommu_driver;
64 void *iommu_data;
65 bool noiommu;
66 };
67
68 struct vfio_unbound_dev {
69 struct device *dev;
70 struct list_head unbound_next;
71 };
72
73 struct vfio_group {
74 struct kref kref;
75 int minor;
76 atomic_t container_users;
77 struct iommu_group *iommu_group;
78 struct vfio_container *container;
79 struct list_head device_list;
80 struct mutex device_lock;
81 struct device *dev;
82 struct notifier_block nb;
83 struct list_head vfio_next;
84 struct list_head container_next;
85 struct list_head unbound_list;
86 struct mutex unbound_lock;
87 atomic_t opened;
88 bool noiommu;
89 };
90
91 struct vfio_device {
92 struct kref kref;
93 struct device *dev;
94 const struct vfio_device_ops *ops;
95 struct vfio_group *group;
96 struct list_head group_next;
97 void *device_data;
98 };
99
100 #ifdef CONFIG_VFIO_NOIOMMU
101 static bool noiommu __read_mostly;
102 module_param_named(enable_unsafe_noiommu_mode,
103 noiommu, bool, S_IRUGO | S_IWUSR);
104 MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
105 #endif
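/*
 * For reference, a brief sketch of how this option is typically enabled
 * (exact invocation depends on whether vfio is built in or modular):
 *
 *	# modular build
 *	modprobe vfio enable_unsafe_noiommu_mode=1
 *
 *	# built-in, via the kernel command line
 *	vfio.enable_unsafe_noiommu_mode=1
 *
 * Either way the resulting groups appear as /dev/vfio/noiommu-$GROUP and
 * still require CAP_SYS_RAWIO to open.
 */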
106
107 /*
108 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
109 * and remove functions; any use case other than acquiring the first
110 * reference for the purpose of calling vfio_add_group_dev() or removing
111 * that symmetric reference after vfio_del_group_dev() should use the raw
112 * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
113 * removes the device from the dummy group and cannot be nested.
114 */
115 struct iommu_group *vfio_iommu_group_get(struct device *dev)
116 {
117 struct iommu_group *group;
118 int __maybe_unused ret;
119
120 group = iommu_group_get(dev);
121
122 #ifdef CONFIG_VFIO_NOIOMMU
123 /*
124 * With noiommu enabled, an IOMMU group will be created for a device
125 * that doesn't already have one and doesn't have iommu_ops on its
126 * bus. We use iommu_present() again in the main code to detect these
127 * fake groups.
128 */
129 if (group || !noiommu || iommu_present(dev->bus))
130 return group;
131
132 group = iommu_group_alloc();
133 if (IS_ERR(group))
134 return NULL;
135
136 iommu_group_set_name(group, "vfio-noiommu");
137 ret = iommu_group_add_device(group, dev);
138 iommu_group_put(group);
139 if (ret)
140 return NULL;
141
142 /*
143 * Where to taint? At this point we've added an IOMMU group for a
144 * device that is not backed by iommu_ops, therefore any iommu_
145 * callback using iommu_ops can legitimately Oops. So, while we may
146 * be about to give a DMA capable device to a user without IOMMU
147 * protection, which is clearly taint-worthy, let's go ahead and do
148 * it here.
149 */
150 add_taint(TAINT_USER, LOCKDEP_STILL_OK);
151 dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
152 #endif
153
154 return group;
155 }
156 EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
157
158 void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
159 {
160 #ifdef CONFIG_VFIO_NOIOMMU
161 if (!iommu_present(dev->bus))
162 iommu_group_remove_device(dev);
163 #endif
164
165 iommu_group_put(group);
166 }
167 EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
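/*
 * Illustrative sketch (not part of this file): how a VFIO bus driver's
 * probe/remove path pairs these helpers with vfio_add_group_dev() and
 * vfio_del_group_dev(), mirroring the pattern used by drivers such as
 * vfio-pci.  Names like my_probe, my_remove, my_ops and my_data are
 * hypothetical.
 *
 *	static int my_probe(struct device *dev)
 *	{
 *		struct iommu_group *group;
 *		int ret;
 *
 *		group = vfio_iommu_group_get(dev);
 *		if (!group)
 *			return -EINVAL;
 *
 *		ret = vfio_add_group_dev(dev, &my_ops, my_data);
 *		if (ret)
 *			vfio_iommu_group_put(group, dev);
 *
 *		return ret;
 *	}
 *
 *	static void my_remove(struct device *dev)
 *	{
 *		void *data = vfio_del_group_dev(dev);
 *
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 *		kfree(data);
 *	}
 */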
168
169 #ifdef CONFIG_VFIO_NOIOMMU
170 static void *vfio_noiommu_open(unsigned long arg)
171 {
172 if (arg != VFIO_NOIOMMU_IOMMU)
173 return ERR_PTR(-EINVAL);
174 if (!capable(CAP_SYS_RAWIO))
175 return ERR_PTR(-EPERM);
176
177 return NULL;
178 }
179
180 static void vfio_noiommu_release(void *iommu_data)
181 {
182 }
183
184 static long vfio_noiommu_ioctl(void *iommu_data,
185 unsigned int cmd, unsigned long arg)
186 {
187 if (cmd == VFIO_CHECK_EXTENSION)
188 return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
189
190 return -ENOTTY;
191 }
192
193 static int vfio_iommu_present(struct device *dev, void *unused)
194 {
195 return iommu_present(dev->bus) ? 1 : 0;
196 }
197
198 static int vfio_noiommu_attach_group(void *iommu_data,
199 struct iommu_group *iommu_group)
200 {
201 return iommu_group_for_each_dev(iommu_group, NULL,
202 vfio_iommu_present) ? -EINVAL : 0;
203 }
204
205 static void vfio_noiommu_detach_group(void *iommu_data,
206 struct iommu_group *iommu_group)
207 {
208 }
209
210 static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
211 .name = "vfio-noiommu",
212 .owner = THIS_MODULE,
213 .open = vfio_noiommu_open,
214 .release = vfio_noiommu_release,
215 .ioctl = vfio_noiommu_ioctl,
216 .attach_group = vfio_noiommu_attach_group,
217 .detach_group = vfio_noiommu_detach_group,
218 };
219 #endif
220
221
222 /**
223 * IOMMU driver registration
224 */
225 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
226 {
227 struct vfio_iommu_driver *driver, *tmp;
228
229 driver = kzalloc(sizeof(*driver), GFP_KERNEL);
230 if (!driver)
231 return -ENOMEM;
232
233 driver->ops = ops;
234
235 mutex_lock(&vfio.iommu_drivers_lock);
236
237 /* Check for duplicates */
238 list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
239 if (tmp->ops == ops) {
240 mutex_unlock(&vfio.iommu_drivers_lock);
241 kfree(driver);
242 return -EINVAL;
243 }
244 }
245
246 list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
247
248 mutex_unlock(&vfio.iommu_drivers_lock);
249
250 return 0;
251 }
252 EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
253
254 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
255 {
256 struct vfio_iommu_driver *driver;
257
258 mutex_lock(&vfio.iommu_drivers_lock);
259 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
260 if (driver->ops == ops) {
261 list_del(&driver->vfio_next);
262 mutex_unlock(&vfio.iommu_drivers_lock);
263 kfree(driver);
264 return;
265 }
266 }
267 mutex_unlock(&vfio.iommu_drivers_lock);
268 }
269 EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
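/*
 * A minimal sketch of how an IOMMU backend uses this registration API,
 * following the pattern of the vfio_iommu_type1 module; the ops and
 * callback names here are hypothetical:
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_open,
 *		.release	= my_release,
 *		.ioctl		= my_ioctl,
 *		.attach_group	= my_attach_group,
 *		.detach_group	= my_detach_group,
 *	};
 *
 *	static int __init my_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_exit(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */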
270
271 /**
272 * Group minor allocation/free - both called with vfio.group_lock held
273 */
274 static int vfio_alloc_group_minor(struct vfio_group *group)
275 {
276 return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
277 }
278
279 static void vfio_free_group_minor(int minor)
280 {
281 idr_remove(&vfio.group_idr, minor);
282 }
283
284 static int vfio_iommu_group_notifier(struct notifier_block *nb,
285 unsigned long action, void *data);
286 static void vfio_group_get(struct vfio_group *group);
287
288 /**
289 * Container objects - containers are created when /dev/vfio/vfio is
290 * opened, but their lifecycle extends until the last user is done, so
291 * they're freed via kref. Must support container/group/device being
292 * closed in any order.
293 */
294 static void vfio_container_get(struct vfio_container *container)
295 {
296 kref_get(&container->kref);
297 }
298
299 static void vfio_container_release(struct kref *kref)
300 {
301 struct vfio_container *container;
302 container = container_of(kref, struct vfio_container, kref);
303
304 kfree(container);
305 }
306
307 static void vfio_container_put(struct vfio_container *container)
308 {
309 kref_put(&container->kref, vfio_container_release);
310 }
311
312 static void vfio_group_unlock_and_free(struct vfio_group *group)
313 {
314 mutex_unlock(&vfio.group_lock);
315 /*
316 * Unregister outside of lock. A spurious callback is harmless now
317 * that the group is no longer in vfio.group_list.
318 */
319 iommu_group_unregister_notifier(group->iommu_group, &group->nb);
320 kfree(group);
321 }
322
323 /**
324 * Group objects - create, release, get, put, search
325 */
326 static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
327 bool iommu_present)
328 {
329 struct vfio_group *group, *tmp;
330 struct device *dev;
331 int ret, minor;
332
333 group = kzalloc(sizeof(*group), GFP_KERNEL);
334 if (!group)
335 return ERR_PTR(-ENOMEM);
336
337 kref_init(&group->kref);
338 INIT_LIST_HEAD(&group->device_list);
339 mutex_init(&group->device_lock);
340 INIT_LIST_HEAD(&group->unbound_list);
341 mutex_init(&group->unbound_lock);
342 atomic_set(&group->container_users, 0);
343 atomic_set(&group->opened, 0);
344 group->iommu_group = iommu_group;
345 group->noiommu = !iommu_present;
346
347 group->nb.notifier_call = vfio_iommu_group_notifier;
348
349 /*
350 * Blocking notifiers acquire a rwsem around registering and hold
351 * it around the callback. Therefore, we need to register outside of
352 * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
353 * do anything unless it can find the group in vfio.group_list, so
354 * no harm in registering early.
355 */
356 ret = iommu_group_register_notifier(iommu_group, &group->nb);
357 if (ret) {
358 kfree(group);
359 return ERR_PTR(ret);
360 }
361
362 mutex_lock(&vfio.group_lock);
363
364 /* Did we race creating this group? */
365 list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
366 if (tmp->iommu_group == iommu_group) {
367 vfio_group_get(tmp);
368 vfio_group_unlock_and_free(group);
369 return tmp;
370 }
371 }
372
373 minor = vfio_alloc_group_minor(group);
374 if (minor < 0) {
375 vfio_group_unlock_and_free(group);
376 return ERR_PTR(minor);
377 }
378
379 dev = device_create(vfio.class, NULL,
380 MKDEV(MAJOR(vfio.group_devt), minor),
381 group, "%s%d", group->noiommu ? "noiommu-" : "",
382 iommu_group_id(iommu_group));
383 if (IS_ERR(dev)) {
384 vfio_free_group_minor(minor);
385 vfio_group_unlock_and_free(group);
386 return (struct vfio_group *)dev; /* ERR_PTR */
387 }
388
389 group->minor = minor;
390 group->dev = dev;
391
392 list_add(&group->vfio_next, &vfio.group_list);
393
394 mutex_unlock(&vfio.group_lock);
395
396 return group;
397 }
398
399 /* called with vfio.group_lock held */
400 static void vfio_group_release(struct kref *kref)
401 {
402 struct vfio_group *group = container_of(kref, struct vfio_group, kref);
403 struct vfio_unbound_dev *unbound, *tmp;
404 struct iommu_group *iommu_group = group->iommu_group;
405
406 WARN_ON(!list_empty(&group->device_list));
407
408 list_for_each_entry_safe(unbound, tmp,
409 &group->unbound_list, unbound_next) {
410 list_del(&unbound->unbound_next);
411 kfree(unbound);
412 }
413
414 device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
415 list_del(&group->vfio_next);
416 vfio_free_group_minor(group->minor);
417 vfio_group_unlock_and_free(group);
418 iommu_group_put(iommu_group);
419 }
420
421 static void vfio_group_put(struct vfio_group *group)
422 {
423 kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
424 }
425
426 /* Assume group_lock or group reference is held */
427 static void vfio_group_get(struct vfio_group *group)
428 {
429 kref_get(&group->kref);
430 }
431
432 /*
433 * Not really a try as we will sleep on the mutex, but we need to make
434 * sure the group pointer is valid under lock and get a reference.
435 */
436 static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
437 {
438 struct vfio_group *target = group;
439
440 mutex_lock(&vfio.group_lock);
441 list_for_each_entry(group, &vfio.group_list, vfio_next) {
442 if (group == target) {
443 vfio_group_get(group);
444 mutex_unlock(&vfio.group_lock);
445 return group;
446 }
447 }
448 mutex_unlock(&vfio.group_lock);
449
450 return NULL;
451 }
452
453 static
454 struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
455 {
456 struct vfio_group *group;
457
458 mutex_lock(&vfio.group_lock);
459 list_for_each_entry(group, &vfio.group_list, vfio_next) {
460 if (group->iommu_group == iommu_group) {
461 vfio_group_get(group);
462 mutex_unlock(&vfio.group_lock);
463 return group;
464 }
465 }
466 mutex_unlock(&vfio.group_lock);
467
468 return NULL;
469 }
470
471 static struct vfio_group *vfio_group_get_from_minor(int minor)
472 {
473 struct vfio_group *group;
474
475 mutex_lock(&vfio.group_lock);
476 group = idr_find(&vfio.group_idr, minor);
477 if (!group) {
478 mutex_unlock(&vfio.group_lock);
479 return NULL;
480 }
481 vfio_group_get(group);
482 mutex_unlock(&vfio.group_lock);
483
484 return group;
485 }
486
487 /**
488 * Device objects - create, release, get, put, search
489 */
490 static
491 struct vfio_device *vfio_group_create_device(struct vfio_group *group,
492 struct device *dev,
493 const struct vfio_device_ops *ops,
494 void *device_data)
495 {
496 struct vfio_device *device;
497
498 device = kzalloc(sizeof(*device), GFP_KERNEL);
499 if (!device)
500 return ERR_PTR(-ENOMEM);
501
502 kref_init(&device->kref);
503 device->dev = dev;
504 device->group = group;
505 device->ops = ops;
506 device->device_data = device_data;
507 dev_set_drvdata(dev, device);
508
509 /* No need to get group_lock, caller has group reference */
510 vfio_group_get(group);
511
512 mutex_lock(&group->device_lock);
513 list_add(&device->group_next, &group->device_list);
514 mutex_unlock(&group->device_lock);
515
516 return device;
517 }
518
519 static void vfio_device_release(struct kref *kref)
520 {
521 struct vfio_device *device = container_of(kref,
522 struct vfio_device, kref);
523 struct vfio_group *group = device->group;
524
525 list_del(&device->group_next);
526 mutex_unlock(&group->device_lock);
527
528 dev_set_drvdata(device->dev, NULL);
529
530 kfree(device);
531
532 /* vfio_del_group_dev may be waiting for this device */
533 wake_up(&vfio.release_q);
534 }
535
536 /* Device reference always implies a group reference */
537 void vfio_device_put(struct vfio_device *device)
538 {
539 struct vfio_group *group = device->group;
540 kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
541 vfio_group_put(group);
542 }
543 EXPORT_SYMBOL_GPL(vfio_device_put);
544
545 static void vfio_device_get(struct vfio_device *device)
546 {
547 vfio_group_get(device->group);
548 kref_get(&device->kref);
549 }
550
551 static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
552 struct device *dev)
553 {
554 struct vfio_device *device;
555
556 mutex_lock(&group->device_lock);
557 list_for_each_entry(device, &group->device_list, group_next) {
558 if (device->dev == dev) {
559 vfio_device_get(device);
560 mutex_unlock(&group->device_lock);
561 return device;
562 }
563 }
564 mutex_unlock(&group->device_lock);
565 return NULL;
566 }
567
568 /*
569 * Some drivers, like pci-stub, are only used to prevent other drivers from
570 * claiming a device and are therefore perfectly legitimate for a user owned
571 * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping
572 * of the device, but it does prevent the user from having direct access to
573 * the device, which is useful in some circumstances.
574 *
575 * We also assume that we can include PCI interconnect devices, i.e. bridges.
576 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
577 * then all of the downstream devices will be part of the same IOMMU group as
578 * the bridge. Thus, if placing the bridge into the user owned IOVA space
579 * breaks anything, it only does so for user owned devices downstream. Note
580 * that error notification via MSI can be affected for platforms that handle
581 * MSI within the same IOVA space as DMA.
582 */
583 static const char * const vfio_driver_whitelist[] = { "pci-stub" };
584
585 static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
586 {
587 int i;
588
589 if (dev_is_pci(dev)) {
590 struct pci_dev *pdev = to_pci_dev(dev);
591
592 if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
593 return true;
594 }
595
596 for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
597 if (!strcmp(drv->name, vfio_driver_whitelist[i]))
598 return true;
599 }
600
601 return false;
602 }
603
604 /*
605 * A vfio group is viable for use by userspace if all devices are in
606 * one of the following states:
607 * - driver-less
608 * - bound to a vfio driver
609 * - bound to a whitelisted driver
610 * - a PCI interconnect device
611 *
612 * We use two methods to determine whether a device is bound to a vfio
613 * driver. The first is to test whether the device exists in the vfio
614 * group. The second is to test if the device exists on the group
615 * unbound_list, indicating it's in the middle of transitioning from
616 * a vfio driver to driver-less.
617 */
618 static int vfio_dev_viable(struct device *dev, void *data)
619 {
620 struct vfio_group *group = data;
621 struct vfio_device *device;
622 struct device_driver *drv = ACCESS_ONCE(dev->driver);
623 struct vfio_unbound_dev *unbound;
624 int ret = -EINVAL;
625
626 mutex_lock(&group->unbound_lock);
627 list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
628 if (dev == unbound->dev) {
629 ret = 0;
630 break;
631 }
632 }
633 mutex_unlock(&group->unbound_lock);
634
635 if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
636 return 0;
637
638 device = vfio_group_get_device(group, dev);
639 if (device) {
640 vfio_device_put(device);
641 return 0;
642 }
643
644 return ret;
645 }
646
647 /**
648 * Async device support
649 */
650 static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
651 {
652 struct vfio_device *device;
653
654 /* Do we already know about it? We shouldn't */
655 device = vfio_group_get_device(group, dev);
656 if (WARN_ON_ONCE(device)) {
657 vfio_device_put(device);
658 return 0;
659 }
660
661 /* Nothing to do for idle groups */
662 if (!atomic_read(&group->container_users))
663 return 0;
664
665 /* TODO Prevent device auto probing */
666 WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
667 iommu_group_id(group->iommu_group));
668
669 return 0;
670 }
671
672 static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
673 {
674 /* We don't care what happens when the group isn't in use */
675 if (!atomic_read(&group->container_users))
676 return 0;
677
678 return vfio_dev_viable(dev, group);
679 }
680
681 static int vfio_iommu_group_notifier(struct notifier_block *nb,
682 unsigned long action, void *data)
683 {
684 struct vfio_group *group = container_of(nb, struct vfio_group, nb);
685 struct device *dev = data;
686 struct vfio_unbound_dev *unbound;
687
688 /*
689 * We need to go through a group_lock lookup to get a reference or we
690 * risk racing a group being removed. Ignore spurious notifies.
691 */
692 group = vfio_group_try_get(group);
693 if (!group)
694 return NOTIFY_OK;
695
696 switch (action) {
697 case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
698 vfio_group_nb_add_dev(group, dev);
699 break;
700 case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
701 /*
702 * Nothing to do here. If the device is in use, then the
703 * vfio sub-driver should block the remove callback until
704 * it is unused. If the device is unused or attached to a
705 * stub driver, then it should be released and we don't
706 * care that it will be going away.
707 */
708 break;
709 case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
710 pr_debug("%s: Device %s, group %d binding to driver\n",
711 __func__, dev_name(dev),
712 iommu_group_id(group->iommu_group));
713 break;
714 case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
715 pr_debug("%s: Device %s, group %d bound to driver %s\n",
716 __func__, dev_name(dev),
717 iommu_group_id(group->iommu_group), dev->driver->name);
718 BUG_ON(vfio_group_nb_verify(group, dev));
719 break;
720 case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
721 pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
722 __func__, dev_name(dev),
723 iommu_group_id(group->iommu_group), dev->driver->name);
724 break;
725 case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
726 pr_debug("%s: Device %s, group %d unbound from driver\n",
727 __func__, dev_name(dev),
728 iommu_group_id(group->iommu_group));
729 /*
730 * XXX An unbound device in a live group is ok, but we'd
731 * really like to avoid the above BUG_ON by preventing other
732 * drivers from binding to it. Once that occurs, we have to
733 * stop the system to maintain isolation. At a minimum, we'd
734 * want a toggle to disable driver auto probe for this device.
735 */
736
737 mutex_lock(&group->unbound_lock);
738 list_for_each_entry(unbound,
739 &group->unbound_list, unbound_next) {
740 if (dev == unbound->dev) {
741 list_del(&unbound->unbound_next);
742 kfree(unbound);
743 break;
744 }
745 }
746 mutex_unlock(&group->unbound_lock);
747 break;
748 }
749
750 vfio_group_put(group);
751 return NOTIFY_OK;
752 }
753
754 /**
755 * VFIO driver API
756 */
757 int vfio_add_group_dev(struct device *dev,
758 const struct vfio_device_ops *ops, void *device_data)
759 {
760 struct iommu_group *iommu_group;
761 struct vfio_group *group;
762 struct vfio_device *device;
763
764 iommu_group = iommu_group_get(dev);
765 if (!iommu_group)
766 return -EINVAL;
767
768 group = vfio_group_get_from_iommu(iommu_group);
769 if (!group) {
770 group = vfio_create_group(iommu_group, iommu_present(dev->bus));
771 if (IS_ERR(group)) {
772 iommu_group_put(iommu_group);
773 return PTR_ERR(group);
774 }
775 } else {
776 /*
777 * A found vfio_group already holds a reference to the
778 * iommu_group. A created vfio_group keeps the reference.
779 */
780 iommu_group_put(iommu_group);
781 }
782
783 device = vfio_group_get_device(group, dev);
784 if (device) {
785 WARN(1, "Device %s already exists on group %d\n",
786 dev_name(dev), iommu_group_id(iommu_group));
787 vfio_device_put(device);
788 vfio_group_put(group);
789 return -EBUSY;
790 }
791
792 device = vfio_group_create_device(group, dev, ops, device_data);
793 if (IS_ERR(device)) {
794 vfio_group_put(group);
795 return PTR_ERR(device);
796 }
797
798 /*
799 * Drop all but the vfio_device reference. The vfio_device holds
800 * a reference to the vfio_group, which holds a reference to the
801 * iommu_group.
802 */
803 vfio_group_put(group);
804
805 return 0;
806 }
807 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
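/*
 * Sketch of the vfio_device_ops a bus driver passes to vfio_add_group_dev()
 * above.  These callbacks back the device file operations further down in
 * this file (vfio_device_fops_*) and the request() hook used by
 * vfio_del_group_dev().  The callback names are hypothetical:
 *
 *	static const struct vfio_device_ops my_ops = {
 *		.name		= "my-vfio-bus-driver",
 *		.open		= my_open,
 *		.release	= my_release,
 *		.ioctl		= my_ioctl,
 *		.read		= my_read,
 *		.write		= my_write,
 *		.mmap		= my_mmap,
 *		.request	= my_request,
 *	};
 */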
808
809 /**
810 * Get a reference to the vfio_device for a device. Even if the
811 * caller thinks they own the device, they could be racing with a
812 * release call path, so we can't trust drvdata for the shortcut.
813 * Go the long way around, from the iommu_group to the vfio_group
814 * to the vfio_device.
815 */
816 struct vfio_device *vfio_device_get_from_dev(struct device *dev)
817 {
818 struct iommu_group *iommu_group;
819 struct vfio_group *group;
820 struct vfio_device *device;
821
822 iommu_group = iommu_group_get(dev);
823 if (!iommu_group)
824 return NULL;
825
826 group = vfio_group_get_from_iommu(iommu_group);
827 iommu_group_put(iommu_group);
828 if (!group)
829 return NULL;
830
831 device = vfio_group_get_device(group, dev);
832 vfio_group_put(group);
833
834 return device;
835 }
836 EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
837
838 static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
839 char *buf)
840 {
841 struct vfio_device *it, *device = NULL;
842
843 mutex_lock(&group->device_lock);
844 list_for_each_entry(it, &group->device_list, group_next) {
845 if (!strcmp(dev_name(it->dev), buf)) {
846 device = it;
847 vfio_device_get(device);
848 break;
849 }
850 }
851 mutex_unlock(&group->device_lock);
852
853 return device;
854 }
855
856 /*
857 * Caller must hold a reference to the vfio_device
858 */
859 void *vfio_device_data(struct vfio_device *device)
860 {
861 return device->device_data;
862 }
863 EXPORT_SYMBOL_GPL(vfio_device_data);
864
865 /* Given a referenced group, check if it contains the device */
866 static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
867 {
868 struct vfio_device *device;
869
870 device = vfio_group_get_device(group, dev);
871 if (!device)
872 return false;
873
874 vfio_device_put(device);
875 return true;
876 }
877
878 /*
879 * Decrement the device reference count and wait for the device to be
880 * removed. Open file descriptors for the device... */
881 void *vfio_del_group_dev(struct device *dev)
882 {
883 struct vfio_device *device = dev_get_drvdata(dev);
884 struct vfio_group *group = device->group;
885 void *device_data = device->device_data;
886 struct vfio_unbound_dev *unbound;
887 unsigned int i = 0;
888 long ret;
889 bool interrupted = false;
890
891 /*
892 * The group exists so long as we have a device reference. Get
893 * a group reference and use it to scan for the device going away.
894 */
895 vfio_group_get(group);
896
897 /*
898 * When the device is removed from the group, the group suddenly
899 * becomes non-viable; the device has a driver (until the unbind
900 * completes), but it's not present in the group. This is bad news
901 * for any external users that need to re-acquire a group reference
902 * in order to match and release their existing reference. To
903 * solve this, we track such devices on the unbound_list to bridge
904 * the gap until they're fully unbound.
905 */
906 unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
907 if (unbound) {
908 unbound->dev = dev;
909 mutex_lock(&group->unbound_lock);
910 list_add(&unbound->unbound_next, &group->unbound_list);
911 mutex_unlock(&group->unbound_lock);
912 }
913 WARN_ON(!unbound);
914
915 vfio_device_put(device);
916
917 /*
918 * If the device is still present in the group after the above
919 * 'put', then it is in use and we need to request it from the
920 * bus driver. The driver may in turn need to request the
921 * device from the user. We send the request on an arbitrary
922 * interval with counter to allow the driver to take escalating
923 * measures to release the device if it has the ability to do so.
924 */
925 do {
926 device = vfio_group_get_device(group, dev);
927 if (!device)
928 break;
929
930 if (device->ops->request)
931 device->ops->request(device_data, i++);
932
933 vfio_device_put(device);
934
935 if (interrupted) {
936 ret = wait_event_timeout(vfio.release_q,
937 !vfio_dev_present(group, dev), HZ * 10);
938 } else {
939 ret = wait_event_interruptible_timeout(vfio.release_q,
940 !vfio_dev_present(group, dev), HZ * 10);
941 if (ret == -ERESTARTSYS) {
942 interrupted = true;
943 dev_warn(dev,
944 "Device is currently in use, task"
945 " \"%s\" (%d) "
946 "blocked until device is released",
947 current->comm, task_pid_nr(current));
948 }
949 }
950 } while (ret <= 0);
951
952 vfio_group_put(group);
953
954 return device_data;
955 }
956 EXPORT_SYMBOL_GPL(vfio_del_group_dev);
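/*
 * Sketch of a bus driver's request() callback as invoked from the wait loop
 * above, assuming the driver keeps an eventfd (req_trigger) that userspace
 * registered for device-release requests.  The struct and field names are
 * hypothetical, but the shape follows vfio-pci's handler:
 *
 *	static void my_request(void *device_data, unsigned int count)
 *	{
 *		struct my_device *vdev = device_data;
 *
 *		if (vdev->req_trigger) {
 *			if (!(count % 10))
 *				dev_notice(vdev->dev,
 *					   "Relaying device request to user\n");
 *			eventfd_signal(vdev->req_trigger, 1);
 *		} else if (!count) {
 *			dev_warn(vdev->dev,
 *				 "No device request channel registered, blocked until released by user\n");
 *		}
 *	}
 */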
957
958 /**
959 * VFIO base fd, /dev/vfio/vfio
960 */
961 static long vfio_ioctl_check_extension(struct vfio_container *container,
962 unsigned long arg)
963 {
964 struct vfio_iommu_driver *driver;
965 long ret = 0;
966
967 down_read(&container->group_lock);
968
969 driver = container->iommu_driver;
970
971 switch (arg) {
972 /* No base extensions yet */
973 default:
974 /*
975 * If no driver is set, poll all registered drivers for
976 * extensions and return the first positive result. If
977 * a driver is already set, further queries will be passed
978 * only to that driver.
979 */
980 if (!driver) {
981 mutex_lock(&vfio.iommu_drivers_lock);
982 list_for_each_entry(driver, &vfio.iommu_drivers_list,
983 vfio_next) {
984
985 #ifdef CONFIG_VFIO_NOIOMMU
986 if (!list_empty(&container->group_list) &&
987 (container->noiommu !=
988 (driver->ops == &vfio_noiommu_ops)))
989 continue;
990 #endif
991
992 if (!try_module_get(driver->ops->owner))
993 continue;
994
995 ret = driver->ops->ioctl(NULL,
996 VFIO_CHECK_EXTENSION,
997 arg);
998 module_put(driver->ops->owner);
999 if (ret > 0)
1000 break;
1001 }
1002 mutex_unlock(&vfio.iommu_drivers_lock);
1003 } else
1004 ret = driver->ops->ioctl(container->iommu_data,
1005 VFIO_CHECK_EXTENSION, arg);
1006 }
1007
1008 up_read(&container->group_lock);
1009
1010 return ret;
1011 }
1012
1013 /* hold write lock on container->group_lock */
1014 static int __vfio_container_attach_groups(struct vfio_container *container,
1015 struct vfio_iommu_driver *driver,
1016 void *data)
1017 {
1018 struct vfio_group *group;
1019 int ret = -ENODEV;
1020
1021 list_for_each_entry(group, &container->group_list, container_next) {
1022 ret = driver->ops->attach_group(data, group->iommu_group);
1023 if (ret)
1024 goto unwind;
1025 }
1026
1027 return ret;
1028
1029 unwind:
1030 list_for_each_entry_continue_reverse(group, &container->group_list,
1031 container_next) {
1032 driver->ops->detach_group(data, group->iommu_group);
1033 }
1034
1035 return ret;
1036 }
1037
1038 static long vfio_ioctl_set_iommu(struct vfio_container *container,
1039 unsigned long arg)
1040 {
1041 struct vfio_iommu_driver *driver;
1042 long ret = -ENODEV;
1043
1044 down_write(&container->group_lock);
1045
1046 /*
1047 * The container is designed to be an unprivileged interface while
1048 * the group can be assigned to specific users. Therefore, only by
1049 * adding a group to a container does the user get the privilege of
1050 * enabling the iommu, which may allocate finite resources. There
1051 * is no unset_iommu, but by removing all the groups from a container,
1052 * the container is deprivileged and returns to an unset state.
1053 */
1054 if (list_empty(&container->group_list) || container->iommu_driver) {
1055 up_write(&container->group_lock);
1056 return -EINVAL;
1057 }
1058
1059 mutex_lock(&vfio.iommu_drivers_lock);
1060 list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
1061 void *data;
1062
1063 #ifdef CONFIG_VFIO_NOIOMMU
1064 /*
1065 * Only noiommu containers can use vfio-noiommu and noiommu
1066 * containers can only use vfio-noiommu.
1067 */
1068 if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
1069 continue;
1070 #endif
1071
1072 if (!try_module_get(driver->ops->owner))
1073 continue;
1074
1075 /*
1076 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
1077 * so test which iommu driver reported support for this
1078 * extension and call open on them. We also pass them the
1079 * magic, allowing a single driver to support multiple
1080 * interfaces if they'd like.
1081 */
1082 if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
1083 module_put(driver->ops->owner);
1084 continue;
1085 }
1086
1087 /* module reference holds the driver we're working on */
1088 mutex_unlock(&vfio.iommu_drivers_lock);
1089
1090 data = driver->ops->open(arg);
1091 if (IS_ERR(data)) {
1092 ret = PTR_ERR(data);
1093 module_put(driver->ops->owner);
1094 goto skip_drivers_unlock;
1095 }
1096
1097 ret = __vfio_container_attach_groups(container, driver, data);
1098 if (!ret) {
1099 container->iommu_driver = driver;
1100 container->iommu_data = data;
1101 } else {
1102 driver->ops->release(data);
1103 module_put(driver->ops->owner);
1104 }
1105
1106 goto skip_drivers_unlock;
1107 }
1108
1109 mutex_unlock(&vfio.iommu_drivers_lock);
1110 skip_drivers_unlock:
1111 up_write(&container->group_lock);
1112
1113 return ret;
1114 }
1115
1116 static long vfio_fops_unl_ioctl(struct file *filep,
1117 unsigned int cmd, unsigned long arg)
1118 {
1119 struct vfio_container *container = filep->private_data;
1120 struct vfio_iommu_driver *driver;
1121 void *data;
1122 long ret = -EINVAL;
1123
1124 if (!container)
1125 return ret;
1126
1127 switch (cmd) {
1128 case VFIO_GET_API_VERSION:
1129 ret = VFIO_API_VERSION;
1130 break;
1131 case VFIO_CHECK_EXTENSION:
1132 ret = vfio_ioctl_check_extension(container, arg);
1133 break;
1134 case VFIO_SET_IOMMU:
1135 ret = vfio_ioctl_set_iommu(container, arg);
1136 break;
1137 default:
1138 down_read(&container->group_lock);
1139
1140 driver = container->iommu_driver;
1141 data = container->iommu_data;
1142
1143 if (driver) /* passthrough all unrecognized ioctls */
1144 ret = driver->ops->ioctl(data, cmd, arg);
1145
1146 up_read(&container->group_lock);
1147 }
1148
1149 return ret;
1150 }
1151
1152 #ifdef CONFIG_COMPAT
1153 static long vfio_fops_compat_ioctl(struct file *filep,
1154 unsigned int cmd, unsigned long arg)
1155 {
1156 arg = (unsigned long)compat_ptr(arg);
1157 return vfio_fops_unl_ioctl(filep, cmd, arg);
1158 }
1159 #endif /* CONFIG_COMPAT */
1160
1161 static int vfio_fops_open(struct inode *inode, struct file *filep)
1162 {
1163 struct vfio_container *container;
1164
1165 container = kzalloc(sizeof(*container), GFP_KERNEL);
1166 if (!container)
1167 return -ENOMEM;
1168
1169 INIT_LIST_HEAD(&container->group_list);
1170 init_rwsem(&container->group_lock);
1171 kref_init(&container->kref);
1172
1173 filep->private_data = container;
1174
1175 return 0;
1176 }
1177
1178 static int vfio_fops_release(struct inode *inode, struct file *filep)
1179 {
1180 struct vfio_container *container = filep->private_data;
1181
1182 filep->private_data = NULL;
1183
1184 vfio_container_put(container);
1185
1186 return 0;
1187 }
1188
1189 /*
1190 * Once an iommu driver is set, we optionally pass read/write/mmap
1191 * on to the driver, allowing management interfaces beyond ioctl.
1192 */
1193 static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
1194 size_t count, loff_t *ppos)
1195 {
1196 struct vfio_container *container = filep->private_data;
1197 struct vfio_iommu_driver *driver;
1198 ssize_t ret = -EINVAL;
1199
1200 down_read(&container->group_lock);
1201
1202 driver = container->iommu_driver;
1203 if (likely(driver && driver->ops->read))
1204 ret = driver->ops->read(container->iommu_data,
1205 buf, count, ppos);
1206
1207 up_read(&container->group_lock);
1208
1209 return ret;
1210 }
1211
1212 static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
1213 size_t count, loff_t *ppos)
1214 {
1215 struct vfio_container *container = filep->private_data;
1216 struct vfio_iommu_driver *driver;
1217 ssize_t ret = -EINVAL;
1218
1219 down_read(&container->group_lock);
1220
1221 driver = container->iommu_driver;
1222 if (likely(driver && driver->ops->write))
1223 ret = driver->ops->write(container->iommu_data,
1224 buf, count, ppos);
1225
1226 up_read(&container->group_lock);
1227
1228 return ret;
1229 }
1230
1231 static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1232 {
1233 struct vfio_container *container = filep->private_data;
1234 struct vfio_iommu_driver *driver;
1235 int ret = -EINVAL;
1236
1237 down_read(&container->group_lock);
1238
1239 driver = container->iommu_driver;
1240 if (likely(driver && driver->ops->mmap))
1241 ret = driver->ops->mmap(container->iommu_data, vma);
1242
1243 up_read(&container->group_lock);
1244
1245 return ret;
1246 }
1247
1248 static const struct file_operations vfio_fops = {
1249 .owner = THIS_MODULE,
1250 .open = vfio_fops_open,
1251 .release = vfio_fops_release,
1252 .read = vfio_fops_read,
1253 .write = vfio_fops_write,
1254 .unlocked_ioctl = vfio_fops_unl_ioctl,
1255 #ifdef CONFIG_COMPAT
1256 .compat_ioctl = vfio_fops_compat_ioctl,
1257 #endif
1258 .mmap = vfio_fops_mmap,
1259 };
1260
1261 /**
1262 * VFIO Group fd, /dev/vfio/$GROUP
1263 */
1264 static void __vfio_group_unset_container(struct vfio_group *group)
1265 {
1266 struct vfio_container *container = group->container;
1267 struct vfio_iommu_driver *driver;
1268
1269 down_write(&container->group_lock);
1270
1271 driver = container->iommu_driver;
1272 if (driver)
1273 driver->ops->detach_group(container->iommu_data,
1274 group->iommu_group);
1275
1276 group->container = NULL;
1277 list_del(&group->container_next);
1278
1279 /* Detaching the last group deprivileges a container, remove iommu */
1280 if (driver && list_empty(&container->group_list)) {
1281 driver->ops->release(container->iommu_data);
1282 module_put(driver->ops->owner);
1283 container->iommu_driver = NULL;
1284 container->iommu_data = NULL;
1285 }
1286
1287 up_write(&container->group_lock);
1288
1289 vfio_container_put(container);
1290 }
1291
1292 /*
1293 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
1294 * if there was no container to unset. Since the ioctl is called on
1295 * the group, we know that it still exists; therefore the only valid
1296 * transition here is 1->0.
1297 */
1298 static int vfio_group_unset_container(struct vfio_group *group)
1299 {
1300 int users = atomic_cmpxchg(&group->container_users, 1, 0);
1301
1302 if (!users)
1303 return -EINVAL;
1304 if (users != 1)
1305 return -EBUSY;
1306
1307 __vfio_group_unset_container(group);
1308
1309 return 0;
1310 }
1311
1312 /*
1313 * When removing container users, anything that removes the last user
1314 * implicitly removes the group from the container. That is, if the
1315 * group file descriptor is closed, as well as any device file descriptors,
1316 * the group is free.
1317 */
1318 static void vfio_group_try_dissolve_container(struct vfio_group *group)
1319 {
1320 if (0 == atomic_dec_if_positive(&group->container_users))
1321 __vfio_group_unset_container(group);
1322 }
1323
1324 static int vfio_group_set_container(struct vfio_group *group, int container_fd)
1325 {
1326 struct fd f;
1327 struct vfio_container *container;
1328 struct vfio_iommu_driver *driver;
1329 int ret = 0;
1330
1331 if (atomic_read(&group->container_users))
1332 return -EINVAL;
1333
1334 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1335 return -EPERM;
1336
1337 f = fdget(container_fd);
1338 if (!f.file)
1339 return -EBADF;
1340
1341 /* Sanity check, is this really our fd? */
1342 if (f.file->f_op != &vfio_fops) {
1343 fdput(f);
1344 return -EINVAL;
1345 }
1346
1347 container = f.file->private_data;
1348 WARN_ON(!container); /* fget ensures we don't race vfio_release */
1349
1350 down_write(&container->group_lock);
1351
1352 /* Real groups and fake groups cannot mix */
1353 if (!list_empty(&container->group_list) &&
1354 container->noiommu != group->noiommu) {
1355 ret = -EPERM;
1356 goto unlock_out;
1357 }
1358
1359 driver = container->iommu_driver;
1360 if (driver) {
1361 ret = driver->ops->attach_group(container->iommu_data,
1362 group->iommu_group);
1363 if (ret)
1364 goto unlock_out;
1365 }
1366
1367 group->container = container;
1368 container->noiommu = group->noiommu;
1369 list_add(&group->container_next, &container->group_list);
1370
1371 /* Get a reference on the container and mark a user within the group */
1372 vfio_container_get(container);
1373 atomic_inc(&group->container_users);
1374
1375 unlock_out:
1376 up_write(&container->group_lock);
1377 fdput(f);
1378 return ret;
1379 }
1380
1381 static bool vfio_group_viable(struct vfio_group *group)
1382 {
1383 return (iommu_group_for_each_dev(group->iommu_group,
1384 group, vfio_dev_viable) == 0);
1385 }
1386
1387 static const struct file_operations vfio_device_fops;
1388
1389 static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
1390 {
1391 struct vfio_device *device;
1392 struct file *filep;
1393 int ret;
1394
1395 if (0 == atomic_read(&group->container_users) ||
1396 !group->container->iommu_driver || !vfio_group_viable(group))
1397 return -EINVAL;
1398
1399 if (group->noiommu && !capable(CAP_SYS_RAWIO))
1400 return -EPERM;
1401
1402 device = vfio_device_get_from_name(group, buf);
1403 if (!device)
1404 return -ENODEV;
1405
1406 ret = device->ops->open(device->device_data);
1407 if (ret) {
1408 vfio_device_put(device);
1409 return ret;
1410 }
1411
1412 /*
1413 * We can't use anon_inode_getfd() because we need to modify
1414 * the f_mode flags directly to allow more than just ioctls
1415 */
1416 ret = get_unused_fd_flags(O_CLOEXEC);
1417 if (ret < 0) {
1418 device->ops->release(device->device_data);
1419 vfio_device_put(device);
1420 return ret;
1421 }
1422
1423 filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
1424 device, O_RDWR);
1425 if (IS_ERR(filep)) {
1426 put_unused_fd(ret);
1427 ret = PTR_ERR(filep);
1428 device->ops->release(device->device_data);
1429 vfio_device_put(device);
1430 return ret;
1431 }
1432
1433 /*
1434 * TODO: add an anon_inode interface to do this.
1435 * Appears to be missing by lack of need rather than
1436 * explicitly prevented. Now there's need.
1437 */
1438 filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1439
1440 atomic_inc(&group->container_users);
1441
1442 fd_install(ret, filep);
1443
1444 if (group->noiommu)
1445 dev_warn(device->dev, "vfio-noiommu device opened by user "
1446 "(%s:%d)\n", current->comm, task_pid_nr(current));
1447
1448 return ret;
1449 }
1450
1451 static long vfio_group_fops_unl_ioctl(struct file *filep,
1452 unsigned int cmd, unsigned long arg)
1453 {
1454 struct vfio_group *group = filep->private_data;
1455 long ret = -ENOTTY;
1456
1457 switch (cmd) {
1458 case VFIO_GROUP_GET_STATUS:
1459 {
1460 struct vfio_group_status status;
1461 unsigned long minsz;
1462
1463 minsz = offsetofend(struct vfio_group_status, flags);
1464
1465 if (copy_from_user(&status, (void __user *)arg, minsz))
1466 return -EFAULT;
1467
1468 if (status.argsz < minsz)
1469 return -EINVAL;
1470
1471 status.flags = 0;
1472
1473 if (vfio_group_viable(group))
1474 status.flags |= VFIO_GROUP_FLAGS_VIABLE;
1475
1476 if (group->container)
1477 status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;
1478
1479 if (copy_to_user((void __user *)arg, &status, minsz))
1480 return -EFAULT;
1481
1482 ret = 0;
1483 break;
1484 }
1485 case VFIO_GROUP_SET_CONTAINER:
1486 {
1487 int fd;
1488
1489 if (get_user(fd, (int __user *)arg))
1490 return -EFAULT;
1491
1492 if (fd < 0)
1493 return -EINVAL;
1494
1495 ret = vfio_group_set_container(group, fd);
1496 break;
1497 }
1498 case VFIO_GROUP_UNSET_CONTAINER:
1499 ret = vfio_group_unset_container(group);
1500 break;
1501 case VFIO_GROUP_GET_DEVICE_FD:
1502 {
1503 char *buf;
1504
1505 buf = strndup_user((const char __user *)arg, PAGE_SIZE);
1506 if (IS_ERR(buf))
1507 return PTR_ERR(buf);
1508
1509 ret = vfio_group_get_device_fd(group, buf);
1510 kfree(buf);
1511 break;
1512 }
1513 }
1514
1515 return ret;
1516 }
1517
1518 #ifdef CONFIG_COMPAT
1519 static long vfio_group_fops_compat_ioctl(struct file *filep,
1520 unsigned int cmd, unsigned long arg)
1521 {
1522 arg = (unsigned long)compat_ptr(arg);
1523 return vfio_group_fops_unl_ioctl(filep, cmd, arg);
1524 }
1525 #endif /* CONFIG_COMPAT */
1526
1527 static int vfio_group_fops_open(struct inode *inode, struct file *filep)
1528 {
1529 struct vfio_group *group;
1530 int opened;
1531
1532 group = vfio_group_get_from_minor(iminor(inode));
1533 if (!group)
1534 return -ENODEV;
1535
1536 if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
1537 vfio_group_put(group);
1538 return -EPERM;
1539 }
1540
1541 /* Do we need multiple instances of the group open? Seems not. */
1542 opened = atomic_cmpxchg(&group->opened, 0, 1);
1543 if (opened) {
1544 vfio_group_put(group);
1545 return -EBUSY;
1546 }
1547
1548 /* Is something still in use from a previous open? */
1549 if (group->container) {
1550 atomic_dec(&group->opened);
1551 vfio_group_put(group);
1552 return -EBUSY;
1553 }
1554
1555 filep->private_data = group;
1556
1557 return 0;
1558 }
1559
1560 static int vfio_group_fops_release(struct inode *inode, struct file *filep)
1561 {
1562 struct vfio_group *group = filep->private_data;
1563
1564 filep->private_data = NULL;
1565
1566 vfio_group_try_dissolve_container(group);
1567
1568 atomic_dec(&group->opened);
1569
1570 vfio_group_put(group);
1571
1572 return 0;
1573 }
1574
1575 static const struct file_operations vfio_group_fops = {
1576 .owner = THIS_MODULE,
1577 .unlocked_ioctl = vfio_group_fops_unl_ioctl,
1578 #ifdef CONFIG_COMPAT
1579 .compat_ioctl = vfio_group_fops_compat_ioctl,
1580 #endif
1581 .open = vfio_group_fops_open,
1582 .release = vfio_group_fops_release,
1583 };
1584
1585 /**
1586 * VFIO Device fd
1587 */
1588 static int vfio_device_fops_release(struct inode *inode, struct file *filep)
1589 {
1590 struct vfio_device *device = filep->private_data;
1591
1592 device->ops->release(device->device_data);
1593
1594 vfio_group_try_dissolve_container(device->group);
1595
1596 vfio_device_put(device);
1597
1598 return 0;
1599 }
1600
1601 static long vfio_device_fops_unl_ioctl(struct file *filep,
1602 unsigned int cmd, unsigned long arg)
1603 {
1604 struct vfio_device *device = filep->private_data;
1605
1606 if (unlikely(!device->ops->ioctl))
1607 return -EINVAL;
1608
1609 return device->ops->ioctl(device->device_data, cmd, arg);
1610 }
1611
1612 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
1613 size_t count, loff_t *ppos)
1614 {
1615 struct vfio_device *device = filep->private_data;
1616
1617 if (unlikely(!device->ops->read))
1618 return -EINVAL;
1619
1620 return device->ops->read(device->device_data, buf, count, ppos);
1621 }
1622
1623 static ssize_t vfio_device_fops_write(struct file *filep,
1624 const char __user *buf,
1625 size_t count, loff_t *ppos)
1626 {
1627 struct vfio_device *device = filep->private_data;
1628
1629 if (unlikely(!device->ops->write))
1630 return -EINVAL;
1631
1632 return device->ops->write(device->device_data, buf, count, ppos);
1633 }
1634
1635 static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
1636 {
1637 struct vfio_device *device = filep->private_data;
1638
1639 if (unlikely(!device->ops->mmap))
1640 return -EINVAL;
1641
1642 return device->ops->mmap(device->device_data, vma);
1643 }
1644
1645 #ifdef CONFIG_COMPAT
1646 static long vfio_device_fops_compat_ioctl(struct file *filep,
1647 unsigned int cmd, unsigned long arg)
1648 {
1649 arg = (unsigned long)compat_ptr(arg);
1650 return vfio_device_fops_unl_ioctl(filep, cmd, arg);
1651 }
1652 #endif /* CONFIG_COMPAT */
1653
1654 static const struct file_operations vfio_device_fops = {
1655 .owner = THIS_MODULE,
1656 .release = vfio_device_fops_release,
1657 .read = vfio_device_fops_read,
1658 .write = vfio_device_fops_write,
1659 .unlocked_ioctl = vfio_device_fops_unl_ioctl,
1660 #ifdef CONFIG_COMPAT
1661 .compat_ioctl = vfio_device_fops_compat_ioctl,
1662 #endif
1663 .mmap = vfio_device_fops_mmap,
1664 };
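/*
 * Putting the three file descriptors together, the expected userspace
 * sequence (abridged from Documentation/vfio.txt; the group number and
 * device name below are only examples) looks roughly like:
 *
 *	container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		// unknown API version
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		// doesn't support the IOMMU driver we want
 *
 *	group = open("/dev/vfio/26", O_RDWR);
 *
 *	ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
 *	// check group_status.flags & VFIO_GROUP_FLAGS_VIABLE
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
 *
 *	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
 */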
1665
1666 /**
1667 * External user API, exported by symbols to be linked dynamically.
1668 *
1669 * The protocol includes:
1670 * 1. do normal VFIO init operation:
1671 * - opening a new container;
1672 * - attaching group(s) to it;
1673 * - setting an IOMMU driver for a container.
1674 * When IOMMU is set for a container, all groups in it are
1675 * considered ready to use by an external user.
1676 *
1677 * 2. User space passes a group fd to an external user.
1678 * The external user calls vfio_group_get_external_user()
1679 * to verify that:
1680 * - the group is initialized;
1681 * - IOMMU is set for it.
1682 * If both checks passed, vfio_group_get_external_user()
1683 * increments the container user counter to prevent
1684 * the VFIO group from disposal before KVM exits.
1685 *
1686 * 3. The external user calls vfio_external_user_iommu_id()
1687 * to know an IOMMU ID.
1688 *
1689 * 4. When the external KVM finishes, it calls
1690 * vfio_group_put_external_user() to release the VFIO group.
1691 * This call decrements the container user counter.
1692 */
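/*
 * For illustration, the consumer side of this protocol looks roughly like
 * the following, modelled on how a hypervisor component (e.g. KVM's vfio
 * helper) uses these exports; error handling is abbreviated and filep is
 * assumed to come from the group file descriptor passed in by userspace:
 *
 *	struct vfio_group *grp;
 *	int iommu_id;
 *
 *	grp = vfio_group_get_external_user(filep);
 *	if (IS_ERR(grp))
 *		return PTR_ERR(grp);
 *
 *	iommu_id = vfio_external_user_iommu_id(grp);
 *	// ... use the ID, e.g. to wire the group into the hypervisor ...
 *
 *	vfio_group_put_external_user(grp);
 */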
1693 struct vfio_group *vfio_group_get_external_user(struct file *filep)
1694 {
1695 struct vfio_group *group = filep->private_data;
1696
1697 if (filep->f_op != &vfio_group_fops)
1698 return ERR_PTR(-EINVAL);
1699
1700 if (!atomic_inc_not_zero(&group->container_users))
1701 return ERR_PTR(-EINVAL);
1702
1703 if (group->noiommu) {
1704 atomic_dec(&group->container_users);
1705 return ERR_PTR(-EPERM);
1706 }
1707
1708 if (!group->container->iommu_driver ||
1709 !vfio_group_viable(group)) {
1710 atomic_dec(&group->container_users);
1711 return ERR_PTR(-EINVAL);
1712 }
1713
1714 vfio_group_get(group);
1715
1716 return group;
1717 }
1718 EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
1719
1720 void vfio_group_put_external_user(struct vfio_group *group)
1721 {
1722 vfio_group_put(group);
1723 vfio_group_try_dissolve_container(group);
1724 }
1725 EXPORT_SYMBOL_GPL(vfio_group_put_external_user);
1726
1727 int vfio_external_user_iommu_id(struct vfio_group *group)
1728 {
1729 return iommu_group_id(group->iommu_group);
1730 }
1731 EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);
1732
1733 long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
1734 {
1735 return vfio_ioctl_check_extension(group->container, arg);
1736 }
1737 EXPORT_SYMBOL_GPL(vfio_external_check_extension);
1738
1739 /**
1740 * Module/class support
1741 */
1742 static char *vfio_devnode(struct device *dev, umode_t *mode)
1743 {
1744 return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
1745 }
1746
1747 static struct miscdevice vfio_dev = {
1748 .minor = VFIO_MINOR,
1749 .name = "vfio",
1750 .fops = &vfio_fops,
1751 .nodename = "vfio/vfio",
1752 .mode = S_IRUGO | S_IWUGO,
1753 };
1754
1755 static int __init vfio_init(void)
1756 {
1757 int ret;
1758
1759 idr_init(&vfio.group_idr);
1760 mutex_init(&vfio.group_lock);
1761 mutex_init(&vfio.iommu_drivers_lock);
1762 INIT_LIST_HEAD(&vfio.group_list);
1763 INIT_LIST_HEAD(&vfio.iommu_drivers_list);
1764 init_waitqueue_head(&vfio.release_q);
1765
1766 ret = misc_register(&vfio_dev);
1767 if (ret) {
1768 pr_err("vfio: misc device register failed\n");
1769 return ret;
1770 }
1771
1772 /* /dev/vfio/$GROUP */
1773 vfio.class = class_create(THIS_MODULE, "vfio");
1774 if (IS_ERR(vfio.class)) {
1775 ret = PTR_ERR(vfio.class);
1776 goto err_class;
1777 }
1778
1779 vfio.class->devnode = vfio_devnode;
1780
1781 ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
1782 if (ret)
1783 goto err_alloc_chrdev;
1784
1785 cdev_init(&vfio.group_cdev, &vfio_group_fops);
1786 ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
1787 if (ret)
1788 goto err_cdev_add;
1789
1790 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
1791
1792 /*
1793 * Attempt to load known iommu-drivers. This gives us a working
1794 * environment without the user needing to explicitly load iommu
1795 * drivers.
1796 */
1797 request_module_nowait("vfio_iommu_type1");
1798 request_module_nowait("vfio_iommu_spapr_tce");
1799
1800 #ifdef CONFIG_VFIO_NOIOMMU
1801 vfio_register_iommu_driver(&vfio_noiommu_ops);
1802 #endif
1803 return 0;
1804
1805 err_cdev_add:
1806 unregister_chrdev_region(vfio.group_devt, MINORMASK);
1807 err_alloc_chrdev:
1808 class_destroy(vfio.class);
1809 vfio.class = NULL;
1810 err_class:
1811 misc_deregister(&vfio_dev);
1812 return ret;
1813 }
1814
1815 static void __exit vfio_cleanup(void)
1816 {
1817 WARN_ON(!list_empty(&vfio.group_list));
1818
1819 #ifdef CONFIG_VFIO_NOIOMMU
1820 vfio_unregister_iommu_driver(&vfio_noiommu_ops);
1821 #endif
1822 idr_destroy(&vfio.group_idr);
1823 cdev_del(&vfio.group_cdev);
1824 unregister_chrdev_region(vfio.group_devt, MINORMASK);
1825 class_destroy(vfio.class);
1826 vfio.class = NULL;
1827 misc_deregister(&vfio_dev);
1828 }
1829
1830 module_init(vfio_init);
1831 module_exit(vfio_cleanup);
1832
1833 MODULE_VERSION(DRIVER_VERSION);
1834 MODULE_LICENSE("GPL v2");
1835 MODULE_AUTHOR(DRIVER_AUTHOR);
1836 MODULE_DESCRIPTION(DRIVER_DESC);
1837 MODULE_ALIAS_MISCDEV(VFIO_MINOR);
1838 MODULE_ALIAS("devname:vfio/vfio");