Commit | Line | Data |
---|---|---|
bfd99ff5 AK |
1 | /* |
2 | * Kernel-based Virtual Machine - device assignment support | |
3 | * | |
221d059d | 4 | * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. |
bfd99ff5 AK |
5 | * |
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
7 | * the COPYING file in the top-level directory. | |
8 | * | |
9 | */ | |
10 | ||
11 | #include <linux/kvm_host.h> | |
12 | #include <linux/kvm.h> | |
13 | #include <linux/uaccess.h> | |
14 | #include <linux/vmalloc.h> | |
15 | #include <linux/errno.h> | |
16 | #include <linux/spinlock.h> | |
17 | #include <linux/pci.h> | |
18 | #include <linux/interrupt.h> | |
5a0e3ad6 | 19 | #include <linux/slab.h> |
3d27e23b AW |
20 | #include <linux/namei.h> |
21 | #include <linux/fs.h> | |
bfd99ff5 AK |
22 | #include "irq.h" |
23 | ||
24 | static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, | |
25 | int assigned_dev_id) | |
26 | { | |
27 | struct list_head *ptr; | |
28 | struct kvm_assigned_dev_kernel *match; | |
29 | ||
30 | list_for_each(ptr, head) { | |
31 | match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); | |
32 | if (match->assigned_dev_id == assigned_dev_id) | |
33 | return match; | |
34 | } | |
35 | return NULL; | |
36 | } | |
37 | ||
38 | static int find_index_from_host_irq(struct kvm_assigned_dev_kernel | |
39 | *assigned_dev, int irq) | |
40 | { | |
41 | int i, index; | |
42 | struct msix_entry *host_msix_entries; | |
43 | ||
44 | host_msix_entries = assigned_dev->host_msix_entries; | |
45 | ||
46 | index = -1; | |
47 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
48 | if (irq == host_msix_entries[i].vector) { | |
49 | index = i; | |
50 | break; | |
51 | } | |
b93a3553 | 52 | if (index < 0) |
bfd99ff5 | 53 | printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); |
bfd99ff5 AK |
54 | |
55 | return index; | |
56 | } | |
57 | ||
0645211c | 58 | static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) |
bfd99ff5 | 59 | { |
0645211c | 60 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; |
bfd99ff5 | 61 | |
0645211c JK |
62 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { |
63 | spin_lock(&assigned_dev->intx_lock); | |
64 | disable_irq_nosync(irq); | |
65 | assigned_dev->host_irq_disabled = true; | |
66 | spin_unlock(&assigned_dev->intx_lock); | |
67 | } | |
bfd99ff5 | 68 | |
cc079396 JK |
69 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
70 | assigned_dev->guest_irq, 1); | |
71 | ||
72 | return IRQ_HANDLED; | |
73 | } | |
74 | ||
75 | #ifdef __KVM_HAVE_MSIX | |
76 | static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) | |
77 | { | |
78 | struct kvm_assigned_dev_kernel *assigned_dev = dev_id; | |
79 | int index = find_index_from_host_irq(assigned_dev, irq); | |
80 | u32 vector; | |
81 | ||
82 | if (index >= 0) { | |
83 | vector = assigned_dev->guest_msix_entries[index].vector; | |
bfd99ff5 | 84 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
cc079396 JK |
85 | vector, 1); |
86 | } | |
bfd99ff5 | 87 | |
bfd99ff5 AK |
88 | return IRQ_HANDLED; |
89 | } | |
cc079396 | 90 | #endif |
bfd99ff5 AK |
91 | |
92 | /* Ack the irq line for an assigned device */ | |
93 | static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) | |
94 | { | |
c61fa9d6 JK |
95 | struct kvm_assigned_dev_kernel *dev = |
96 | container_of(kian, struct kvm_assigned_dev_kernel, | |
97 | ack_notifier); | |
bfd99ff5 AK |
98 | |
99 | kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); | |
100 | ||
101 | /* The guest irq may be shared so this ack may be | |
102 | * from another device. | |
103 | */ | |
0645211c | 104 | spin_lock(&dev->intx_lock); |
bfd99ff5 AK |
105 | if (dev->host_irq_disabled) { |
106 | enable_irq(dev->host_irq); | |
107 | dev->host_irq_disabled = false; | |
108 | } | |
0645211c | 109 | spin_unlock(&dev->intx_lock); |
bfd99ff5 AK |
110 | } |
111 | ||
112 | static void deassign_guest_irq(struct kvm *kvm, | |
113 | struct kvm_assigned_dev_kernel *assigned_dev) | |
114 | { | |
c61fa9d6 JK |
115 | if (assigned_dev->ack_notifier.gsi != -1) |
116 | kvm_unregister_irq_ack_notifier(kvm, | |
117 | &assigned_dev->ack_notifier); | |
bfd99ff5 | 118 | |
0c106b5a JK |
119 | kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, |
120 | assigned_dev->guest_irq, 0); | |
121 | ||
bfd99ff5 AK |
122 | if (assigned_dev->irq_source_id != -1) |
123 | kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); | |
124 | assigned_dev->irq_source_id = -1; | |
125 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); | |
126 | } | |
127 | ||
128 | /* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ | |
129 | static void deassign_host_irq(struct kvm *kvm, | |
130 | struct kvm_assigned_dev_kernel *assigned_dev) | |
131 | { | |
132 | /* | |
0645211c | 133 | * We disable irq here to prevent further events. |
bfd99ff5 AK |
134 | * |
135 | * Notice this maybe result in nested disable if the interrupt type is | |
136 | * INTx, but it's OK for we are going to free it. | |
137 | * | |
138 | * If this function is a part of VM destroy, please ensure that till | |
139 | * now, the kvm state is still legal for probably we also have to wait | |
0645211c | 140 | * on a currently running IRQ handler. |
bfd99ff5 AK |
141 | */ |
142 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { | |
143 | int i; | |
144 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
0645211c | 145 | disable_irq(assigned_dev->host_msix_entries[i].vector); |
bfd99ff5 AK |
146 | |
147 | for (i = 0; i < assigned_dev->entries_nr; i++) | |
148 | free_irq(assigned_dev->host_msix_entries[i].vector, | |
9f9f6b78 | 149 | assigned_dev); |
bfd99ff5 AK |
150 | |
151 | assigned_dev->entries_nr = 0; | |
152 | kfree(assigned_dev->host_msix_entries); | |
153 | kfree(assigned_dev->guest_msix_entries); | |
154 | pci_disable_msix(assigned_dev->dev); | |
155 | } else { | |
156 | /* Deal with MSI and INTx */ | |
0645211c | 157 | disable_irq(assigned_dev->host_irq); |
bfd99ff5 | 158 | |
9f9f6b78 | 159 | free_irq(assigned_dev->host_irq, assigned_dev); |
bfd99ff5 AK |
160 | |
161 | if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) | |
162 | pci_disable_msi(assigned_dev->dev); | |
163 | } | |
164 | ||
165 | assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); | |
166 | } | |
167 | ||
168 | static int kvm_deassign_irq(struct kvm *kvm, | |
169 | struct kvm_assigned_dev_kernel *assigned_dev, | |
170 | unsigned long irq_requested_type) | |
171 | { | |
172 | unsigned long guest_irq_type, host_irq_type; | |
173 | ||
174 | if (!irqchip_in_kernel(kvm)) | |
175 | return -EINVAL; | |
176 | /* no irq assignment to deassign */ | |
177 | if (!assigned_dev->irq_requested_type) | |
178 | return -ENXIO; | |
179 | ||
180 | host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; | |
181 | guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; | |
182 | ||
183 | if (host_irq_type) | |
184 | deassign_host_irq(kvm, assigned_dev); | |
185 | if (guest_irq_type) | |
186 | deassign_guest_irq(kvm, assigned_dev); | |
187 | ||
188 | return 0; | |
189 | } | |
190 | ||
191 | static void kvm_free_assigned_irq(struct kvm *kvm, | |
192 | struct kvm_assigned_dev_kernel *assigned_dev) | |
193 | { | |
194 | kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); | |
195 | } | |
196 | ||
197 | static void kvm_free_assigned_device(struct kvm *kvm, | |
198 | struct kvm_assigned_dev_kernel | |
199 | *assigned_dev) | |
200 | { | |
201 | kvm_free_assigned_irq(kvm, assigned_dev); | |
202 | ||
f8fcfd77 AW |
203 | pci_reset_function(assigned_dev->dev); |
204 | if (pci_load_and_free_saved_state(assigned_dev->dev, | |
205 | &assigned_dev->pci_saved_state)) | |
206 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | |
207 | __func__, dev_name(&assigned_dev->dev->dev)); | |
208 | else | |
209 | pci_restore_state(assigned_dev->dev); | |
bfd99ff5 | 210 | |
6777829c GR |
211 | assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; |
212 | ||
bfd99ff5 AK |
213 | pci_release_regions(assigned_dev->dev); |
214 | pci_disable_device(assigned_dev->dev); | |
215 | pci_dev_put(assigned_dev->dev); | |
216 | ||
217 | list_del(&assigned_dev->list); | |
218 | kfree(assigned_dev); | |
219 | } | |
220 | ||
221 | void kvm_free_all_assigned_devices(struct kvm *kvm) | |
222 | { | |
223 | struct list_head *ptr, *ptr2; | |
224 | struct kvm_assigned_dev_kernel *assigned_dev; | |
225 | ||
226 | list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { | |
227 | assigned_dev = list_entry(ptr, | |
228 | struct kvm_assigned_dev_kernel, | |
229 | list); | |
230 | ||
231 | kvm_free_assigned_device(kvm, assigned_dev); | |
232 | } | |
233 | } | |
234 | ||
235 | static int assigned_device_enable_host_intx(struct kvm *kvm, | |
236 | struct kvm_assigned_dev_kernel *dev) | |
237 | { | |
238 | dev->host_irq = dev->dev->irq; | |
239 | /* Even though this is PCI, we don't want to use shared | |
240 | * interrupts. Sharing host devices with guest-assigned devices | |
241 | * on the same interrupt line is not a happy situation: there | |
242 | * are going to be long delays in accepting, acking, etc. | |
243 | */ | |
0645211c | 244 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
9f9f6b78 | 245 | IRQF_ONESHOT, dev->irq_name, dev)) |
bfd99ff5 AK |
246 | return -EIO; |
247 | return 0; | |
248 | } | |
249 | ||
250 | #ifdef __KVM_HAVE_MSI | |
251 | static int assigned_device_enable_host_msi(struct kvm *kvm, | |
252 | struct kvm_assigned_dev_kernel *dev) | |
253 | { | |
254 | int r; | |
255 | ||
256 | if (!dev->dev->msi_enabled) { | |
257 | r = pci_enable_msi(dev->dev); | |
258 | if (r) | |
259 | return r; | |
260 | } | |
261 | ||
262 | dev->host_irq = dev->dev->irq; | |
0645211c | 263 | if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, |
9f9f6b78 | 264 | 0, dev->irq_name, dev)) { |
bfd99ff5 AK |
265 | pci_disable_msi(dev->dev); |
266 | return -EIO; | |
267 | } | |
268 | ||
269 | return 0; | |
270 | } | |
271 | #endif | |
272 | ||
273 | #ifdef __KVM_HAVE_MSIX | |
274 | static int assigned_device_enable_host_msix(struct kvm *kvm, | |
275 | struct kvm_assigned_dev_kernel *dev) | |
276 | { | |
277 | int i, r = -EINVAL; | |
278 | ||
279 | /* host_msix_entries and guest_msix_entries should have been | |
280 | * initialized */ | |
281 | if (dev->entries_nr == 0) | |
282 | return r; | |
283 | ||
284 | r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); | |
285 | if (r) | |
286 | return r; | |
287 | ||
288 | for (i = 0; i < dev->entries_nr; i++) { | |
0645211c | 289 | r = request_threaded_irq(dev->host_msix_entries[i].vector, |
cc079396 | 290 | NULL, kvm_assigned_dev_thread_msix, |
9f9f6b78 | 291 | 0, dev->irq_name, dev); |
bfd99ff5 | 292 | if (r) |
d57e2c07 | 293 | goto err; |
bfd99ff5 AK |
294 | } |
295 | ||
296 | return 0; | |
d57e2c07 | 297 | err: |
298 | for (i -= 1; i >= 0; i--) | |
9f9f6b78 | 299 | free_irq(dev->host_msix_entries[i].vector, dev); |
d57e2c07 | 300 | pci_disable_msix(dev->dev); |
301 | return r; | |
bfd99ff5 AK |
302 | } |
303 | ||
304 | #endif | |
305 | ||
306 | static int assigned_device_enable_guest_intx(struct kvm *kvm, | |
307 | struct kvm_assigned_dev_kernel *dev, | |
308 | struct kvm_assigned_irq *irq) | |
309 | { | |
310 | dev->guest_irq = irq->guest_irq; | |
311 | dev->ack_notifier.gsi = irq->guest_irq; | |
312 | return 0; | |
313 | } | |
314 | ||
315 | #ifdef __KVM_HAVE_MSI | |
316 | static int assigned_device_enable_guest_msi(struct kvm *kvm, | |
317 | struct kvm_assigned_dev_kernel *dev, | |
318 | struct kvm_assigned_irq *irq) | |
319 | { | |
320 | dev->guest_irq = irq->guest_irq; | |
321 | dev->ack_notifier.gsi = -1; | |
322 | dev->host_irq_disabled = false; | |
323 | return 0; | |
324 | } | |
325 | #endif | |
326 | ||
327 | #ifdef __KVM_HAVE_MSIX | |
328 | static int assigned_device_enable_guest_msix(struct kvm *kvm, | |
329 | struct kvm_assigned_dev_kernel *dev, | |
330 | struct kvm_assigned_irq *irq) | |
331 | { | |
332 | dev->guest_irq = irq->guest_irq; | |
333 | dev->ack_notifier.gsi = -1; | |
334 | dev->host_irq_disabled = false; | |
335 | return 0; | |
336 | } | |
337 | #endif | |
338 | ||
339 | static int assign_host_irq(struct kvm *kvm, | |
340 | struct kvm_assigned_dev_kernel *dev, | |
341 | __u32 host_irq_type) | |
342 | { | |
343 | int r = -EEXIST; | |
344 | ||
345 | if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) | |
346 | return r; | |
347 | ||
1e001d49 JK |
348 | snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", |
349 | pci_name(dev->dev)); | |
350 | ||
bfd99ff5 AK |
351 | switch (host_irq_type) { |
352 | case KVM_DEV_IRQ_HOST_INTX: | |
353 | r = assigned_device_enable_host_intx(kvm, dev); | |
354 | break; | |
355 | #ifdef __KVM_HAVE_MSI | |
356 | case KVM_DEV_IRQ_HOST_MSI: | |
357 | r = assigned_device_enable_host_msi(kvm, dev); | |
358 | break; | |
359 | #endif | |
360 | #ifdef __KVM_HAVE_MSIX | |
361 | case KVM_DEV_IRQ_HOST_MSIX: | |
362 | r = assigned_device_enable_host_msix(kvm, dev); | |
363 | break; | |
364 | #endif | |
365 | default: | |
366 | r = -EINVAL; | |
367 | } | |
368 | ||
369 | if (!r) | |
370 | dev->irq_requested_type |= host_irq_type; | |
371 | ||
372 | return r; | |
373 | } | |
374 | ||
375 | static int assign_guest_irq(struct kvm *kvm, | |
376 | struct kvm_assigned_dev_kernel *dev, | |
377 | struct kvm_assigned_irq *irq, | |
378 | unsigned long guest_irq_type) | |
379 | { | |
380 | int id; | |
381 | int r = -EEXIST; | |
382 | ||
383 | if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) | |
384 | return r; | |
385 | ||
386 | id = kvm_request_irq_source_id(kvm); | |
387 | if (id < 0) | |
388 | return id; | |
389 | ||
390 | dev->irq_source_id = id; | |
391 | ||
392 | switch (guest_irq_type) { | |
393 | case KVM_DEV_IRQ_GUEST_INTX: | |
394 | r = assigned_device_enable_guest_intx(kvm, dev, irq); | |
395 | break; | |
396 | #ifdef __KVM_HAVE_MSI | |
397 | case KVM_DEV_IRQ_GUEST_MSI: | |
398 | r = assigned_device_enable_guest_msi(kvm, dev, irq); | |
399 | break; | |
400 | #endif | |
401 | #ifdef __KVM_HAVE_MSIX | |
402 | case KVM_DEV_IRQ_GUEST_MSIX: | |
403 | r = assigned_device_enable_guest_msix(kvm, dev, irq); | |
404 | break; | |
405 | #endif | |
406 | default: | |
407 | r = -EINVAL; | |
408 | } | |
409 | ||
410 | if (!r) { | |
411 | dev->irq_requested_type |= guest_irq_type; | |
c61fa9d6 JK |
412 | if (dev->ack_notifier.gsi != -1) |
413 | kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); | |
bfd99ff5 AK |
414 | } else |
415 | kvm_free_irq_source_id(kvm, dev->irq_source_id); | |
416 | ||
417 | return r; | |
418 | } | |
419 | ||
420 | /* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ | |
421 | static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, | |
422 | struct kvm_assigned_irq *assigned_irq) | |
423 | { | |
424 | int r = -EINVAL; | |
425 | struct kvm_assigned_dev_kernel *match; | |
426 | unsigned long host_irq_type, guest_irq_type; | |
427 | ||
bfd99ff5 AK |
428 | if (!irqchip_in_kernel(kvm)) |
429 | return r; | |
430 | ||
431 | mutex_lock(&kvm->lock); | |
432 | r = -ENODEV; | |
433 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
434 | assigned_irq->assigned_dev_id); | |
435 | if (!match) | |
436 | goto out; | |
437 | ||
438 | host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); | |
439 | guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); | |
440 | ||
441 | r = -EINVAL; | |
442 | /* can only assign one type at a time */ | |
443 | if (hweight_long(host_irq_type) > 1) | |
444 | goto out; | |
445 | if (hweight_long(guest_irq_type) > 1) | |
446 | goto out; | |
447 | if (host_irq_type == 0 && guest_irq_type == 0) | |
448 | goto out; | |
449 | ||
450 | r = 0; | |
451 | if (host_irq_type) | |
452 | r = assign_host_irq(kvm, match, host_irq_type); | |
453 | if (r) | |
454 | goto out; | |
455 | ||
456 | if (guest_irq_type) | |
457 | r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); | |
458 | out: | |
459 | mutex_unlock(&kvm->lock); | |
460 | return r; | |
461 | } | |
462 | ||
463 | static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, | |
464 | struct kvm_assigned_irq | |
465 | *assigned_irq) | |
466 | { | |
467 | int r = -ENODEV; | |
468 | struct kvm_assigned_dev_kernel *match; | |
469 | ||
470 | mutex_lock(&kvm->lock); | |
471 | ||
472 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
473 | assigned_irq->assigned_dev_id); | |
474 | if (!match) | |
475 | goto out; | |
476 | ||
477 | r = kvm_deassign_irq(kvm, match, assigned_irq->flags); | |
478 | out: | |
479 | mutex_unlock(&kvm->lock); | |
480 | return r; | |
481 | } | |
482 | ||
3d27e23b AW |
483 | /* |
484 | * We want to test whether the caller has been granted permissions to | |
485 | * use this device. To be able to configure and control the device, | |
486 | * the user needs access to PCI configuration space and BAR resources. | |
487 | * These are accessed through PCI sysfs. PCI config space is often | |
488 | * passed to the process calling this ioctl via file descriptor, so we | |
489 | * can't rely on access to that file. We can check for permissions | |
490 | * on each of the BAR resource files, which is a pretty clear | |
491 | * indicator that the user has been granted access to the device. | |
492 | */ | |
493 | static int probe_sysfs_permissions(struct pci_dev *dev) | |
494 | { | |
495 | #ifdef CONFIG_SYSFS | |
496 | int i; | |
497 | bool bar_found = false; | |
498 | ||
499 | for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { | |
500 | char *kpath, *syspath; | |
501 | struct path path; | |
502 | struct inode *inode; | |
503 | int r; | |
504 | ||
505 | if (!pci_resource_len(dev, i)) | |
506 | continue; | |
507 | ||
508 | kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); | |
509 | if (!kpath) | |
510 | return -ENOMEM; | |
511 | ||
512 | /* Per sysfs-rules, sysfs is always at /sys */ | |
513 | syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); | |
514 | kfree(kpath); | |
515 | if (!syspath) | |
516 | return -ENOMEM; | |
517 | ||
518 | r = kern_path(syspath, LOOKUP_FOLLOW, &path); | |
519 | kfree(syspath); | |
520 | if (r) | |
521 | return r; | |
522 | ||
523 | inode = path.dentry->d_inode; | |
524 | ||
525 | r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); | |
526 | path_put(&path); | |
527 | if (r) | |
528 | return r; | |
529 | ||
530 | bar_found = true; | |
531 | } | |
532 | ||
533 | /* If no resources, probably something special */ | |
534 | if (!bar_found) | |
535 | return -EPERM; | |
536 | ||
537 | return 0; | |
538 | #else | |
539 | return -EINVAL; /* No way to control the device without sysfs */ | |
540 | #endif | |
541 | } | |
542 | ||
bfd99ff5 AK |
543 | static int kvm_vm_ioctl_assign_device(struct kvm *kvm, |
544 | struct kvm_assigned_pci_dev *assigned_dev) | |
545 | { | |
bc6678a3 | 546 | int r = 0, idx; |
bfd99ff5 AK |
547 | struct kvm_assigned_dev_kernel *match; |
548 | struct pci_dev *dev; | |
3d27e23b | 549 | u8 header_type; |
bfd99ff5 | 550 | |
42387373 AW |
551 | if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) |
552 | return -EINVAL; | |
553 | ||
bfd99ff5 | 554 | mutex_lock(&kvm->lock); |
bc6678a3 | 555 | idx = srcu_read_lock(&kvm->srcu); |
bfd99ff5 AK |
556 | |
557 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
558 | assigned_dev->assigned_dev_id); | |
559 | if (match) { | |
560 | /* device already assigned */ | |
561 | r = -EEXIST; | |
562 | goto out; | |
563 | } | |
564 | ||
565 | match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); | |
566 | if (match == NULL) { | |
567 | printk(KERN_INFO "%s: Couldn't allocate memory\n", | |
568 | __func__); | |
569 | r = -ENOMEM; | |
570 | goto out; | |
571 | } | |
ab9f4ecb ZE |
572 | dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, |
573 | assigned_dev->busnr, | |
bfd99ff5 AK |
574 | assigned_dev->devfn); |
575 | if (!dev) { | |
576 | printk(KERN_INFO "%s: host device not found\n", __func__); | |
577 | r = -EINVAL; | |
578 | goto out_free; | |
579 | } | |
3d27e23b AW |
580 | |
581 | /* Don't allow bridges to be assigned */ | |
582 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | |
583 | if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { | |
584 | r = -EPERM; | |
585 | goto out_put; | |
586 | } | |
587 | ||
588 | r = probe_sysfs_permissions(dev); | |
589 | if (r) | |
590 | goto out_put; | |
591 | ||
bfd99ff5 AK |
592 | if (pci_enable_device(dev)) { |
593 | printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); | |
594 | r = -EBUSY; | |
595 | goto out_put; | |
596 | } | |
597 | r = pci_request_regions(dev, "kvm_assigned_device"); | |
598 | if (r) { | |
599 | printk(KERN_INFO "%s: Could not get access to device regions\n", | |
600 | __func__); | |
601 | goto out_disable; | |
602 | } | |
603 | ||
604 | pci_reset_function(dev); | |
ed78661f | 605 | pci_save_state(dev); |
f8fcfd77 AW |
606 | match->pci_saved_state = pci_store_saved_state(dev); |
607 | if (!match->pci_saved_state) | |
608 | printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", | |
609 | __func__, dev_name(&dev->dev)); | |
bfd99ff5 | 610 | match->assigned_dev_id = assigned_dev->assigned_dev_id; |
ab9f4ecb | 611 | match->host_segnr = assigned_dev->segnr; |
bfd99ff5 AK |
612 | match->host_busnr = assigned_dev->busnr; |
613 | match->host_devfn = assigned_dev->devfn; | |
614 | match->flags = assigned_dev->flags; | |
615 | match->dev = dev; | |
0645211c | 616 | spin_lock_init(&match->intx_lock); |
bfd99ff5 AK |
617 | match->irq_source_id = -1; |
618 | match->kvm = kvm; | |
619 | match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; | |
bfd99ff5 AK |
620 | |
621 | list_add(&match->list, &kvm->arch.assigned_dev_head); | |
622 | ||
42387373 AW |
623 | if (!kvm->arch.iommu_domain) { |
624 | r = kvm_iommu_map_guest(kvm); | |
bfd99ff5 AK |
625 | if (r) |
626 | goto out_list_del; | |
627 | } | |
42387373 AW |
628 | r = kvm_assign_device(kvm, match); |
629 | if (r) | |
630 | goto out_list_del; | |
bfd99ff5 AK |
631 | |
632 | out: | |
bc6678a3 | 633 | srcu_read_unlock(&kvm->srcu, idx); |
fae3a353 | 634 | mutex_unlock(&kvm->lock); |
bfd99ff5 AK |
635 | return r; |
636 | out_list_del: | |
f8fcfd77 AW |
637 | if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) |
638 | printk(KERN_INFO "%s: Couldn't reload %s saved state\n", | |
639 | __func__, dev_name(&dev->dev)); | |
bfd99ff5 AK |
640 | list_del(&match->list); |
641 | pci_release_regions(dev); | |
642 | out_disable: | |
643 | pci_disable_device(dev); | |
644 | out_put: | |
645 | pci_dev_put(dev); | |
646 | out_free: | |
647 | kfree(match); | |
bc6678a3 | 648 | srcu_read_unlock(&kvm->srcu, idx); |
fae3a353 | 649 | mutex_unlock(&kvm->lock); |
bfd99ff5 AK |
650 | return r; |
651 | } | |
652 | ||
653 | static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, | |
654 | struct kvm_assigned_pci_dev *assigned_dev) | |
655 | { | |
656 | int r = 0; | |
657 | struct kvm_assigned_dev_kernel *match; | |
658 | ||
659 | mutex_lock(&kvm->lock); | |
660 | ||
661 | match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
662 | assigned_dev->assigned_dev_id); | |
663 | if (!match) { | |
664 | printk(KERN_INFO "%s: device hasn't been assigned before, " | |
665 | "so cannot be deassigned\n", __func__); | |
666 | r = -EINVAL; | |
667 | goto out; | |
668 | } | |
669 | ||
42387373 | 670 | kvm_deassign_device(kvm, match); |
bfd99ff5 AK |
671 | |
672 | kvm_free_assigned_device(kvm, match); | |
673 | ||
674 | out: | |
675 | mutex_unlock(&kvm->lock); | |
676 | return r; | |
677 | } | |
678 | ||
679 | ||
680 | #ifdef __KVM_HAVE_MSIX | |
681 | static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, | |
682 | struct kvm_assigned_msix_nr *entry_nr) | |
683 | { | |
684 | int r = 0; | |
685 | struct kvm_assigned_dev_kernel *adev; | |
686 | ||
687 | mutex_lock(&kvm->lock); | |
688 | ||
689 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
690 | entry_nr->assigned_dev_id); | |
691 | if (!adev) { | |
692 | r = -EINVAL; | |
693 | goto msix_nr_out; | |
694 | } | |
695 | ||
696 | if (adev->entries_nr == 0) { | |
697 | adev->entries_nr = entry_nr->entry_nr; | |
698 | if (adev->entries_nr == 0 || | |
9f3191ae | 699 | adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { |
bfd99ff5 AK |
700 | r = -EINVAL; |
701 | goto msix_nr_out; | |
702 | } | |
703 | ||
704 | adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * | |
705 | entry_nr->entry_nr, | |
706 | GFP_KERNEL); | |
707 | if (!adev->host_msix_entries) { | |
708 | r = -ENOMEM; | |
709 | goto msix_nr_out; | |
710 | } | |
0645211c JK |
711 | adev->guest_msix_entries = |
712 | kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, | |
713 | GFP_KERNEL); | |
bfd99ff5 AK |
714 | if (!adev->guest_msix_entries) { |
715 | kfree(adev->host_msix_entries); | |
716 | r = -ENOMEM; | |
717 | goto msix_nr_out; | |
718 | } | |
719 | } else /* Not allowed set MSI-X number twice */ | |
720 | r = -EINVAL; | |
721 | msix_nr_out: | |
722 | mutex_unlock(&kvm->lock); | |
723 | return r; | |
724 | } | |
725 | ||
726 | static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, | |
727 | struct kvm_assigned_msix_entry *entry) | |
728 | { | |
729 | int r = 0, i; | |
730 | struct kvm_assigned_dev_kernel *adev; | |
731 | ||
732 | mutex_lock(&kvm->lock); | |
733 | ||
734 | adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, | |
735 | entry->assigned_dev_id); | |
736 | ||
737 | if (!adev) { | |
738 | r = -EINVAL; | |
739 | goto msix_entry_out; | |
740 | } | |
741 | ||
742 | for (i = 0; i < adev->entries_nr; i++) | |
743 | if (adev->guest_msix_entries[i].vector == 0 || | |
744 | adev->guest_msix_entries[i].entry == entry->entry) { | |
745 | adev->guest_msix_entries[i].entry = entry->entry; | |
746 | adev->guest_msix_entries[i].vector = entry->gsi; | |
747 | adev->host_msix_entries[i].entry = entry->entry; | |
748 | break; | |
749 | } | |
750 | if (i == adev->entries_nr) { | |
751 | r = -ENOSPC; | |
752 | goto msix_entry_out; | |
753 | } | |
754 | ||
755 | msix_entry_out: | |
756 | mutex_unlock(&kvm->lock); | |
757 | ||
758 | return r; | |
759 | } | |
760 | #endif | |
761 | ||
762 | long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, | |
763 | unsigned long arg) | |
764 | { | |
765 | void __user *argp = (void __user *)arg; | |
51de271d | 766 | int r; |
bfd99ff5 AK |
767 | |
768 | switch (ioctl) { | |
769 | case KVM_ASSIGN_PCI_DEVICE: { | |
770 | struct kvm_assigned_pci_dev assigned_dev; | |
771 | ||
772 | r = -EFAULT; | |
773 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | |
774 | goto out; | |
775 | r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); | |
776 | if (r) | |
777 | goto out; | |
778 | break; | |
779 | } | |
780 | case KVM_ASSIGN_IRQ: { | |
781 | r = -EOPNOTSUPP; | |
782 | break; | |
783 | } | |
bfd99ff5 AK |
784 | case KVM_ASSIGN_DEV_IRQ: { |
785 | struct kvm_assigned_irq assigned_irq; | |
786 | ||
787 | r = -EFAULT; | |
788 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | |
789 | goto out; | |
790 | r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); | |
791 | if (r) | |
792 | goto out; | |
793 | break; | |
794 | } | |
795 | case KVM_DEASSIGN_DEV_IRQ: { | |
796 | struct kvm_assigned_irq assigned_irq; | |
797 | ||
798 | r = -EFAULT; | |
799 | if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) | |
800 | goto out; | |
801 | r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); | |
802 | if (r) | |
803 | goto out; | |
804 | break; | |
805 | } | |
bfd99ff5 AK |
806 | case KVM_DEASSIGN_PCI_DEVICE: { |
807 | struct kvm_assigned_pci_dev assigned_dev; | |
808 | ||
809 | r = -EFAULT; | |
810 | if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) | |
811 | goto out; | |
812 | r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); | |
813 | if (r) | |
814 | goto out; | |
815 | break; | |
816 | } | |
bfd99ff5 AK |
817 | #ifdef KVM_CAP_IRQ_ROUTING |
818 | case KVM_SET_GSI_ROUTING: { | |
819 | struct kvm_irq_routing routing; | |
820 | struct kvm_irq_routing __user *urouting; | |
821 | struct kvm_irq_routing_entry *entries; | |
822 | ||
823 | r = -EFAULT; | |
824 | if (copy_from_user(&routing, argp, sizeof(routing))) | |
825 | goto out; | |
826 | r = -EINVAL; | |
827 | if (routing.nr >= KVM_MAX_IRQ_ROUTES) | |
828 | goto out; | |
829 | if (routing.flags) | |
830 | goto out; | |
831 | r = -ENOMEM; | |
832 | entries = vmalloc(routing.nr * sizeof(*entries)); | |
833 | if (!entries) | |
834 | goto out; | |
835 | r = -EFAULT; | |
836 | urouting = argp; | |
837 | if (copy_from_user(entries, urouting->entries, | |
838 | routing.nr * sizeof(*entries))) | |
839 | goto out_free_irq_routing; | |
840 | r = kvm_set_irq_routing(kvm, entries, routing.nr, | |
841 | routing.flags); | |
842 | out_free_irq_routing: | |
843 | vfree(entries); | |
844 | break; | |
845 | } | |
846 | #endif /* KVM_CAP_IRQ_ROUTING */ | |
847 | #ifdef __KVM_HAVE_MSIX | |
848 | case KVM_ASSIGN_SET_MSIX_NR: { | |
849 | struct kvm_assigned_msix_nr entry_nr; | |
850 | r = -EFAULT; | |
851 | if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) | |
852 | goto out; | |
853 | r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); | |
854 | if (r) | |
855 | goto out; | |
856 | break; | |
857 | } | |
858 | case KVM_ASSIGN_SET_MSIX_ENTRY: { | |
859 | struct kvm_assigned_msix_entry entry; | |
860 | r = -EFAULT; | |
861 | if (copy_from_user(&entry, argp, sizeof entry)) | |
862 | goto out; | |
863 | r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); | |
864 | if (r) | |
865 | goto out; | |
866 | break; | |
867 | } | |
868 | #endif | |
51de271d JK |
869 | default: |
870 | r = -ENOTTY; | |
871 | break; | |
bfd99ff5 AK |
872 | } |
873 | out: | |
874 | return r; | |
875 | } | |
876 |