#include <asm/uaccess.h>
#include <asm/pgtable.h>
+#ifdef CONFIG_X86
+#include <asm/msidef.h>
+#endif
+
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
+static int msi2intx = 1;
+module_param(msi2intx, bool, 0);
+
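Left at its default of 1, msi2intx lets KVM drive a host device through MSI even while the guest programs INTx, translating each host MSI into a guest INTx injection; loading with msi2intx=0 (e.g. modprobe kvm msi2intx=0) keeps the host and guest interrupt types matched. This summary is inferred from the code in this patch; note the final hunk below forces the parameter to 0 on non-x86.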
DEFINE_SPINLOCK(kvm_lock);
LIST_HEAD(vm_list);
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
-bool kvm_rebooting;
+static bool kvm_rebooting;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+
+#ifdef CONFIG_X86
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev)
+{
+ int vcpu_id;
+ struct kvm_vcpu *vcpu;
+ struct kvm_ioapic *ioapic = ioapic_irqchip(dev->kvm);
+ int dest_id = (dev->guest_msi.address_lo & MSI_ADDR_DEST_ID_MASK)
+ >> MSI_ADDR_DEST_ID_SHIFT;
+ int vector = (dev->guest_msi.data & MSI_DATA_VECTOR_MASK)
+ >> MSI_DATA_VECTOR_SHIFT;
+ int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT,
+ (unsigned long *)&dev->guest_msi.address_lo);
+ int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT,
+ (unsigned long *)&dev->guest_msi.data);
+ int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT,
+ (unsigned long *)&dev->guest_msi.data);
+ u32 deliver_bitmask;
+
+ BUG_ON(!ioapic);
+
+ deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic,
+ dest_id, dest_mode);
+ /* The IOAPIC delivery-mode encoding is the same as MSI's here */
+ switch (delivery_mode) {
+ case IOAPIC_LOWEST_PRIORITY:
+ vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector,
+ deliver_bitmask);
+ if (vcpu != NULL)
+ kvm_apic_set_irq(vcpu, vector, trig_mode);
+ else
+ printk(KERN_INFO "kvm: null lowest priority vcpu!\n");
+ break;
+ case IOAPIC_FIXED:
+ for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+ if (!(deliver_bitmask & (1 << vcpu_id)))
+ continue;
+ deliver_bitmask &= ~(1 << vcpu_id);
+ vcpu = ioapic->kvm->vcpus[vcpu_id];
+ if (vcpu)
+ kvm_apic_set_irq(vcpu, vector, trig_mode);
+ }
+ break;
+ default:
+ printk(KERN_INFO "kvm: unsupported MSI delivery mode\n");
+ }
+}
+#else
+static void assigned_device_msi_dispatch(struct kvm_assigned_dev_kernel *dev) {}
+#endif
+
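For reference, a minimal userspace sketch of the same decode that assigned_device_msi_dispatch() performs above. The MSI_* constants mirror asm/msidef.h; the sample address/data pair is made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Constants mirroring asm/msidef.h */
    #define MSI_ADDR_DEST_ID_SHIFT          12
    #define MSI_ADDR_DEST_ID_MASK           0x00ffff0
    #define MSI_ADDR_DEST_MODE_SHIFT        2
    #define MSI_DATA_VECTOR_SHIFT           0
    #define MSI_DATA_VECTOR_MASK            0x000000ff
    #define MSI_DATA_DELIVERY_MODE_SHIFT    8
    #define MSI_DATA_TRIGGER_SHIFT          15

    int main(void)
    {
            /* Made-up message: dest APIC ID 1, physical destination
             * mode, vector 0x41, fixed delivery, edge triggered. */
            uint32_t address_lo = 0xfee01000;
            uint32_t data = 0x0041;

            printf("dest_id=%u vector=%u dest_mode=%u trig=%u delivery=%u\n",
                   (address_lo & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT,
                   (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT,
                   (address_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 1,
                   (data >> MSI_DATA_TRIGGER_SHIFT) & 1,
                   (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 1);
            return 0;
    }

Compiled standalone, this prints dest_id=1 vector=65 dest_mode=0 trig=0 delivery=0: an edge-triggered, fixed-delivery vector 0x41 aimed at physical APIC ID 1, which are exactly the fields the dispatch code feeds to the delivery-bitmask lookup and kvm_apic_set_irq().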
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
int assigned_dev_id)
{
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_INTX)
+ kvm_set_irq(assigned_dev->kvm,
+ assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
+ else if (assigned_dev->irq_requested_type &
+ KVM_ASSIGNED_DEV_GUEST_MSI) {
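+ /* Deliver the MSI, then unmask the host line; the hard irq
+ * handler is assumed (not shown in this excerpt) to have masked
+ * it with disable_irq_nosync() before scheduling this work. */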
+ assigned_device_msi_dispatch(assigned_dev);
+ enable_irq(assigned_dev->host_irq);
+ }
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
struct kvm_assigned_dev_kernel
*assigned_dev)
{
- if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+ if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested_type)
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+ if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+ pci_disable_msi(assigned_dev->dev);

- kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+ kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier);
+ if (assigned_dev->irq_source_id != -1)
+ kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
+ assigned_dev->irq_source_id = -1;
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. That means we will have to take
*/
kvm_put_kvm(kvm);
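+ /* Reset the function so it returns to the host quiesced, without
+ * stale DMA or interrupt state; pci_reset_function() uses an
+ * available reset method such as FLR. */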
+ pci_reset_function(assigned_dev->dev);
+
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
}
}
+static int assigned_device_update_intx(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev,
+ struct kvm_assigned_irq *airq)
+{
+ adev->guest_irq = airq->guest_irq;
+ adev->ack_notifier.gsi = airq->guest_irq;
+
+ if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX)
+ return 0;
+
+ if (irqchip_in_kernel(kvm)) {
+ if (!msi2intx &&
+ adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
+ free_irq(adev->host_irq, (void *)adev);
+ pci_disable_msi(adev->dev);
+ }
+
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ if (airq->host_irq)
+ adev->host_irq = airq->host_irq;
+ else
+ adev->host_irq = adev->dev->irq;
+
+ /* Even though this is PCI, we don't want to use shared
+ * interrupts. Sharing host devices with guest-assigned devices
+ * on the same interrupt line is not a happy situation: there
+ * are going to be long delays in accepting, acking, etc.
+ */
+ if (request_irq(adev->host_irq, kvm_assigned_dev_intr,
+ 0, "kvm_assigned_intx_device", (void *)adev))
+ return -EIO;
+ }
+
+ adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX |
+ KVM_ASSIGNED_DEV_HOST_INTX;
+ return 0;
+}
+
+#ifdef CONFIG_X86
+static int assigned_device_update_msi(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev,
+ struct kvm_assigned_irq *airq)
+{
+ int r;
+
+ if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) {
+ /* x86 doesn't care about the upper address of the guest MSI message */
+ adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI;
+ adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX;
+ adev->guest_msi.address_lo = airq->guest_msi.addr_lo;
+ adev->guest_msi.data = airq->guest_msi.data;
+ adev->ack_notifier.gsi = -1;
+ } else if (msi2intx) {
+ adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX;
+ adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI;
+ adev->guest_irq = airq->guest_irq;
+ adev->ack_notifier.gsi = airq->guest_irq;
+ }
+
+ if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)
+ return 0;
+
+ if (irqchip_in_kernel(kvm)) {
+ if (!msi2intx) {
+ if (adev->irq_requested_type &
+ KVM_ASSIGNED_DEV_HOST_INTX)
+ free_irq(adev->host_irq, (void *)adev);
+
+ r = pci_enable_msi(adev->dev);
+ if (r)
+ return r;
+ }
+
+ adev->host_irq = adev->dev->irq;
+ if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0,
+ "kvm_assigned_msi_device", (void *)adev))
+ return -EIO;
+ }
+
+ if (!msi2intx)
+ adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI;
+
+ adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI;
+ return 0;
+}
+#endif
+
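A hedged sketch of the userspace side of these two update paths. struct kvm_assigned_irq, KVM_ASSIGN_IRQ and KVM_DEV_IRQ_ASSIGN_ENABLE_MSI come from this series' linux/kvm.h; the vm_fd/dev_id plumbing and the helper names are assumptions:

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical helper: route an assigned device's interrupt as a
     * guest MSI.  The device is assumed to be registered already via
     * KVM_ASSIGN_PCI_DEVICE under dev_id. */
    static int assign_guest_msi(int vm_fd, __u32 dev_id,
                                __u32 addr_lo, __u32 data)
    {
            struct kvm_assigned_irq irq = {
                    .assigned_dev_id = dev_id,
                    .flags = KVM_DEV_IRQ_ASSIGN_ENABLE_MSI,
            };

            irq.guest_msi.addr_lo = addr_lo;  /* 0xfeexxxxx APIC window */
            irq.guest_msi.data = data;        /* vector and delivery bits */
            return ioctl(vm_fd, KVM_ASSIGN_IRQ, &irq);
    }

    /* Hypothetical helper: plain INTx routing.  guest_irq is a guest
     * GSI; leaving host_irq at 0 lets the kernel fall back to dev->irq
     * (see assigned_device_update_intx() above). */
    static int assign_guest_intx(int vm_fd, __u32 dev_id, __u32 guest_irq)
    {
            struct kvm_assigned_irq irq = {
                    .assigned_dev_id = dev_id,
                    .guest_irq = guest_irq,
            };

            return ioctl(vm_fd, KVM_ASSIGN_IRQ, &irq);
    }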
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
return -EINVAL;
}
- if (match->irq_requested) {
- match->guest_irq = assigned_irq->guest_irq;
- match->ack_notifier.gsi = assigned_irq->guest_irq;
- mutex_unlock(&kvm->lock);
- return 0;
- }
+ if (!match->irq_requested_type) {
+ INIT_WORK(&match->interrupt_work,
+ kvm_assigned_dev_interrupt_work_handler);
+ if (irqchip_in_kernel(kvm)) {
+ /* Register ack notifier */
+ match->ack_notifier.gsi = -1;
+ match->ack_notifier.irq_acked =
+ kvm_assigned_dev_ack_irq;
+ kvm_register_irq_ack_notifier(kvm,
+ &match->ack_notifier);
+
+ /* Request IRQ source ID */
+ r = kvm_request_irq_source_id(kvm);
+ if (r < 0)
+ goto out_release;
+ else
+ match->irq_source_id = r;
+#ifdef CONFIG_X86
+ /* Determine the host device's IRQ type; the result can be
+ * read back from dev->msi_enabled */
+ if (msi2intx)
+ pci_enable_msi(match->dev);
+#endif
+ }
+ }

- INIT_WORK(&match->interrupt_work,
- kvm_assigned_dev_interrupt_work_handler);
-
- if (irqchip_in_kernel(kvm)) {
- if (!capable(CAP_SYS_RAWIO)) {
- r = -EPERM;
+ if ((!msi2intx &&
+ (assigned_irq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI)) ||
+ (msi2intx && match->dev->msi_enabled)) {
+#ifdef CONFIG_X86
+ r = assigned_device_update_msi(kvm, match, assigned_irq);
+ if (r) {
+ printk(KERN_WARNING "kvm: failed to enable "
+ "MSI device!\n");
goto out_release;
}
-
- if (assigned_irq->host_irq)
- match->host_irq = assigned_irq->host_irq;
- else
- match->host_irq = match->dev->irq;
- match->guest_irq = assigned_irq->guest_irq;
- match->ack_notifier.gsi = assigned_irq->guest_irq;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
- kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
- r = kvm_request_irq_source_id(kvm);
- if (r < 0)
+#else
+ r = -ENOTTY;
+#endif
+ } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) {
+ /* A host IRQ of 0 means the device doesn't support INTx */
+ if (!msi2intx) {
+ printk(KERN_WARNING
+ "kvm: waiting for the device to enable MSI\n");
+ r = 0;
+ } else {
+ printk(KERN_WARNING
+ "kvm: failed to enable MSI device!\n");
+ r = -ENOTTY;
goto out_release;
- else
- match->irq_source_id = r;
-
- /* Even though this is PCI, we don't want to use shared
- * interrupts. Sharing host devices with guest-assigned devices
- * on the same interrupt line is not a happy situation: there
- * are going to be long delays in accepting, acking, etc.
- */
- if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
- "kvm_assigned_device", (void *)match)) {
- r = -EIO;
+ }
+ } else {
+ /* Non-sharing INTx mode */
+ r = assigned_device_update_intx(kvm, match, assigned_irq);
+ if (r) {
+ printk(KERN_WARNING "kvm: failed to enable "
+ "INTx device!\n");
goto out_release;
}
}
- match->irq_requested = true;
mutex_unlock(&kvm->lock);
return r;
out_release:
__func__);
goto out_disable;
}
+
+ pci_reset_function(dev);
+
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->dev = dev;
-
+ match->irq_source_id = -1;
match->kvm = kvm;
list_add(&match->list, &kvm->arch.assigned_dev_head);
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
+ if (user_alloc && (mem->userspace_addr & (PAGE_SIZE - 1)))
+ goto out;
if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
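The new check above rejects a misaligned userspace_addr up front. A minimal sketch of a conforming KVM_SET_USER_MEMORY_REGION call (vm_fd handling, the slot number, and the helper name are assumptions):

    #include <linux/kvm.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    /* Hypothetical helper: back guest physical address 0 with 'size'
     * bytes of anonymous memory.  posix_memalign() provides the page
     * alignment that userspace_addr is now checked for; size must
     * already be a multiple of the page size. */
    static int set_slot0(int vm_fd, size_t size)
    {
            struct kvm_userspace_memory_region mem = {
                    .slot = 0,
                    .guest_phys_addr = 0,
                    .memory_size = size,
            };
            void *ram;

            if (posix_memalign(&ram, sysconf(_SC_PAGESIZE), size))
                    return -1;
            mem.userspace_addr = (unsigned long)ram;
            return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
    }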
}
EXPORT_SYMBOL_GPL(kvm_is_error_hva);
-static struct kvm_memory_slot *__gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
+struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
int i;
}
return NULL;
}
+EXPORT_SYMBOL_GPL(gfn_to_memslot_unaliased);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
{
gfn = unalias_gfn(kvm, gfn);
- return __gfn_to_memslot(kvm, gfn);
+ return gfn_to_memslot_unaliased(kvm, gfn);
}
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
struct kvm_memory_slot *slot;
gfn = unalias_gfn(kvm, gfn);
- slot = __gfn_to_memslot(kvm, gfn);
+ slot = gfn_to_memslot_unaliased(kvm, gfn);
if (!slot)
return bad_hva();
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
struct kvm_memory_slot *memslot;
gfn = unalias_gfn(kvm, gfn);
- memslot = __gfn_to_memslot(kvm, gfn);
+ memslot = gfn_to_memslot_unaliased(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
+#ifndef CONFIG_X86
+ msi2intx = 0;
+#endif
return 0;