4.52 KVM_SET_GSI_ROUTING
Capability: KVM_CAP_IRQ_ROUTING
- Architectures: x86 s390
+ Architectures: x86 s390 arm arm64
Type: vm ioctl
Parameters: struct kvm_irq_routing (in)
Returns: 0 on success, -1 on error
Sets the GSI routing table entries, overwriting any previously set entries.
+ On arm/arm64, GSI routing has the following limitation:
+ - GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
+
struct kvm_irq_routing {
__u32 nr;
__u32 flags;
#define KVM_IRQ_ROUTING_S390_ADAPTER 3
#define KVM_IRQ_ROUTING_HV_SINT 4
- No flags are specified so far, the corresponding field must be set to zero.
+ flags:
-- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI
- routing entry type, specifies that the devid field contains
- a valid value.
++- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
++ type, specifies that the devid field contains a valid value. The per-VM
++ KVM_CAP_MSI_DEVID capability advertises the requirement to provide
++ the device ID. If this capability is not available, userspace should
++ never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
+ - zero otherwise
struct kvm_irq_routing_irqchip {
__u32 irqchip;
__u32 address_lo;
__u32 address_hi;
__u32 data;
- __u32 pad;
+ union {
+ __u32 pad;
+ __u32 devid;
+ };
};
-devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier
- for the device that wrote the MSI message.
- For PCI, this is usually a BFD identifier in the lower 16 bits.
++If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
++for the device that wrote the MSI message. For PCI, this is usually a
++BFD identifier in the lower 16 bits.
+
-The per-VM KVM_CAP_MSI_DEVID capability advertises the requirement to
-provide the device ID. If this capability is not set, userland cannot
-rely on the kernel to allow the KVM_MSI_VALID_DEVID flag being set.
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of
+address_hi must be zero.
struct kvm_irq_routing_s390_adapter {
__u64 ind_addr;
Reads the Local APIC registers and copies them into the input argument. The
data format and layout are the same as documented in the architecture manual.
+If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
+enabled, then the format of APIC_ID register depends on the APIC mode
+(reported by MSR_IA32_APICBASE) of its VCPU. x2APIC stores APIC ID in
+the APIC_ID register (bytes 32-35). xAPIC only allows an 8-bit APIC ID
+which is stored in bits 31-24 of the APIC register, or equivalently in
+byte 35 of struct kvm_lapic_state's regs field. KVM_GET_LAPIC must then
+be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
+
+If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
+always uses xAPIC format.
+
4.58 KVM_SET_LAPIC
Copies the input argument into the Local APIC registers. The data format
and layout are the same as documented in the architecture manual.
+The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
+regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
+See the note in KVM_GET_LAPIC.
+
4.59 KVM_IOEVENTFD
__u8 pad[12];
};
--flags: KVM_MSI_VALID_DEVID: devid contains a valid value
--devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier
-- for the device that wrote the MSI message.
-- For PCI, this is usually a BFD identifier in the lower 16 bits.
++flags: KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM
++ KVM_CAP_MSI_DEVID capability advertises the requirement to provide
++ the device ID. If this capability is not available, userspace
++ should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
--The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide
--the device ID. If this capability is not set, userland cannot rely on
--the kernel to allow the KVM_MSI_VALID_DEVID flag being set.
++If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
++for the device that wrote the MSI message. For PCI, this is usually a
++BFD identifier in the lower 16 bits.
+
+On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
+enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
+destination id. Bits 7-0 of address_hi must be zero.
4.71 KVM_CREATE_PIT2
irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
- On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
- Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
- given by gsi + 32.
+ On arm/arm64, gsi routing being supported, the following can happen:
+ - in case no routing entry is associated to this gsi, injection fails
+ - in case the gsi is associated to an irqchip routing entry,
+ irqchip.pin + 32 corresponds to the injected SPI ID.
+ - in case the gsi is associated to an MSI routing entry, the MSI
+ message and device ID are translated into an LPI (support restricted
+ to GICv3 ITS in-kernel emulation).
4.76 KVM_PPC_ALLOCATE_HTAB
Will return -EINVAL if the machine does not support runtime-instrumentation.
Will return -EBUSY if a VCPU has already been created.
+7.7 KVM_CAP_X2APIC_API
+
+Architectures: x86
+Parameters: args[0] - features that should be enabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid features
+
+Valid feature flags in args[0] are
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
+KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
+allowing the use of 32-bit APIC IDs. See KVM_CAP_X2APIC_API in their
+respective sections.
+
+KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK must be enabled for x2APIC to work
+in logical mode or with more than 255 VCPUs. Otherwise, KVM treats 0xff
+as a broadcast even in x2APIC mode in order to support physical x2APIC
+without interrupt remapping. This is undesirable in logical mode,
+where 0xff represents CPUs 0-7 in cluster 0.
+
+7.8 KVM_CAP_S390_USER_INSTR0
+
+Architectures: s390
+Parameters: none
+
+With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
+be intercepted and forwarded to user space. User space can use this
+mechanism e.g. to realize 2-byte software breakpoints. The kernel will
+not inject an operating exception for these instructions, user space has
+to take care of that.
+
+This capability can be enabled dynamically even if VCPUs were already
+created and are running.
+
8. Other capabilities.
----------------------
unsigned irqchip;
unsigned pin;
} irqchip;
- struct msi_msg msi;
+ struct {
+ u32 address_lo;
+ u32 address_hi;
+ u32 data;
+ u32 flags;
+ u32 devid;
+ } msi;
struct kvm_s390_adapter_int adapter;
struct kvm_hv_sint hv_sint;
};
}
#endif
-/* must be called with irqs disabled */
-static inline void __kvm_guest_enter(void)
-{
- guest_enter();
- /* KVM does not hold any references to rcu protected data when it
- * switches CPU into a guest mode. In fact switching to a guest mode
- * is very similar to exiting to userspace from rcu point of view. In
- * addition CPU may stay in a guest mode for quite a long time (up to
- * one time slice). Lets treat guest mode as quiescent state, just like
- * we do with user-mode execution.
- */
- if (!context_tracking_cpu_is_enabled())
- rcu_virt_note_context_switch(smp_processor_id());
-}
-
-/* must be called with irqs disabled */
-static inline void __kvm_guest_exit(void)
-{
- guest_exit();
-}
-
-static inline void kvm_guest_enter(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __kvm_guest_enter();
- local_irq_restore(flags);
-}
-
-static inline void kvm_guest_exit(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __kvm_guest_exit();
- local_irq_restore(flags);
-}
-
/*
* search_memslots() and __gfn_to_memslot() are here because they are
* used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
#ifdef CONFIG_S390
#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
+ #elif defined(CONFIG_ARM64)
+ #define KVM_MAX_IRQ_ROUTES 4096
#else
#define KVM_MAX_IRQ_ROUTES 1024
#endif
- int kvm_setup_default_irq_routing(struct kvm *kvm);
- int kvm_setup_empty_irq_routing(struct kvm *kvm);
int kvm_set_irq_routing(struct kvm *kvm,
const struct kvm_irq_routing_entry *entries,
unsigned nr,
unsigned flags);
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue);
void kvm_free_irq_routing(struct kvm *kvm);
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_init(vcpu);
+ ret = kvm_vgic_setup_default_irq_routing(kvm);
+ if (ret)
+ goto out;
+
dist->initialized = true;
out:
return ret;
/* GENERIC PROBE */
-static void vgic_init_maintenance_interrupt(void *info)
+static int vgic_init_cpu_starting(unsigned int cpu)
{
enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
+ return 0;
}
-static int vgic_cpu_notify(struct notifier_block *self,
- unsigned long action, void *cpu)
-{
- switch (action) {
- case CPU_STARTING:
- case CPU_STARTING_FROZEN:
- vgic_init_maintenance_interrupt(NULL);
- break;
- case CPU_DYING:
- case CPU_DYING_FROZEN:
- disable_percpu_irq(kvm_vgic_global_state.maint_irq);
- break;
- }
- return NOTIFY_OK;
+static int vgic_init_cpu_dying(unsigned int cpu)
+{
+ disable_percpu_irq(kvm_vgic_global_state.maint_irq);
+ return 0;
}
-static struct notifier_block vgic_cpu_nb = {
- .notifier_call = vgic_cpu_notify,
-};
-
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
/*
return ret;
}
- ret = __register_cpu_notifier(&vgic_cpu_nb);
+ ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
+ "AP_KVM_ARM_VGIC_INIT_STARTING",
+ vgic_init_cpu_starting, vgic_init_cpu_dying);
if (ret) {
kvm_err("Cannot register vgic CPU notifier\n");
goto out_free_irq;
}
- on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
-
kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
return 0;
{
struct kvm_kernel_irq_routing_entry route;
- if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+ if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
return -EINVAL;
route.msi.address_lo = msi->address_lo;
route.msi.address_hi = msi->address_hi;
route.msi.data = msi->data;
+ route.msi.flags = msi->flags;
+ route.msi.devid = msi->devid;
return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
}
free_irq_routing_table(rt);
}
-static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+static int setup_routing_entry(struct kvm *kvm,
+ struct kvm_irq_routing_table *rt,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
e->gsi = ue->gsi;
e->type = ue->type;
- r = kvm_set_routing_entry(e, ue);
+ r = kvm_set_routing_entry(kvm, e, ue);
if (r)
goto out;
if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
unsigned flags)
{
struct kvm_irq_routing_table *new, *old;
+ struct kvm_kernel_irq_routing_entry *e;
u32 i, j, nr_rt_entries = 0;
int r;
new->chip[i][j] = -1;
for (i = 0; i < nr; ++i) {
- struct kvm_kernel_irq_routing_entry *e;
-
r = -ENOMEM;
e = kzalloc(sizeof(*e), GFP_KERNEL);
if (!e)
goto out;
r = -EINVAL;
- if (ue->flags) {
- kfree(e);
- goto out;
+ switch (ue->type) {
+ case KVM_IRQ_ROUTING_MSI:
+ if (ue->flags & ~KVM_MSI_VALID_DEVID)
+ goto free_entry;
+ break;
+ default:
+ if (ue->flags)
+ goto free_entry;
+ break;
}
- r = setup_routing_entry(new, e, ue);
+ r = setup_routing_entry(kvm, new, e, ue);
- if (r) {
- kfree(e);
- goto out;
- }
+ if (r)
+ goto free_entry;
++ue;
}
new = old;
r = 0;
+ goto out;
+ free_entry:
+ kfree(e);
out:
free_irq_routing_table(new);