/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 *    Author(s): Carsten Otte <cotte@de.ibm.com>
 *               Christian Borntraeger <borntraeger@de.ibm.com>
 *               Heiko Carstens <heiko.carstens@de.ibm.com>
 *               Christian Ehrhardt <ehrhardt@de.ibm.com>
 *               Jason J. Herne <jjherne@us.ibm.com>
 */
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/switch_to.h>
#include <asm/cpacf.h>

#define KMSG_COMPONENT "kvm-s390"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#define CREATE_TRACE_POINTS
#include "trace-s390.h"

#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
#define LOCAL_IRQS 32
#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
			   (KVM_MAX_VCPUS + LOCAL_IRQS))

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "userspace_handled", VCPU_STAT(exit_userspace) },
	{ "exit_null", VCPU_STAT(exit_null) },
	{ "exit_validity", VCPU_STAT(exit_validity) },
	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
	{ "exit_external_request", VCPU_STAT(exit_external_request) },
	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
	{ "exit_instruction", VCPU_STAT(exit_instruction) },
	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
	{ "instruction_spx", VCPU_STAT(instruction_spx) },
	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
	{ "instruction_stap", VCPU_STAT(instruction_stap) },
	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
	{ "instruction_essa", VCPU_STAT(instruction_essa) },
	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
	{ "diagnose_10", VCPU_STAT(diagnose_10) },
	{ "diagnose_44", VCPU_STAT(diagnose_44) },
	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
	{ "diagnose_258", VCPU_STAT(diagnose_258) },
	{ "diagnose_308", VCPU_STAT(diagnose_308) },
	{ "diagnose_500", VCPU_STAT(diagnose_500) },
	{ NULL }
};
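/*
 * Each entry above pairs a debugfs file name with an offset into struct
 * kvm_vcpu's stat block (via VCPU_STAT). Generic KVM code sums these
 * per-vcpu counters and exposes them read-only, typically under
 * /sys/kernel/debug/kvm/, e.g. "cat /sys/kernel/debug/kvm/exit_instruction".
 */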
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
	0xffe6000000000000UL,
	0x005e000000000000UL,
};

unsigned long kvm_s390_fac_list_mask_size(void)
{
	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
	return ARRAY_SIZE(kvm_s390_fac_list_mask);
}

/* available cpu features supported by kvm */
static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
/* available subfunctions indicated via query / "test bit" */
static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;

static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;

/* Section: not file related */
int kvm_arch_hardware_enable(void)
{
	/* every s390 is virtualization enabled ;-) */
	return 0;
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
/*
 * This callback is executed during stop_machine(). All CPUs are therefore
 * temporarily stopped. In order not to change guest behavior, we have to
 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 * so a CPU won't be stopped while calculating with the epoch.
 */
static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
			  void *v)
{
	struct kvm *kvm;
	struct kvm_vcpu *vcpu;
	int i;
	unsigned long long *delta = v;

	list_for_each_entry(kvm, &vm_list, vm_list) {
		kvm->arch.epoch -= *delta;
		kvm_for_each_vcpu(i, vcpu, kvm) {
			vcpu->arch.sie_block->epoch -= *delta;
			if (vcpu->arch.cputm_enabled)
				vcpu->arch.cputm_start += *delta;
		}
	}
	return NOTIFY_OK;
}
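/*
 * Note: cputm_start is a raw TOD snapshot taken when CPU timer accounting
 * was started. Since the TOD clock itself is shifted by *delta during the
 * sync, the snapshot has to move by the same amount, otherwise the elapsed
 * time computed in kvm_s390_get_cpu_timer() would jump by the sync delta.
 */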
static struct notifier_block kvm_clock_notifier = {
	.notifier_call = kvm_clock_sync,
};

int kvm_arch_hardware_setup(void)
{
	gmap_notifier.notifier_call = kvm_gmap_notifier;
	gmap_register_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
				       &kvm_clock_notifier);
	return 0;
}

void kvm_arch_hardware_unsetup(void)
{
	gmap_unregister_ipte_notifier(&gmap_notifier);
	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
					 &kvm_clock_notifier);
}

static void allow_cpu_feat(unsigned long nr)
{
	set_bit_inv(nr, kvm_s390_available_cpu_feat);
}
static inline int plo_test_bit(unsigned char nr)
{
	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
	int cc = 3; /* subfunction not available */

	asm volatile(
		/* Parameter registers are ignored for "test bit" */
		"	plo	0,0,0,0(0)\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (cc)
		: "d" (r0)
		: "cc");
	return cc == 0;
}
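/*
 * PLO with bit 0x100 set in the function code acts as a "test bit" query:
 * the operation itself is not performed; condition code 0 merely indicates
 * that the function number in r0 is available, which is why the parameter
 * registers can be left unset above.
 */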
static void kvm_s390_cpu_feat_init(void)
{
	int i;

	for (i = 0; i < 256; ++i) {
		if (plo_test_bit(i))
			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
	}

	if (test_facility(28)) /* TOD-clock steering */
		etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);

	if (test_facility(17)) { /* MSA */
		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
	}
	if (test_facility(76)) /* MSA3 */
		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
	if (test_facility(77)) { /* MSA4 */
		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
	}
	if (test_facility(57)) /* MSA5 */
		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);

	if (MACHINE_HAS_ESOP)
		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
}
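/*
 * Each __cpacf_query() above stores a 128-bit status word for one crypto
 * instruction; a set bit means the corresponding subfunction is installed
 * on this machine. The collected masks are what
 * kvm_s390_get_machine_subfunc() later hands to user space.
 */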
int kvm_arch_init(void *opaque)
{
	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
	if (!kvm_s390_dbf)
		return -ENOMEM;

	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
		debug_unregister(kvm_s390_dbf);
		return -ENOMEM;
	}

	kvm_s390_cpu_feat_init();

	/* Register floating interrupt controller interface. */
	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}

void kvm_arch_exit(void)
{
	debug_unregister(kvm_s390_dbf);
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	if (ioctl == KVM_S390_ENABLE_SIE)
		return s390_enable_sie();
	return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_S390_PSW:
	case KVM_CAP_S390_GMAP:
	case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_CAP_S390_UCONTROL:
#endif
	case KVM_CAP_ASYNC_PF:
	case KVM_CAP_SYNC_REGS:
	case KVM_CAP_ONE_REG:
	case KVM_CAP_ENABLE_CAP:
	case KVM_CAP_S390_CSS_SUPPORT:
	case KVM_CAP_IOEVENTFD:
	case KVM_CAP_DEVICE_CTRL:
	case KVM_CAP_ENABLE_CAP_VM:
	case KVM_CAP_S390_IRQCHIP:
	case KVM_CAP_VM_ATTRIBUTES:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_S390_INJECT_IRQ:
	case KVM_CAP_S390_USER_SIGP:
	case KVM_CAP_S390_USER_STSI:
	case KVM_CAP_S390_SKEYS:
	case KVM_CAP_S390_IRQ_STATE:
		r = 1;
		break;
	case KVM_CAP_S390_MEM_OP:
		r = MEM_OP_MAX_SIZE;
		break;
	case KVM_CAP_NR_VCPUS:
	case KVM_CAP_MAX_VCPUS:
		r = KVM_S390_BSCA_CPU_SLOTS;
		if (sclp.has_esca && sclp.has_64bscao)
			r = KVM_S390_ESCA_CPU_SLOTS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_USER_MEM_SLOTS;
		break;
	case KVM_CAP_S390_COW:
		r = MACHINE_HAS_ESOP;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = MACHINE_HAS_VX;
		break;
	case KVM_CAP_S390_RI:
		r = test_facility(64);
		break;
	default:
		r = 0;
	}
	return r;
}
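/*
 * Illustrative user space counterpart (a sketch, not part of this file):
 *
 *	ret = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *	if (ret > 0)
 *		max_mem_op_size = ret;	(the MEM_OP_MAX_SIZE reported above)
 */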
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
				    struct kvm_memory_slot *memslot)
{
	gfn_t cur_gfn, last_gfn;
	unsigned long address;
	struct gmap *gmap = kvm->arch.gmap;

	/* Loop over all guest pages */
	last_gfn = memslot->base_gfn + memslot->npages;
	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
		address = gfn_to_hva_memslot(memslot, cur_gfn);

		if (test_and_clear_guest_dirty(gmap->mm, address))
			mark_page_dirty(kvm, cur_gfn);
		if (fatal_signal_pending(current))
			return;
		cond_resched();
	}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memslots *slots;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	slots = kvm_memslots(kvm);
	memslot = id_to_memslot(slots, log->slot);
	r = -ENOENT;
	if (!memslot->dirty_bitmap)
		goto out;

	kvm_s390_sync_dirty_log(kvm, memslot);
	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_IRQCHIP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
		kvm->arch.use_irqchip = 1;
		r = 0;
		break;
	case KVM_CAP_S390_USER_SIGP:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
		kvm->arch.user_sigp = 1;
		r = 0;
		break;
	case KVM_CAP_S390_VECTOR_REGISTERS:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (MACHINE_HAS_VX) {
			set_kvm_facility(kvm->arch.model.fac_mask, 129);
			set_kvm_facility(kvm->arch.model.fac_list, 129);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_RI:
		r = -EINVAL;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus)) {
			r = -EBUSY;
		} else if (test_facility(64)) {
			set_kvm_facility(kvm->arch.model.fac_mask, 64);
			set_kvm_facility(kvm->arch.model.fac_list, 64);
			r = 0;
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
			 r ? "(not available)" : "(success)");
		break;
	case KVM_CAP_S390_USER_STSI:
		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
		kvm->arch.user_stsi = 1;
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}
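/*
 * Illustrative user space counterpart (a sketch, not part of this file):
 *
 *	struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_SIGP };
 *
 *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
 *
 * Capabilities that modify the CPU model (vector registers, runtime
 * instrumentation) are rejected with -EBUSY once VCPUs exist.
 */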
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;
	unsigned int idx;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_ENABLE_CMMA:
		/* enable CMMA only for z10 and later (EDAT_1) */
		ret = -EINVAL;
		if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
			break;

		ret = -EBUSY;
		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			kvm->arch.use_cmma = 1;
			ret = 0;
		}
		mutex_unlock(&kvm->lock);
		break;
	case KVM_S390_VM_MEM_CLR_CMMA:
		ret = -EINVAL;
		if (!kvm->arch.use_cmma)
			break;

		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
		mutex_lock(&kvm->lock);
		idx = srcu_read_lock(&kvm->srcu);
		s390_reset_cmma(kvm->arch.gmap->mm);
		srcu_read_unlock(&kvm->srcu, idx);
		mutex_unlock(&kvm->lock);
		ret = 0;
		break;
	case KVM_S390_VM_MEM_LIMIT_SIZE: {
		unsigned long new_limit;

		if (kvm_is_ucontrol(kvm))
			return -EINVAL;

		if (get_user(new_limit, (u64 __user *)attr->addr))
			return -EFAULT;

		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
		    new_limit > kvm->arch.mem_limit)
			return -E2BIG;

		if (!new_limit)
			return -EINVAL;

		/* gmap_alloc takes last usable address */
		if (new_limit != KVM_S390_NO_MEM_LIMIT)
			new_limit -= 1;

		ret = -EBUSY;
		mutex_lock(&kvm->lock);
		if (atomic_read(&kvm->online_vcpus) == 0) {
			/* gmap_alloc will round the limit up */
			struct gmap *new = gmap_alloc(current->mm, new_limit);

			if (!new) {
				ret = -ENOMEM;
			} else {
				gmap_free(kvm->arch.gmap);
				new->private = kvm;
				kvm->arch.gmap = new;
				ret = 0;
			}
		}
		mutex_unlock(&kvm->lock);
		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
			 (void *) kvm->arch.gmap->asce);
		break;
	}
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);

static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_vcpu *vcpu;
	int i;

	if (!test_kvm_facility(kvm, 76))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	switch (attr->attr) {
	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		kvm->arch.crypto.aes_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		get_random_bytes(
			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		kvm->arch.crypto.dea_kw = 1;
		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		kvm->arch.crypto.aes_kw = 0;
		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
		break;
	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
		kvm->arch.crypto.dea_kw = 0;
		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
		break;
	default:
		mutex_unlock(&kvm->lock);
		return -ENXIO;
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_s390_vcpu_crypto_setup(vcpu);
		exit_sie(vcpu);
	}
	mutex_unlock(&kvm->lock);
	return 0;
}
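/*
 * The new wrapping key mask only becomes visible to a guest after
 * kvm_s390_vcpu_crypto_setup() has refreshed the ECB3 bits and the CRYCB
 * designation, and exit_sie() has kicked the VCPU out of SIE so that the
 * control block is reloaded on the next SIE entry.
 */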
static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high;

	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
			   sizeof(gtod_high)))
		return -EFAULT;

	if (gtod_high != 0)
		return -EINVAL;
	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
		return -EFAULT;

	kvm_s390_set_tod_clock(kvm, gtod);
	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
	return 0;
}

static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_set_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_set_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u8 gtod_high = 0;

	if (copy_to_user((void __user *)attr->addr, &gtod_high,
			 sizeof(gtod_high)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);

	return 0;
}

static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
{
	u64 gtod;

	gtod = kvm_s390_get_tod_clock_fast(kvm);
	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
		return -EFAULT;
	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);

	return 0;
}

static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_processor *proc;
	u16 lowest_ibc, unblocked_ibc;
	int ret = 0;

	mutex_lock(&kvm->lock);
	if (atomic_read(&kvm->online_vcpus)) {
		ret = -EBUSY;
		goto out;
	}
	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (!proc) {
		ret = -ENOMEM;
		goto out;
	}
	if (!copy_from_user(proc, (void __user *)attr->addr,
			    sizeof(*proc))) {
		kvm->arch.model.cpuid = proc->cpuid;
		lowest_ibc = sclp.ibc >> 16 & 0xfff;
		unblocked_ibc = sclp.ibc & 0xfff;
		if (lowest_ibc && proc->ibc) {
			if (proc->ibc > unblocked_ibc)
				kvm->arch.model.ibc = unblocked_ibc;
			else if (proc->ibc < lowest_ibc)
				kvm->arch.model.ibc = lowest_ibc;
			else
				kvm->arch.model.ibc = proc->ibc;
		}
		memcpy(kvm->arch.model.fac_list, proc->fac_list,
		       S390_ARCH_FAC_LIST_SIZE_BYTE);
	} else
		ret = -EFAULT;
	kfree(proc);
out:
	mutex_unlock(&kvm->lock);
	return ret;
}
static int kvm_s390_set_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;
	int ret = -EBUSY;

	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
		return -EFAULT;
	if (!bitmap_subset((unsigned long *) data.feat,
			   kvm_s390_available_cpu_feat,
			   KVM_S390_VM_CPU_FEAT_NR_BITS))
		return -EINVAL;

	mutex_lock(&kvm->lock);
	if (!atomic_read(&kvm->online_vcpus)) {
		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
			    KVM_S390_VM_CPU_FEAT_NR_BITS);
		ret = 0;
	}
	mutex_unlock(&kvm->lock);
	return ret;
}

static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once supported by kernel + hw, we have to store the subfunctions
	 * in kvm->arch and remember that user space configured them.
	 */
	return -ENXIO;
}
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	}
	return ret;
}
*kvm
, struct kvm_device_attr
*attr
)
799 struct kvm_s390_vm_cpu_processor
*proc
;
802 proc
= kzalloc(sizeof(*proc
), GFP_KERNEL
);
807 proc
->cpuid
= kvm
->arch
.model
.cpuid
;
808 proc
->ibc
= kvm
->arch
.model
.ibc
;
809 memcpy(&proc
->fac_list
, kvm
->arch
.model
.fac_list
,
810 S390_ARCH_FAC_LIST_SIZE_BYTE
);
811 if (copy_to_user((void __user
*)attr
->addr
, proc
, sizeof(*proc
)))
818 static int kvm_s390_get_machine(struct kvm
*kvm
, struct kvm_device_attr
*attr
)
820 struct kvm_s390_vm_cpu_machine
*mach
;
823 mach
= kzalloc(sizeof(*mach
), GFP_KERNEL
);
828 get_cpu_id((struct cpuid
*) &mach
->cpuid
);
829 mach
->ibc
= sclp
.ibc
;
830 memcpy(&mach
->fac_mask
, kvm
->arch
.model
.fac_mask
,
831 S390_ARCH_FAC_LIST_SIZE_BYTE
);
832 memcpy((unsigned long *)&mach
->fac_list
, S390_lowcore
.stfle_fac_list
,
833 S390_ARCH_FAC_LIST_SIZE_BYTE
);
834 if (copy_to_user((void __user
*)attr
->addr
, mach
, sizeof(*mach
)))
static int kvm_s390_get_processor_feat(struct kvm *kvm,
				       struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_machine_feat(struct kvm *kvm,
				     struct kvm_device_attr *attr)
{
	struct kvm_s390_vm_cpu_feat data;

	bitmap_copy((unsigned long *) data.feat,
		    kvm_s390_available_cpu_feat,
		    KVM_S390_VM_CPU_FEAT_NR_BITS);
	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
		return -EFAULT;
	return 0;
}

static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
					  struct kvm_device_attr *attr)
{
	/*
	 * Once we can actually configure subfunctions (kernel + hw support),
	 * we have to check if they were already set by user space, if so copy
	 * them from kvm->arch.
	 */
	return -ENXIO;
}

static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
					struct kvm_device_attr *attr)
{
	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
		return -EFAULT;
	return 0;
}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	}
	return ret;
}
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}

static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		switch (attr->attr) {
		case KVM_S390_VM_MEM_ENABLE_CMMA:
		case KVM_S390_VM_MEM_CLR_CMMA:
		case KVM_S390_VM_MEM_LIMIT_SIZE:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_TOD:
		switch (attr->attr) {
		case KVM_S390_VM_TOD_LOW:
		case KVM_S390_VM_TOD_HIGH:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CPU_MODEL:
		switch (attr->attr) {
		case KVM_S390_VM_CPU_PROCESSOR:
		case KVM_S390_VM_CPU_MACHINE:
		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		case KVM_S390_VM_CPU_MACHINE_FEAT:
		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
			ret = 0;
			break;
		/* configuring subfunctions is not supported yet */
		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		default:
			ret = -ENXIO;
			break;
		}
		break;
	case KVM_S390_VM_CRYPTO:
		switch (attr->attr) {
		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
			ret = 0;
			break;
		default:
			ret = -ENXIO;
			break;
		}
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	unsigned long curkey;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_use_skey(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		curkey = get_guest_storage_key(current->mm, hva);
		if (IS_ERR_VALUE(curkey)) {
			r = curkey;
			goto out;
		}
		keys[i] = curkey;
	}

	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
			 sizeof(uint8_t) * args->count);
	if (r)
		r = -EFAULT;
out:
	kvfree(keys);
	return r;
}
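/*
 * The kmalloc_array(..., __GFP_NOWARN) with vmalloc() fallback above lets
 * large key buffers (args->count may be up to KVM_S390_SKEYS_MAX) succeed
 * even when physically contiguous memory is fragmented; kvfree() releases
 * either variant.
 */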
static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kmalloc_array(args->count, sizeof(uint8_t),
			     GFP_KERNEL | __GFP_NOWARN);
	if (!keys)
		keys = vmalloc(sizeof(uint8_t) * args->count);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			goto out;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			goto out;
		}

		r = set_guest_storage_key(current->mm, hva,
					  (unsigned long)keys[i], 0);
		if (r)
			goto out;
	}
out:
	kvfree(keys);
	return r;
}
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	struct kvm_device_attr attr;
	int r;

	switch (ioctl) {
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		r = kvm_s390_inject_vm(kvm, &s390int);
		break;
	}
	case KVM_ENABLE_CAP: {
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
		break;
	}
	case KVM_CREATE_IRQCHIP: {
		struct kvm_irq_routing_entry routing;

		r = -EINVAL;
		if (kvm->arch.use_irqchip) {
			/* Set up dummy routing. */
			memset(&routing, 0, sizeof(routing));
			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
		}
		break;
	}
	case KVM_SET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_set_attr(kvm, &attr);
		break;
	}
	case KVM_GET_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_get_attr(kvm, &attr);
		break;
	}
	case KVM_HAS_DEVICE_ATTR: {
		r = -EFAULT;
		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
			break;
		r = kvm_s390_vm_has_attr(kvm, &attr);
		break;
	}
	case KVM_S390_GET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_get_skeys(kvm, &args);
		break;
	}
	case KVM_S390_SET_SKEYS: {
		struct kvm_s390_skeys args;

		r = -EFAULT;
		if (copy_from_user(&args, argp,
				   sizeof(struct kvm_s390_skeys)))
			break;
		r = kvm_s390_set_skeys(kvm, &args);
		break;
	}
	default:
		r = -ENOTTY;
	}

	return r;
}
static int kvm_s390_query_ap_config(u8 *config)
{
	u32 fcn_code = 0x04000000UL;
	u32 cc = 0;

	memset(config, 0, 128);
	asm volatile(
		"lgr 0,%1\n"
		"lgr 2,%2\n"
		".long 0xb2af0000\n"		/* PQAP(QCI) */
		"0: ipm %0\n"
		"srl %0,28\n"
		"1:\n"
		EX_TABLE(0b, 1b)
		: "+r" (cc)
		: "r" (fcn_code), "r" (config)
		: "cc", "0", "2", "memory"
	);

	return cc;
}

static int kvm_s390_apxa_installed(void)
{
	u8 config[128];
	int cc;

	if (test_facility(12)) {
		cc = kvm_s390_query_ap_config(config);

		if (cc)
			pr_err("PQAP(QCI) failed with cc=%d", cc);
		else
			return config[0] & 0x40;
	}

	return 0;
}
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;

	if (kvm_s390_apxa_installed())
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
	else
		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
}

static u64 kvm_s390_get_initial_cpuid(void)
{
	struct cpuid cpuid;

	get_cpu_id(&cpuid);
	cpuid.version = 0xff;
	return *((u64 *) &cpuid);
}
static void kvm_s390_crypto_init(struct kvm *kvm)
{
	if (!test_kvm_facility(kvm, 76))
		return;

	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
	kvm_s390_set_crycb_format(kvm);

	/* Enable AES/DEA protected key functions by default */
	kvm->arch.crypto.aes_kw = 1;
	kvm->arch.crypto.dea_kw = 1;
	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
}

static void sca_dispose(struct kvm *kvm)
{
	if (kvm->arch.use_esca)
		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
	else
		free_page((unsigned long)(kvm->arch.sca));
	kvm->arch.sca = NULL;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
	gfp_t alloc_flags = GFP_KERNEL;
	int i, rc;
	char debug_name[16];
	static unsigned long sca_offset;

	rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
	if (type & ~KVM_VM_S390_UCONTROL)
		goto out_err;
	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
		goto out_err;
#else
	if (type)
		goto out_err;
#endif

	rc = s390_enable_sie();
	if (rc)
		goto out_err;

	rc = -ENOMEM;

	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);

	kvm->arch.use_esca = 0; /* start with basic SCA */
	if (!sclp.has_64bscao)
		alloc_flags |= GFP_DMA;
	rwlock_init(&kvm->arch.sca_lock);
	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
	if (!kvm->arch.sca)
		goto out_err;
	spin_lock(&kvm_lock);
	sca_offset += 16;
	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
		sca_offset = 0;
	kvm->arch.sca = (struct bsca_block *)
			((char *) kvm->arch.sca + sca_offset);
	spin_unlock(&kvm_lock);

	sprintf(debug_name, "kvm-%u", current->pid);

	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
	if (!kvm->arch.dbf)
		goto out_err;

	kvm->arch.sie_page2 =
	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
	if (!kvm->arch.sie_page2)
		goto out_err;

	/* Populate the facility mask initially. */
	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);
	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
		if (i < kvm_s390_fac_list_mask_size())
			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
		else
			kvm->arch.model.fac_mask[i] = 0UL;
	}

	/* Populate the facility list initially. */
	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
	       S390_ARCH_FAC_LIST_SIZE_BYTE);

	set_kvm_facility(kvm->arch.model.fac_mask, 74);
	set_kvm_facility(kvm->arch.model.fac_list, 74);

	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
	kvm->arch.model.ibc = sclp.ibc & 0x0fff;

	kvm_s390_crypto_init(kvm);

	spin_lock_init(&kvm->arch.float_int.lock);
	for (i = 0; i < FIRQ_LIST_COUNT; i++)
		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
	init_waitqueue_head(&kvm->arch.ipte_wq);
	mutex_init(&kvm->arch.ipte_mutex);

	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
	VM_EVENT(kvm, 3, "vm created with type %lu", type);

	if (type & KVM_VM_S390_UCONTROL) {
		kvm->arch.gmap = NULL;
		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
	} else {
		if (sclp.hamax == U64_MAX)
			kvm->arch.mem_limit = TASK_MAX_SIZE;
		else
			kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
						    sclp.hamax + 1);
		kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
		if (!kvm->arch.gmap)
			goto out_err;
		kvm->arch.gmap->private = kvm;
		kvm->arch.gmap->pfault_enabled = 0;
	}

	kvm->arch.css_support = 0;
	kvm->arch.use_irqchip = 0;
	kvm->arch.epoch = 0;

	spin_lock_init(&kvm->arch.start_stop_lock);
	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);

	return 0;
out_err:
	free_page((unsigned long)kvm->arch.sie_page2);
	debug_unregister(kvm->arch.dbf);
	sca_dispose(kvm);
	KVM_EVENT(3, "creation of vm failed: %d", rc);
	return rc;
}
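/*
 * Note on the sca_offset staggering above: a basic SCA occupies only part
 * of a page, so consecutively created VMs get SCA base addresses shifted
 * by 16 bytes each, which avoids having the heavily used SCA fields of
 * every VM start at the same page offset (and thus the same cache lines).
 */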
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
	kvm_s390_clear_local_irqs(vcpu);
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_is_ucontrol(vcpu->kvm))
		sca_del_vcpu(vcpu);

	if (kvm_is_ucontrol(vcpu->kvm))
		gmap_free(vcpu->arch.gmap);

	if (vcpu->kvm->arch.use_cmma)
		kvm_s390_vcpu_unsetup_cmma(vcpu);
	free_page((unsigned long)(vcpu->arch.sie_block));

	kvm_vcpu_uninit(vcpu);
	kmem_cache_free(kvm_vcpu_cache, vcpu);
}
static void kvm_free_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm)
		kvm_arch_vcpu_destroy(vcpu);

	mutex_lock(&kvm->lock);
	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
		kvm->vcpus[i] = NULL;

	atomic_set(&kvm->online_vcpus, 0);
	mutex_unlock(&kvm->lock);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
	kvm_free_vcpus(kvm);
	sca_dispose(kvm);
	debug_unregister(kvm->arch.dbf);
	free_page((unsigned long)kvm->arch.sie_page2);
	if (!kvm_is_ucontrol(kvm))
		gmap_free(kvm->arch.gmap);
	kvm_s390_destroy_adapters(kvm);
	kvm_s390_clear_float_irqs(kvm);
	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
}
/* Section: vcpu related */
static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
	if (!vcpu->arch.gmap)
		return -ENOMEM;
	vcpu->arch.gmap->private = vcpu->kvm;

	return 0;
}
static void sca_del_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
		sca->cpu[vcpu->vcpu_id].sda = 0;
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
static void sca_add_vcpu(struct kvm_vcpu *vcpu)
{
	read_lock(&vcpu->kvm->arch.sca_lock);
	if (vcpu->kvm->arch.use_esca) {
		struct esca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
	} else {
		struct bsca_block *sca = vcpu->kvm->arch.sca;

		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
	}
	read_unlock(&vcpu->kvm->arch.sca_lock);
}
/* Basic SCA to Extended SCA data copy routines */
static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
{
	d->sda = s->sda;
	d->sigp_ctrl.c = s->sigp_ctrl.c;
	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
}

static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
{
	int i;

	d->ipte_control = s->ipte_control;
	d->mcn[0] = s->mcn;
	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
}
static int sca_switch_to_extended(struct kvm *kvm)
{
	struct bsca_block *old_sca = kvm->arch.sca;
	struct esca_block *new_sca;
	struct kvm_vcpu *vcpu;
	unsigned int vcpu_idx;
	u32 scaol, scaoh;

	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
	if (!new_sca)
		return -ENOMEM;

	scaoh = (u32)((u64)(new_sca) >> 32);
	scaol = (u32)(u64)(new_sca) & ~0x3fU;

	kvm_s390_vcpu_block_all(kvm);
	write_lock(&kvm->arch.sca_lock);

	sca_copy_b_to_e(new_sca, old_sca);

	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
		vcpu->arch.sie_block->scaoh = scaoh;
		vcpu->arch.sie_block->scaol = scaol;
		vcpu->arch.sie_block->ecb2 |= 0x04U;
	}
	kvm->arch.sca = new_sca;
	kvm->arch.use_esca = 1;

	write_unlock(&kvm->arch.sca_lock);
	kvm_s390_vcpu_unblock_all(kvm);

	free_page((unsigned long)old_sca);

	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
		 old_sca, kvm->arch.sca);
	return 0;
}
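/*
 * The switch to the extended SCA is done with all VCPUs blocked and the
 * sca_lock held for writing: every SIE block is rewritten to point at the
 * new ESCA (scaoh/scaol, plus the 0x04 bit in ecb2 for the ESCA format)
 * before any VCPU may re-enter SIE, and only then is the old basic SCA
 * freed.
 */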
static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
{
	int rc;

	if (id < KVM_S390_BSCA_CPU_SLOTS)
		return true;
	if (!sclp.has_esca || !sclp.has_64bscao)
		return false;

	mutex_lock(&kvm->lock);
	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
	mutex_unlock(&kvm->lock);

	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
				    KVM_SYNC_GPRS |
				    KVM_SYNC_ACRS |
				    KVM_SYNC_CRS |
				    KVM_SYNC_ARCH0 |
				    KVM_SYNC_PFAULT;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
	/* fprs can be synchronized via vrs, even if the guest has no vx. With
	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
	 */
	if (MACHINE_HAS_VX)
		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
	else
		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;

	if (kvm_is_ucontrol(vcpu->kvm))
		return __kvm_ucontrol_vcpu_init(vcpu);

	return 0;
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	vcpu->arch.cputm_start = 0;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}

/* needs disabled preemption to protect from TOD sync and vcpu_load/put */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}

static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}

static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/* set the cpu timer - may only be called from the VCPU thread itself */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled)
		vcpu->arch.cputm_start = get_tod_clock_fast();
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
	preempt_enable();
}
/* update and get the cpu timer - can also be called from other VCPU threads */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
	do {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
	preempt_enable();
	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	/* Save host register state */
	save_fpu_regs();
	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;

	if (MACHINE_HAS_VX)
		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
	else
		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
	if (test_fp_ctl(current->thread.fpu.fpc))
		/* User space provided an invalid FPC, let's clear it */
		current->thread.fpu.fpc = 0;

	save_access_regs(vcpu->arch.host_acrs);
	restore_access_regs(vcpu->run->s.regs.acrs);
	gmap_enable(vcpu->arch.gmap);
	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__start_cpu_timer_accounting(vcpu);
	vcpu->cpu = cpu;
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	vcpu->cpu = -1;
	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
		__stop_cpu_timer_accounting(vcpu);
	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
	gmap_disable(vcpu->arch.gmap);

	/* Save guest register state */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;

	/* Restore host register state */
	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;

	save_access_regs(vcpu->run->s.regs.acrs);
	restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
	/* this equals initial cpu reset in pop, but we don't switch to ESA */
	vcpu->arch.sie_block->gpsw.mask = 0UL;
	vcpu->arch.sie_block->gpsw.addr = 0UL;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0UL;
	vcpu->arch.sie_block->todpr = 0;
	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
	vcpu->arch.sie_block->gcr[0] = 0xE0UL;
	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
	/* make sure the new fpc will be lazily loaded */
	save_fpu_regs();
	current->thread.fpu.fpc = 0;
	vcpu->arch.sie_block->gbea = 1;
	vcpu->arch.sie_block->pp = 0;
	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
	kvm_clear_async_pf_completion_queue(vcpu);
	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
		kvm_s390_vcpu_stop(vcpu);
	kvm_s390_clear_local_irqs(vcpu);
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
	mutex_lock(&vcpu->kvm->lock);
	preempt_disable();
	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
	preempt_enable();
	mutex_unlock(&vcpu->kvm->lock);
	if (!kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
		sca_add_vcpu(vcpu);
	}
}
static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{
	if (!test_kvm_facility(vcpu->kvm, 76))
		return;

	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);

	if (vcpu->kvm->arch.crypto.aes_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
	if (vcpu->kvm->arch.crypto.dea_kw)
		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;

	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
}

void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
	free_page(vcpu->arch.sie_block->cbrlo);
	vcpu->arch.sie_block->cbrlo = 0;
}
int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
{
	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
	if (!vcpu->arch.sie_block->cbrlo)
		return -ENOMEM;

	vcpu->arch.sie_block->ecb2 |= 0x80;
	vcpu->arch.sie_block->ecb2 &= ~0x08;
	return 0;
}
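/*
 * cbrlo above designates the collection buffer that receives the guest
 * block addresses identified by ESSA while CMMA interpretation (the 0x80
 * bit in ecb2) is active. The 0x08 bit (PFMF interpretation) is cleared
 * so that PFMF keeps being intercepted while CMMA is in use.
 */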
static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;

	vcpu->arch.sie_block->ibc = model->ibc;
	if (test_kvm_facility(vcpu->kvm, 7))
		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
	int rc = 0;

	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
						    CPUSTAT_SM |
						    CPUSTAT_STOPPED);

	if (test_kvm_facility(vcpu->kvm, 78))
		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
	else if (test_kvm_facility(vcpu->kvm, 8))
		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);

	kvm_s390_vcpu_setup_model(vcpu);

	vcpu->arch.sie_block->ecb = 0x02;
	if (test_kvm_facility(vcpu->kvm, 9))
		vcpu->arch.sie_block->ecb |= 0x04;
	if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
		vcpu->arch.sie_block->ecb |= 0x10;

	if (test_kvm_facility(vcpu->kvm, 8))
		vcpu->arch.sie_block->ecb2 |= 0x08;
	vcpu->arch.sie_block->eca = 0xC1002000U;
	if (sclp.has_siif)
		vcpu->arch.sie_block->eca |= 1;
	if (sclp.has_sigpif)
		vcpu->arch.sie_block->eca |= 0x10000000U;
	if (test_kvm_facility(vcpu->kvm, 64))
		vcpu->arch.sie_block->ecb3 |= 0x01;
	if (test_kvm_facility(vcpu->kvm, 129)) {
		vcpu->arch.sie_block->eca |= 0x00020000;
		vcpu->arch.sie_block->ecd |= 0x20000000;
	}
	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
	vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
	if (test_kvm_facility(vcpu->kvm, 74))
		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;

	if (vcpu->kvm->arch.use_cmma) {
		rc = kvm_s390_vcpu_setup_cmma(vcpu);
		if (rc)
			return rc;
	}
	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;

	kvm_s390_vcpu_crypto_setup(vcpu);

	return rc;
}
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
				      unsigned int id)
{
	struct kvm_vcpu *vcpu;
	struct sie_page *sie_page;
	int rc = -EINVAL;

	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
		goto out;

	rc = -ENOMEM;

	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
	if (!vcpu)
		goto out;

	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
	if (!sie_page)
		goto out_free_cpu;

	vcpu->arch.sie_block = &sie_page->sie_block;
	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

	vcpu->arch.sie_block->icpua = id;
	spin_lock_init(&vcpu->arch.local_int.lock);
	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
	vcpu->arch.local_int.wq = &vcpu->wq;
	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
	seqcount_init(&vcpu->arch.cputm_seqcount);

	rc = kvm_vcpu_init(vcpu, kvm, id);
	if (rc)
		goto out_free_sie_block;
	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
		 vcpu->arch.sie_block);
	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

	return vcpu;
out_free_sie_block:
	free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
	kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
	return ERR_PTR(rc);
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	return kvm_s390_vcpu_has_irq(vcpu, 0);
}
void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
{
	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
	exit_sie(vcpu);
}

static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
{
	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
}
/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
		cpu_relax();
}
/* Kick a guest cpu out of SIE to process a request synchronously */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
	int i;
	struct kvm *kvm = gmap->private;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		/* match against both prefix pages */
		if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
		}
	}
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();
	return 0;
}
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = put_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = put_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = put_user(kvm_s390_get_cpu_timer(vcpu),
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = put_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = put_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = put_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = put_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = put_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = put_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	int r = -EINVAL;
	__u64 val;

	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		r = get_user(vcpu->arch.sie_block->todpr,
			     (u32 __user *)reg->addr);
		break;
	case KVM_REG_S390_EPOCHDIFF:
		r = get_user(vcpu->arch.sie_block->epoch,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_CPU_TIMER:
		r = get_user(val, (u64 __user *)reg->addr);
		if (!r)
			kvm_s390_set_cpu_timer(vcpu, val);
		break;
	case KVM_REG_S390_CLOCK_COMP:
		r = get_user(vcpu->arch.sie_block->ckc,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFTOKEN:
		r = get_user(vcpu->arch.pfault_token,
			     (u64 __user *)reg->addr);
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
		break;
	case KVM_REG_S390_PFCOMPARE:
		r = get_user(vcpu->arch.pfault_compare,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PFSELECT:
		r = get_user(vcpu->arch.pfault_select,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_PP:
		r = get_user(vcpu->arch.sie_block->pp,
			     (u64 __user *)reg->addr);
		break;
	case KVM_REG_S390_GBEA:
		r = get_user(vcpu->arch.sie_block->gbea,
			     (u64 __user *)reg->addr);
		break;
	default:
		break;
	}

	return r;
}
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);
	return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
	restore_access_regs(vcpu->run->s.regs.acrs);
	return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
				  struct kvm_sregs *sregs)
{
	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
	return 0;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure the new values will be lazily loaded */
	save_fpu_regs();
	if (test_fp_ctl(fpu->fpc))
		return -EINVAL;
	current->thread.fpu.fpc = fpu->fpc;
	if (MACHINE_HAS_VX)
		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
	else
		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
	return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
	/* make sure we have the latest values */
	save_fpu_regs();
	if (MACHINE_HAS_VX)
		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
	else
		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
	fpu->fpc = current->thread.fpu.fpc;
	return 0;
}
static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
	int rc = 0;

	if (!is_vcpu_stopped(vcpu))
		rc = -EBUSY;
	else {
		vcpu->run->psw_mask = psw.mask;
		vcpu->run->psw_addr = psw.addr;
	}
	return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				  struct kvm_translation *tr)
{
	return -EINVAL; /* not implemented yet */
}
#define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
			      KVM_GUESTDBG_USE_HW_BP | \
			      KVM_GUESTDBG_ENABLE)

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
					struct kvm_guest_debug *dbg)
{
	int rc = 0;

	vcpu->guest_debug = 0;
	kvm_s390_clear_bp_data(vcpu);

	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
		return -EINVAL;
	if (!sclp.has_gpere)
		return -EINVAL;

	if (dbg->control & KVM_GUESTDBG_ENABLE) {
		vcpu->guest_debug = dbg->control;
		/* enforce guest PER */
		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);

		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
			rc = kvm_s390_import_bp_data(vcpu, dbg);
	} else {
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
		vcpu->arch.guestdbg.last_bp = 0;
	}

	if (rc) {
		vcpu->guest_debug = 0;
		kvm_s390_clear_bp_data(vcpu);
		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
	}

	return rc;
}
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	/* CHECK_STOP and LOAD are not supported yet */
	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
				       KVM_MP_STATE_OPERATING;
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
				    struct kvm_mp_state *mp_state)
{
	int rc = 0;

	/* user space knows about this interface - let it control the state */
	vcpu->kvm->arch.user_cpu_state_ctrl = 1;

	switch (mp_state->mp_state) {
	case KVM_MP_STATE_STOPPED:
		kvm_s390_vcpu_stop(vcpu);
		break;
	case KVM_MP_STATE_OPERATING:
		kvm_s390_vcpu_start(vcpu);
		break;
	case KVM_MP_STATE_LOAD:
	case KVM_MP_STATE_CHECK_STOP:
		/* fall through - CHECK_STOP and LOAD are not supported yet */
	default:
		rc = -ENXIO;
	}

	return rc;
}
2239 static bool ibs_enabled(struct kvm_vcpu
*vcpu
)
2241 return atomic_read(&vcpu
->arch
.sie_block
->cpuflags
) & CPUSTAT_IBS
;
static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
	kvm_s390_vcpu_request_handled(vcpu);
	if (!vcpu->requests)
		return 0;
	/*
	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
	 * This ensures that the ipte instruction for this request has
	 * already finished. We might race against a second unmapper that
	 * wants to set the blocking bit. Lets just retry the request loop.
	 */
	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
		int rc;

		rc = gmap_ipte_notify(vcpu->arch.gmap,
				      kvm_s390_get_prefix(vcpu),
				      PAGE_SIZE * 2);
		if (rc)
			return rc;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
		vcpu->arch.sie_block->ihcpu = 0xffff;
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
		if (!ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
			atomic_or(CPUSTAT_IBS,
				  &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
		if (ibs_enabled(vcpu)) {
			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
			atomic_andnot(CPUSTAT_IBS,
				      &vcpu->arch.sie_block->cpuflags);
		}
		goto retry;
	}

	/* nothing to do, just clear the request */
	clear_bit(KVM_REQ_UNHALT, &vcpu->requests);

	return 0;
}

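/*
 * Note (summary, not new behaviour): other code feeds this loop with
 * kvm_make_request()/kvm_s390_sync_request(), e.g. sync_regs() below does
 *
 *	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 *
 * which the next pass through vcpu_pre_run() turns into the ihcpu
 * invalidation handled above.
 */
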
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
{
	struct kvm_vcpu *vcpu;
	int i;

	mutex_lock(&kvm->lock);
	preempt_disable();
	kvm->arch.epoch = tod - get_tod_clock();
	kvm_s390_vcpu_block_all(kvm);
	kvm_for_each_vcpu(i, vcpu, kvm)
		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
	kvm_s390_vcpu_unblock_all(kvm);
	preempt_enable();
	mutex_unlock(&kvm->lock);
}

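/*
 * Illustrative sketch (not part of this file): kvm_s390_set_tod_clock()
 * is reached via the KVM_S390_VM_TOD device attribute on the VM fd:
 *
 *	__u64 tod = host_tod + guest_offset;	// hypothetical values
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */
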
/**
 * kvm_arch_fault_in_page - fault-in guest page if necessary
 * @vcpu: The corresponding virtual cpu
 * @gpa: Guest physical address
 * @writable: Whether the page should be writable or not
 *
 * Make sure that a guest page has been faulted-in on the host.
 *
 * Return: Zero on success, negative error code otherwise.
 */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
	return gmap_fault(vcpu->arch.gmap, gpa,
			  writable ? FAULT_FLAG_WRITE : 0);
}

static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
				      unsigned long token)
{
	struct kvm_s390_interrupt inti;
	struct kvm_s390_irq irq;

	if (start_token) {
		irq.u.ext.ext_params2 = token;
		irq.type = KVM_S390_INT_PFAULT_INIT;
		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
	} else {
		inti.type = KVM_S390_INT_PFAULT_DONE;
		inti.parm64 = token;
		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
	}
}

void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
				     struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
}

void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
				 struct kvm_async_pf *work)
{
	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
}

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}

bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
{
	/*
	 * s390 will always inject the page directly,
	 * but we still want check_async_completion to cleanup
	 */
	return true;
}

static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
{
	hva_t hva;
	struct kvm_arch_async_pf arch;
	int rc;

	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
		return 0;
	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
	    vcpu->arch.pfault_compare)
		return 0;
	if (psw_extint_disabled(vcpu))
		return 0;
	if (kvm_s390_vcpu_has_irq(vcpu, 0))
		return 0;
	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
		return 0;
	if (!vcpu->arch.gmap->pfault_enabled)
		return 0;

	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
	hva += current->thread.gmap_addr & ~PAGE_MASK;
	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
		return 0;

	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
	return rc;
}

static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
	int rc, cpuflags;

	/*
	 * On s390 notifications for arriving pages will be delivered directly
	 * to the guest but the house keeping for completed pfaults is
	 * handled outside the worker.
	 */
	kvm_check_async_pf_completion(vcpu);

	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];

	if (need_resched())
		schedule();

	if (test_cpu_flag(CIF_MCCK_PENDING))
		s390_handle_mcck();

	if (!kvm_is_ucontrol(vcpu->kvm)) {
		rc = kvm_s390_deliver_pending_interrupts(vcpu);
		if (rc)
			return rc;
	}

	rc = kvm_s390_handle_requests(vcpu);
	if (rc)
		return rc;

	if (guestdbg_enabled(vcpu)) {
		kvm_s390_backup_guest_per_regs(vcpu);
		kvm_s390_patch_guest_per_regs(vcpu);
	}

	vcpu->arch.sie_block->icptcode = 0;
	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
	trace_kvm_s390_sie_enter(vcpu, cpuflags);

	return 0;
}

static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
{
	struct kvm_s390_pgm_info pgm_info = {
		.code = PGM_ADDRESSING,
	};
	u8 opcode, ilen;
	int rc;

	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
	trace_kvm_s390_sie_fault(vcpu);

	/*
	 * We want to inject an addressing exception, which is defined as a
	 * suppressing or terminating exception. However, since we came here
	 * by a DAT access exception, the PSW still points to the faulting
	 * instruction since DAT exceptions are nullifying. So we've got
	 * to look up the current opcode to get the length of the instruction
	 * to be able to forward the PSW.
	 */
	rc = read_guest_instr(vcpu, &opcode, 1);
	ilen = insn_length(opcode);
	if (rc < 0) {
		return rc;
	} else if (rc) {
		/* Instruction-Fetching Exceptions - we can't detect the ilen.
		 * Forward by arbitrary ilc, injection will take care of
		 * nullification if necessary.
		 */
		pgm_info = vcpu->arch.pgm;
		ilen = 4;
	}
	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
	kvm_s390_forward_psw(vcpu, ilen);
	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}

static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
		   vcpu->arch.sie_block->icptcode);
	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);

	if (guestdbg_enabled(vcpu))
		kvm_s390_restore_guest_per_regs(vcpu);

	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;

	if (vcpu->arch.sie_block->icptcode > 0) {
		int rc = kvm_handle_sie_intercept(vcpu);

		if (rc != -EOPNOTSUPP)
			return rc;
		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
		return -EREMOTE;
	} else if (exit_reason != -EFAULT) {
		vcpu->stat.exit_null++;
		return 0;
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
		vcpu->run->s390_ucontrol.trans_exc_code =
						current->thread.gmap_addr;
		vcpu->run->s390_ucontrol.pgm_code = 0x10;
		return -EREMOTE;
	} else if (current->thread.gmap_pfault) {
		trace_kvm_s390_major_guest_pfault(vcpu);
		current->thread.gmap_pfault = 0;
		if (kvm_arch_setup_async_pf(vcpu))
			return 0;
		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
	}
	return vcpu_post_run_fault_in_sie(vcpu);
}

static int __vcpu_run(struct kvm_vcpu *vcpu)
{
	int rc, exit_reason;

	/*
	 * We try to hold kvm->srcu during most of vcpu_run (except when run-
	 * ning the guest), so that memslots (and other stuff) are protected
	 */
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	do {
		rc = vcpu_pre_run(vcpu);
		if (rc)
			break;

		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		/*
		 * As PF_VCPU will be used in fault handler, between
		 * guest_enter and guest_exit should be no uaccess.
		 */
		local_irq_disable();
		__kvm_guest_enter();
		__disable_cpu_timer_accounting(vcpu);
		local_irq_enable();
		exit_reason = sie64a(vcpu->arch.sie_block,
				     vcpu->run->s.regs.gprs);
		local_irq_disable();
		__enable_cpu_timer_accounting(vcpu);
		__kvm_guest_exit();
		local_irq_enable();
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

		rc = vcpu_post_run(vcpu, exit_reason);
	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);

	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	return rc;
}

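/*
 * Illustrative sketch (not part of this file): __vcpu_run() is driven by
 * the usual userspace run loop on the vcpu fd:
 *
 *	struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
 *				   MAP_SHARED, vcpu_fd, 0);
 *
 *	while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
 *		if (run->exit_reason == KVM_EXIT_S390_SIEIC)
 *			handle_sieic(run);	// hypothetical helper
 *	}
 */
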
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
		/* some control register changes require a tlb flush */
		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
	}
	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
			kvm_clear_async_pf_completion_queue(vcpu);
	}
	kvm_run->kvm_dirty_regs = 0;
}

static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
	int rc;
	sigset_t sigsaved;

	if (guestdbg_exit_pending(vcpu)) {
		kvm_s390_prepare_debug_exit(vcpu);
		return 0;
	}

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
		kvm_s390_vcpu_start(vcpu);
	} else if (is_vcpu_stopped(vcpu)) {
		pr_err_ratelimited("can't run stopped vcpu %d\n",
				   vcpu->vcpu_id);
		return -EINVAL;
	}

	sync_regs(vcpu, kvm_run);
	enable_cpu_timer_accounting(vcpu);

	might_fault();
	rc = __vcpu_run(vcpu);

	if (signal_pending(current) && !rc) {
		kvm_run->exit_reason = KVM_EXIT_INTR;
		rc = -EINTR;
	}

	if (guestdbg_exit_pending(vcpu) && !rc) {
		kvm_s390_prepare_debug_exit(vcpu);
		rc = 0;
	}

	if (rc == -EREMOTE) {
		/* userspace support is needed, kvm_run has been prepared */
		rc = 0;
	}

	disable_cpu_timer_accounting(vcpu);
	store_regs(vcpu, kvm_run);

	if (vcpu->sigset_active)
		sigprocmask(SIG_SETMASK, &sigsaved, NULL);

	vcpu->stat.exit_userspace++;
	return rc;
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
	unsigned char archmode = 1;
	freg_t fprs[NUM_FPRS];
	unsigned int px;
	u64 clkcomp, cputm;
	int rc;

	px = kvm_s390_get_prefix(vcpu);
	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
		if (write_guest_abs(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = 0;
	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
		if (write_guest_real(vcpu, 163, &archmode, 1))
			return -EFAULT;
		gpa = px;
	} else
		gpa -= __LC_FPREGS_SAVE_AREA;

	/* manually convert vector registers if necessary */
	if (MACHINE_HAS_VX) {
		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     fprs, 128);
	} else {
		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
				     vcpu->run->s.regs.fprs, 128);
	}
	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
			      vcpu->run->s.regs.gprs, 128);
	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
			      &vcpu->arch.sie_block->gpsw, 16);
	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
			      &px, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
			      &vcpu->run->s.regs.fpc, 4);
	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
			      &vcpu->arch.sie_block->todpr, 4);
	cputm = kvm_s390_get_cpu_timer(vcpu);
	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
			      &cputm, 8);
	clkcomp = vcpu->arch.sie_block->ckc >> 8;
	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
			      &clkcomp, 8);
	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
			      &vcpu->run->s.regs.acrs, 64);
	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
			      &vcpu->arch.sie_block->gcr, 128);
	return rc ? -EFAULT : 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	/*
	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
	 * copying in vcpu load/put. Let's update our copies before we save
	 * it into the save area
	 */
	save_fpu_regs();
	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
	save_access_regs(vcpu->run->s.regs.acrs);

	return kvm_s390_store_status_unloaded(vcpu, addr);
}

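/*
 * Illustrative sketch (not part of this file): the store-status path above
 * is reachable from userspace via KVM_S390_STORE_STATUS, e.g. storing into
 * the prefix area of a stopped vcpu:
 *
 *	ioctl(vcpu_fd, KVM_S390_STORE_STATUS, KVM_S390_STORE_STATUS_PREFIXED);
 */
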
/*
 * store additional status at address
 */
int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
					unsigned long gpa)
{
	/* Only bits 0-53 are used for address formation */
	if (!(gpa & ~0x3ff))
		return 0;

	return write_guest_abs(vcpu, gpa & ~0x3ff,
			       (void *)&vcpu->run->s.regs.vrs, 512);
}

int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
	if (!test_kvm_facility(vcpu->kvm, 129))
		return 0;

	/*
	 * The guest VXRS are in the host VXRs due to the lazy
	 * copying in vcpu load/put. We can simply call save_fpu_regs()
	 * to save the current register state because we are in the
	 * middle of a load/put cycle.
	 *
	 * Let's update our copies before we save it into the save area.
	 */
	save_fpu_regs();

	return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
}

static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
}

static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
{
	unsigned int i;
	struct kvm_vcpu *vcpu;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		__disable_ibs_on_vcpu(vcpu);
	}
}

static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}

void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;

	if (!is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
			started_vcpus++;
	}

	if (started_vcpus == 0) {
		/* we're the only active VCPU -> speed it up */
		__enable_ibs_on_vcpu(vcpu);
	} else if (started_vcpus == 1) {
		/*
		 * As we are starting a second VCPU, we have to disable
		 * the IBS facility on all VCPUs to remove potentially
		 * outstanding ENABLE requests.
		 */
		__disable_ibs_on_all_vcpus(vcpu->kvm);
	}

	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	/*
	 * Another VCPU might have used IBS while we were offline.
	 * Let's play safe and flush the VCPU at startup.
	 */
	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{
	int i, online_vcpus, started_vcpus = 0;
	struct kvm_vcpu *started_vcpu = NULL;

	if (is_vcpu_stopped(vcpu))
		return;

	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
	/* Only one cpu at a time may enter/leave the STOPPED state. */
	spin_lock(&vcpu->kvm->arch.start_stop_lock);
	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);

	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
	kvm_s390_clear_stop_irq(vcpu);

	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
	__disable_ibs_on_vcpu(vcpu);

	for (i = 0; i < online_vcpus; i++) {
		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
			started_vcpus++;
			started_vcpu = vcpu->kvm->vcpus[i];
		}
	}

	if (started_vcpus == 1) {
		/*
		 * As we only have one VCPU left, we want to enable the
		 * IBS facility for that VCPU to speed it up.
		 */
		__enable_ibs_on_vcpu(started_vcpu);
	}

	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
	return;
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
	int r;

	if (cap->flags)
		return -EINVAL;

	switch (cap->cap) {
	case KVM_CAP_S390_CSS_SUPPORT:
		if (!vcpu->kvm->arch.css_support) {
			vcpu->kvm->arch.css_support = 1;
			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
			trace_kvm_s390_enable_css(vcpu->kvm);
		}
		r = 0;
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
				  struct kvm_s390_mem_op *mop)
{
	void __user *uaddr = (void __user *)mop->buf;
	void *tmpbuf = NULL;
	int r, srcu_idx;
	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
				    | KVM_S390_MEMOP_F_CHECK_ONLY;

	if (mop->flags & ~supported_flags)
		return -EINVAL;

	if (mop->size > MEM_OP_MAX_SIZE)
		return -E2BIG;

	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
		tmpbuf = vmalloc(mop->size);
		if (!tmpbuf)
			return -ENOMEM;
	}

	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (mop->op) {
	case KVM_S390_MEMOP_LOGICAL_READ:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_FETCH);
			break;
		}
		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		if (r == 0) {
			if (copy_to_user(uaddr, tmpbuf, mop->size))
				r = -EFAULT;
		}
		break;
	case KVM_S390_MEMOP_LOGICAL_WRITE:
		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
					    mop->size, GACC_STORE);
			break;
		}
		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
			r = -EFAULT;
			break;
		}
		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
		break;
	default:
		r = -EINVAL;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);

	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);

	vfree(tmpbuf);
	return r;
}

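/*
 * Illustrative sketch (not part of this file): reading guest memory
 * through the KVM_S390_MEM_OP ioctl handled above:
 *
 *	__u8 buf[256];
 *	struct kvm_s390_mem_op op = {
 *		.gaddr = guest_addr,		// hypothetical address
 *		.size  = sizeof(buf),
 *		.op    = KVM_S390_MEMOP_LOGICAL_READ,
 *		.buf   = (__u64)buf,
 *		.ar    = 0,
 *	};
 *
 *	ioctl(vcpu_fd, KVM_S390_MEM_OP, &op);
 */
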
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int idx;
	long r;

	switch (ioctl) {
	case KVM_S390_IRQ: {
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
			break;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_INTERRUPT: {
		struct kvm_s390_interrupt s390int;
		struct kvm_s390_irq s390irq;

		r = -EFAULT;
		if (copy_from_user(&s390int, argp, sizeof(s390int)))
			break;
		if (s390int_to_s390irq(&s390int, &s390irq))
			return -EINVAL;
		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
		break;
	}
	case KVM_S390_STORE_STATUS:
		idx = srcu_read_lock(&vcpu->kvm->srcu);
		r = kvm_s390_vcpu_store_status(vcpu, arg);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);
		break;
	case KVM_S390_SET_INITIAL_PSW: {
		psw_t psw;

		r = -EFAULT;
		if (copy_from_user(&psw, argp, sizeof(psw)))
			break;
		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
		break;
	}
	case KVM_S390_INITIAL_RESET:
		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
		break;
	case KVM_SET_ONE_REG:
	case KVM_GET_ONE_REG: {
		struct kvm_one_reg reg;

		r = -EFAULT;
		if (copy_from_user(&reg, argp, sizeof(reg)))
			break;
		if (ioctl == KVM_SET_ONE_REG)
			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
		else
			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
		break;
	}
#ifdef CONFIG_KVM_S390_UCONTROL
	case KVM_S390_UCAS_MAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
				     ucasmap.vcpu_addr, ucasmap.length);
		break;
	}
	case KVM_S390_UCAS_UNMAP: {
		struct kvm_s390_ucas_mapping ucasmap;

		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
			r = -EFAULT;
			break;
		}

		if (!kvm_is_ucontrol(vcpu->kvm)) {
			r = -EINVAL;
			break;
		}

		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
				       ucasmap.length);
		break;
	}
#endif
	case KVM_S390_VCPU_FAULT: {
		r = gmap_fault(vcpu->arch.gmap, arg, 0);
		break;
	}
	case KVM_ENABLE_CAP:
	{
		struct kvm_enable_cap cap;

		r = -EFAULT;
		if (copy_from_user(&cap, argp, sizeof(cap)))
			break;
		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
		break;
	}
	case KVM_S390_MEM_OP: {
		struct kvm_s390_mem_op mem_op;

		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
		else
			r = -EFAULT;
		break;
	}
	case KVM_S390_SET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
		    irq_state.len == 0 ||
		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_set_irq_state(vcpu,
					   (void __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	case KVM_S390_GET_IRQ_STATE: {
		struct kvm_s390_irq_state irq_state;

		r = -EFAULT;
		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
			break;
		if (irq_state.len == 0) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_get_irq_state(vcpu,
					   (__u8 __user *) irq_state.buf,
					   irq_state.len);
		break;
	}
	default:
		r = -ENOTTY;
	}
	return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
		 && (kvm_is_ucontrol(vcpu->kvm))) {
		vmf->page = virt_to_page(vcpu->arch.sie_block);
		get_page(vmf->page);
		return 0;
	}
#endif
	return VM_FAULT_SIGBUS;
}

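/*
 * Illustrative sketch (not part of this file): in ucontrol mode userspace
 * maps the SIE control block served by the fault handler above:
 *
 *	void *scb = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			 vcpu_fd, KVM_S390_SIE_PAGE_OFFSET * page_size);
 *
 * (page_size being the host page size, e.g. from sysconf(_SC_PAGESIZE)).
 */
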
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
			    unsigned long npages)
{
	return 0;
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				   struct kvm_memory_slot *memslot,
				   const struct kvm_userspace_memory_region *mem,
				   enum kvm_mr_change change)
{
	/* A few sanity checks. We can have memory slots which have to be
	   located/ended at a segment boundary (1MB). The memory in userland is
	   ok to be fragmented into various different vmas. It is okay to mmap()
	   and munmap() stuff in this slot after doing this call at any time */

	if (mem->userspace_addr & 0xffffful)
		return -EINVAL;

	if (mem->memory_size & 0xffffful)
		return -EINVAL;

	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
		return -EINVAL;

	return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				const struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change)
{
	int rc;

	/* If the basics of the memslot do not change, we do not want
	 * to update the gmap. Every update causes several unnecessary
	 * segment translation exceptions. This is usually handled just
	 * fine by the normal fault handler + gmap, but it will also
	 * cause faults on the prefix page of running guest CPUs.
	 */
	if (old->userspace_addr == mem->userspace_addr &&
	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
	    old->npages * PAGE_SIZE == mem->memory_size)
		return;

	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
			      mem->guest_phys_addr, mem->memory_size);
	if (rc)
		pr_warn("failed to commit memory region\n");
	return;
}

static inline unsigned long nonhyp_mask(int i)
{
	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;

	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
}

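/*
 * Worked example (for illustration): for facility word i = 0 and a
 * hypothetical sclp.hmfai = 0x40000000, the top two bits give
 * nonhyp_fai = (0x40000000 << 0) >> 30 = 1, so the function returns
 * 0x0000ffffffffffffUL >> (1 << 4) = 0x00000000ffffffffUL.
 */
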
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
{
	vcpu->valid_wakeup = false;
}

static int __init kvm_s390_init(void)
{
	int i;

	if (!sclp.has_sief2) {
		pr_info("SIE not available\n");
		return -ENODEV;
	}

	for (i = 0; i < 16; i++)
		kvm_s390_fac_list_mask[i] |=
			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);

	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
}

static void __exit kvm_s390_exit(void)
{
	kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");