KVM: s390: fix guest fprs memory leak
arch/s390/kvm/kvm-s390.c
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
116 { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6fffbfcfdfc40UL,
122 0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 /* every s390 is virtualization enabled ;-) */
138 return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
148 */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 void *v)
151 {
152 struct kvm *kvm;
153 struct kvm_vcpu *vcpu;
154 int i;
155 unsigned long long *delta = v;
156
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
161 }
162 }
163 return NOTIFY_OK;
164 }
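/*
 * The guest TOD is derived as host TOD + epoch. When the machine TOD
 * clock is stepped by *delta (e.g. after an STP/ETR sync check), the
 * epoch of every VM and every SIE block is reduced by the same delta
 * above, so the TOD value observed by the guest stays continuous.
 */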
165
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
176 return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 if (!kvm_s390_dbf)
190 return -ENOMEM;
191
192 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 debug_unregister(kvm_s390_dbf);
194 return -ENOMEM;
195 }
196
197 /* Register floating interrupt controller interface. */
198 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203 debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 unsigned int ioctl, unsigned long arg)
209 {
210 if (ioctl == KVM_S390_ENABLE_SIE)
211 return s390_enable_sie();
212 return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 int r;
218
219 switch (ext) {
220 case KVM_CAP_S390_PSW:
221 case KVM_CAP_S390_GMAP:
222 case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 case KVM_CAP_S390_UCONTROL:
225 #endif
226 case KVM_CAP_ASYNC_PF:
227 case KVM_CAP_SYNC_REGS:
228 case KVM_CAP_ONE_REG:
229 case KVM_CAP_ENABLE_CAP:
230 case KVM_CAP_S390_CSS_SUPPORT:
231 case KVM_CAP_IOEVENTFD:
232 case KVM_CAP_DEVICE_CTRL:
233 case KVM_CAP_ENABLE_CAP_VM:
234 case KVM_CAP_S390_IRQCHIP:
235 case KVM_CAP_VM_ATTRIBUTES:
236 case KVM_CAP_MP_STATE:
237 case KVM_CAP_S390_INJECT_IRQ:
238 case KVM_CAP_S390_USER_SIGP:
239 case KVM_CAP_S390_USER_STSI:
240 case KVM_CAP_S390_SKEYS:
241 case KVM_CAP_S390_IRQ_STATE:
242 r = 1;
243 break;
244 case KVM_CAP_S390_MEM_OP:
245 r = MEM_OP_MAX_SIZE;
246 break;
247 case KVM_CAP_NR_VCPUS:
248 case KVM_CAP_MAX_VCPUS:
249 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
250 : KVM_S390_BSCA_CPU_SLOTS;
251 break;
252 case KVM_CAP_NR_MEMSLOTS:
253 r = KVM_USER_MEM_SLOTS;
254 break;
255 case KVM_CAP_S390_COW:
256 r = MACHINE_HAS_ESOP;
257 break;
258 case KVM_CAP_S390_VECTOR_REGISTERS:
259 r = MACHINE_HAS_VX;
260 break;
261 case KVM_CAP_S390_RI:
262 r = test_facility(64);
263 break;
264 default:
265 r = 0;
266 }
267 return r;
268 }
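/*
 * Illustrative userspace sketch (not part of this file): capabilities are
 * queried with KVM_CHECK_EXTENSION on the VM file descriptor, e.g.
 *
 *	int max = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_MEM_OP);
 *
 * A return of 0 means unsupported; for KVM_CAP_S390_MEM_OP the positive
 * return value is the maximum transfer size in bytes (MEM_OP_MAX_SIZE).
 */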
269
270 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
271 struct kvm_memory_slot *memslot)
272 {
273 gfn_t cur_gfn, last_gfn;
274 unsigned long address;
275 struct gmap *gmap = kvm->arch.gmap;
276
277 down_read(&gmap->mm->mmap_sem);
278 /* Loop over all guest pages */
279 last_gfn = memslot->base_gfn + memslot->npages;
280 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
281 address = gfn_to_hva_memslot(memslot, cur_gfn);
282
283 if (gmap_test_and_clear_dirty(address, gmap))
284 mark_page_dirty(kvm, cur_gfn);
285 }
286 up_read(&gmap->mm->mmap_sem);
287 }
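/*
 * Note: the loop above is linear in the number of pages of the memslot and
 * runs with mmap_sem held for reading; each gfn's dirty state is carried
 * over from the gmap into the KVM dirty bitmap via mark_page_dirty().
 */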
288
289 /* Section: vm related */
290 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
291
292 /*
293 * Get (and clear) the dirty memory log for a memory slot.
294 */
295 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
296 struct kvm_dirty_log *log)
297 {
298 int r;
299 unsigned long n;
300 struct kvm_memslots *slots;
301 struct kvm_memory_slot *memslot;
302 int is_dirty = 0;
303
304 mutex_lock(&kvm->slots_lock);
305
306 r = -EINVAL;
307 if (log->slot >= KVM_USER_MEM_SLOTS)
308 goto out;
309
310 slots = kvm_memslots(kvm);
311 memslot = id_to_memslot(slots, log->slot);
312 r = -ENOENT;
313 if (!memslot->dirty_bitmap)
314 goto out;
315
316 kvm_s390_sync_dirty_log(kvm, memslot);
317 r = kvm_get_dirty_log(kvm, log, &is_dirty);
318 if (r)
319 goto out;
320
321 /* Clear the dirty log */
322 if (is_dirty) {
323 n = kvm_dirty_bitmap_bytes(memslot);
324 memset(memslot->dirty_bitmap, 0, n);
325 }
326 r = 0;
327 out:
328 mutex_unlock(&kvm->slots_lock);
329 return r;
330 }
331
332 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
333 {
334 int r;
335
336 if (cap->flags)
337 return -EINVAL;
338
339 switch (cap->cap) {
340 case KVM_CAP_S390_IRQCHIP:
341 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
342 kvm->arch.use_irqchip = 1;
343 r = 0;
344 break;
345 case KVM_CAP_S390_USER_SIGP:
346 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
347 kvm->arch.user_sigp = 1;
348 r = 0;
349 break;
350 case KVM_CAP_S390_VECTOR_REGISTERS:
351 mutex_lock(&kvm->lock);
352 if (atomic_read(&kvm->online_vcpus)) {
353 r = -EBUSY;
354 } else if (MACHINE_HAS_VX) {
355 set_kvm_facility(kvm->arch.model.fac->mask, 129);
356 set_kvm_facility(kvm->arch.model.fac->list, 129);
357 r = 0;
358 } else
359 r = -EINVAL;
360 mutex_unlock(&kvm->lock);
361 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
362 r ? "(not available)" : "(success)");
363 break;
364 case KVM_CAP_S390_RI:
365 r = -EINVAL;
366 mutex_lock(&kvm->lock);
367 if (atomic_read(&kvm->online_vcpus)) {
368 r = -EBUSY;
369 } else if (test_facility(64)) {
370 set_kvm_facility(kvm->arch.model.fac->mask, 64);
371 set_kvm_facility(kvm->arch.model.fac->list, 64);
372 r = 0;
373 }
374 mutex_unlock(&kvm->lock);
375 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
376 r ? "(not available)" : "(success)");
377 break;
378 case KVM_CAP_S390_USER_STSI:
379 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
380 kvm->arch.user_stsi = 1;
381 r = 0;
382 break;
383 default:
384 r = -EINVAL;
385 break;
386 }
387 return r;
388 }
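/*
 * KVM_CAP_S390_VECTOR_REGISTERS and KVM_CAP_S390_RI can only be enabled
 * while no vCPU exists (-EBUSY otherwise), because the corresponding
 * facility bits must already be set when kvm_arch_vcpu_setup() configures
 * the per-vCPU SIE control blocks.
 */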
389
390 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
391 {
392 int ret;
393
394 switch (attr->attr) {
395 case KVM_S390_VM_MEM_LIMIT_SIZE:
396 ret = 0;
397 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
398 kvm->arch.mem_limit);
399 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
400 ret = -EFAULT;
401 break;
402 default:
403 ret = -ENXIO;
404 break;
405 }
406 return ret;
407 }
408
409 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
410 {
411 int ret;
412 unsigned int idx;
413 switch (attr->attr) {
414 case KVM_S390_VM_MEM_ENABLE_CMMA:
415 /* enable CMMA only for z10 and later (EDAT_1) */
416 ret = -EINVAL;
417 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
418 break;
419
420 ret = -EBUSY;
421 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
422 mutex_lock(&kvm->lock);
423 if (atomic_read(&kvm->online_vcpus) == 0) {
424 kvm->arch.use_cmma = 1;
425 ret = 0;
426 }
427 mutex_unlock(&kvm->lock);
428 break;
429 case KVM_S390_VM_MEM_CLR_CMMA:
430 ret = -EINVAL;
431 if (!kvm->arch.use_cmma)
432 break;
433
434 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
435 mutex_lock(&kvm->lock);
436 idx = srcu_read_lock(&kvm->srcu);
437 s390_reset_cmma(kvm->arch.gmap->mm);
438 srcu_read_unlock(&kvm->srcu, idx);
439 mutex_unlock(&kvm->lock);
440 ret = 0;
441 break;
442 case KVM_S390_VM_MEM_LIMIT_SIZE: {
443 unsigned long new_limit;
444
445 if (kvm_is_ucontrol(kvm))
446 return -EINVAL;
447
448 if (get_user(new_limit, (u64 __user *)attr->addr))
449 return -EFAULT;
450
451 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
452 new_limit > kvm->arch.mem_limit)
453 return -E2BIG;
454
455 if (!new_limit)
456 return -EINVAL;
457
458 /* gmap_alloc takes last usable address */
459 if (new_limit != KVM_S390_NO_MEM_LIMIT)
460 new_limit -= 1;
461
462 ret = -EBUSY;
463 mutex_lock(&kvm->lock);
464 if (atomic_read(&kvm->online_vcpus) == 0) {
465 /* gmap_alloc will round the limit up */
466 struct gmap *new = gmap_alloc(current->mm, new_limit);
467
468 if (!new) {
469 ret = -ENOMEM;
470 } else {
471 gmap_free(kvm->arch.gmap);
472 new->private = kvm;
473 kvm->arch.gmap = new;
474 ret = 0;
475 }
476 }
477 mutex_unlock(&kvm->lock);
478 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
479 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
480 (void *) kvm->arch.gmap->asce);
481 break;
482 }
483 default:
484 ret = -ENXIO;
485 break;
486 }
487 return ret;
488 }
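/*
 * Illustrative userspace sketch (assumed, not from this file) for setting
 * the memory limit before any vCPU is created:
 *
 *	__u64 limit = 16ULL << 30;	/@ 16 GiB, passed via attr->addr @/
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MEM_CTRL,
 *		.attr  = KVM_S390_VM_MEM_LIMIT_SIZE,
 *		.addr  = (__u64)&limit,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 *
 * (The "@" above stands in for "*" to keep this example inside one comment.)
 */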
489
490 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
491
492 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
493 {
494 struct kvm_vcpu *vcpu;
495 int i;
496
497 if (!test_kvm_facility(kvm, 76))
498 return -EINVAL;
499
500 mutex_lock(&kvm->lock);
501 switch (attr->attr) {
502 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
503 get_random_bytes(
504 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
505 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
506 kvm->arch.crypto.aes_kw = 1;
507 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
508 break;
509 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
510 get_random_bytes(
511 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
512 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
513 kvm->arch.crypto.dea_kw = 1;
514 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
515 break;
516 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
517 kvm->arch.crypto.aes_kw = 0;
518 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
519 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
520 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
521 break;
522 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
523 kvm->arch.crypto.dea_kw = 0;
524 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
525 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
526 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
527 break;
528 default:
529 mutex_unlock(&kvm->lock);
530 return -ENXIO;
531 }
532
533 kvm_for_each_vcpu(i, vcpu, kvm) {
534 kvm_s390_vcpu_crypto_setup(vcpu);
535 exit_sie(vcpu);
536 }
537 mutex_unlock(&kvm->lock);
538 return 0;
539 }
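/*
 * Toggling a wrapping key above regenerates (or clears) the mask in the
 * shared CRYCB and then forces every vCPU out of SIE via exit_sie(), so
 * that kvm_s390_vcpu_crypto_setup() can refresh the ECB3_AES/ECB3_DEA bits
 * before the vCPU re-enters the guest.
 */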
540
541 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
542 {
543 u8 gtod_high;
544
545 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
546 sizeof(gtod_high)))
547 return -EFAULT;
548
549 if (gtod_high != 0)
550 return -EINVAL;
551 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
552
553 return 0;
554 }
555
556 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
557 {
558 u64 gtod;
559
560 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
561 return -EFAULT;
562
563 kvm_s390_set_tod_clock(kvm, gtod);
564 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
565 return 0;
566 }
567
568 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
569 {
570 int ret;
571
572 if (attr->flags)
573 return -EINVAL;
574
575 switch (attr->attr) {
576 case KVM_S390_VM_TOD_HIGH:
577 ret = kvm_s390_set_tod_high(kvm, attr);
578 break;
579 case KVM_S390_VM_TOD_LOW:
580 ret = kvm_s390_set_tod_low(kvm, attr);
581 break;
582 default:
583 ret = -ENXIO;
584 break;
585 }
586 return ret;
587 }
588
589 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
590 {
591 u8 gtod_high = 0;
592
593 if (copy_to_user((void __user *)attr->addr, &gtod_high,
594 sizeof(gtod_high)))
595 return -EFAULT;
596 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
597
598 return 0;
599 }
600
601 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
602 {
603 u64 gtod;
604
605 gtod = kvm_s390_get_tod_clock_fast(kvm);
606 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
607 return -EFAULT;
608 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
609
610 return 0;
611 }
612
613 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
614 {
615 int ret;
616
617 if (attr->flags)
618 return -EINVAL;
619
620 switch (attr->attr) {
621 case KVM_S390_VM_TOD_HIGH:
622 ret = kvm_s390_get_tod_high(kvm, attr);
623 break;
624 case KVM_S390_VM_TOD_LOW:
625 ret = kvm_s390_get_tod_low(kvm, attr);
626 break;
627 default:
628 ret = -ENXIO;
629 break;
630 }
631 return ret;
632 }
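/*
 * Illustrative userspace sketch (assumed, not from this file) for reading
 * the guest TOD base:
 *
 *	__u64 tod;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_TOD,
 *		.attr  = KVM_S390_VM_TOD_LOW,
 *		.addr  = (__u64)&tod,
 *	};
 *	ioctl(vm_fd, KVM_GET_DEVICE_ATTR, &attr);
 */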
633
634 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
635 {
636 struct kvm_s390_vm_cpu_processor *proc;
637 int ret = 0;
638
639 mutex_lock(&kvm->lock);
640 if (atomic_read(&kvm->online_vcpus)) {
641 ret = -EBUSY;
642 goto out;
643 }
644 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
645 if (!proc) {
646 ret = -ENOMEM;
647 goto out;
648 }
649 if (!copy_from_user(proc, (void __user *)attr->addr,
650 sizeof(*proc))) {
651 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
652 sizeof(struct cpuid));
653 kvm->arch.model.ibc = proc->ibc;
654 memcpy(kvm->arch.model.fac->list, proc->fac_list,
655 S390_ARCH_FAC_LIST_SIZE_BYTE);
656 } else
657 ret = -EFAULT;
658 kfree(proc);
659 out:
660 mutex_unlock(&kvm->lock);
661 return ret;
662 }
663
664 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666 int ret = -ENXIO;
667
668 switch (attr->attr) {
669 case KVM_S390_VM_CPU_PROCESSOR:
670 ret = kvm_s390_set_processor(kvm, attr);
671 break;
672 }
673 return ret;
674 }
675
676 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
677 {
678 struct kvm_s390_vm_cpu_processor *proc;
679 int ret = 0;
680
681 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
682 if (!proc) {
683 ret = -ENOMEM;
684 goto out;
685 }
686 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
687 proc->ibc = kvm->arch.model.ibc;
688 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
689 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
690 ret = -EFAULT;
691 kfree(proc);
692 out:
693 return ret;
694 }
695
696 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
697 {
698 struct kvm_s390_vm_cpu_machine *mach;
699 int ret = 0;
700
701 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
702 if (!mach) {
703 ret = -ENOMEM;
704 goto out;
705 }
706 get_cpu_id((struct cpuid *) &mach->cpuid);
707 mach->ibc = sclp.ibc;
708 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
709 S390_ARCH_FAC_LIST_SIZE_BYTE);
710 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
711 S390_ARCH_FAC_LIST_SIZE_BYTE);
712 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
713 ret = -EFAULT;
714 kfree(mach);
715 out:
716 return ret;
717 }
718
719 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
720 {
721 int ret = -ENXIO;
722
723 switch (attr->attr) {
724 case KVM_S390_VM_CPU_PROCESSOR:
725 ret = kvm_s390_get_processor(kvm, attr);
726 break;
727 case KVM_S390_VM_CPU_MACHINE:
728 ret = kvm_s390_get_machine(kvm, attr);
729 break;
730 }
731 return ret;
732 }
733
734 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
735 {
736 int ret;
737
738 switch (attr->group) {
739 case KVM_S390_VM_MEM_CTRL:
740 ret = kvm_s390_set_mem_control(kvm, attr);
741 break;
742 case KVM_S390_VM_TOD:
743 ret = kvm_s390_set_tod(kvm, attr);
744 break;
745 case KVM_S390_VM_CPU_MODEL:
746 ret = kvm_s390_set_cpu_model(kvm, attr);
747 break;
748 case KVM_S390_VM_CRYPTO:
749 ret = kvm_s390_vm_set_crypto(kvm, attr);
750 break;
751 default:
752 ret = -ENXIO;
753 break;
754 }
755
756 return ret;
757 }
758
759 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
760 {
761 int ret;
762
763 switch (attr->group) {
764 case KVM_S390_VM_MEM_CTRL:
765 ret = kvm_s390_get_mem_control(kvm, attr);
766 break;
767 case KVM_S390_VM_TOD:
768 ret = kvm_s390_get_tod(kvm, attr);
769 break;
770 case KVM_S390_VM_CPU_MODEL:
771 ret = kvm_s390_get_cpu_model(kvm, attr);
772 break;
773 default:
774 ret = -ENXIO;
775 break;
776 }
777
778 return ret;
779 }
780
781 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
782 {
783 int ret;
784
785 switch (attr->group) {
786 case KVM_S390_VM_MEM_CTRL:
787 switch (attr->attr) {
788 case KVM_S390_VM_MEM_ENABLE_CMMA:
789 case KVM_S390_VM_MEM_CLR_CMMA:
790 case KVM_S390_VM_MEM_LIMIT_SIZE:
791 ret = 0;
792 break;
793 default:
794 ret = -ENXIO;
795 break;
796 }
797 break;
798 case KVM_S390_VM_TOD:
799 switch (attr->attr) {
800 case KVM_S390_VM_TOD_LOW:
801 case KVM_S390_VM_TOD_HIGH:
802 ret = 0;
803 break;
804 default:
805 ret = -ENXIO;
806 break;
807 }
808 break;
809 case KVM_S390_VM_CPU_MODEL:
810 switch (attr->attr) {
811 case KVM_S390_VM_CPU_PROCESSOR:
812 case KVM_S390_VM_CPU_MACHINE:
813 ret = 0;
814 break;
815 default:
816 ret = -ENXIO;
817 break;
818 }
819 break;
820 case KVM_S390_VM_CRYPTO:
821 switch (attr->attr) {
822 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
823 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
824 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
825 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
826 ret = 0;
827 break;
828 default:
829 ret = -ENXIO;
830 break;
831 }
832 break;
833 default:
834 ret = -ENXIO;
835 break;
836 }
837
838 return ret;
839 }
840
841 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
842 {
843 uint8_t *keys;
844 uint64_t hva;
845 unsigned long curkey;
846 int i, r = 0;
847
848 if (args->flags != 0)
849 return -EINVAL;
850
851 /* Is this guest using storage keys? */
852 if (!mm_use_skey(current->mm))
853 return KVM_S390_GET_SKEYS_NONE;
854
855 /* Enforce sane limit on memory allocation */
856 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
857 return -EINVAL;
858
859 keys = kmalloc_array(args->count, sizeof(uint8_t),
860 GFP_KERNEL | __GFP_NOWARN);
861 if (!keys)
862 keys = vmalloc(sizeof(uint8_t) * args->count);
863 if (!keys)
864 return -ENOMEM;
865
866 for (i = 0; i < args->count; i++) {
867 hva = gfn_to_hva(kvm, args->start_gfn + i);
868 if (kvm_is_error_hva(hva)) {
869 r = -EFAULT;
870 goto out;
871 }
872
873 curkey = get_guest_storage_key(current->mm, hva);
874 if (IS_ERR_VALUE(curkey)) {
875 r = curkey;
876 goto out;
877 }
878 keys[i] = curkey;
879 }
880
881 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
882 sizeof(uint8_t) * args->count);
883 if (r)
884 r = -EFAULT;
885 out:
886 kvfree(keys);
887 return r;
888 }
889
890 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
891 {
892 uint8_t *keys;
893 uint64_t hva;
894 int i, r = 0;
895
896 if (args->flags != 0)
897 return -EINVAL;
898
899 /* Enforce sane limit on memory allocation */
900 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
901 return -EINVAL;
902
903 keys = kmalloc_array(args->count, sizeof(uint8_t),
904 GFP_KERNEL | __GFP_NOWARN);
905 if (!keys)
906 keys = vmalloc(sizeof(uint8_t) * args->count);
907 if (!keys)
908 return -ENOMEM;
909
910 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
911 sizeof(uint8_t) * args->count);
912 if (r) {
913 r = -EFAULT;
914 goto out;
915 }
916
917 /* Enable storage key handling for the guest */
918 r = s390_enable_skey();
919 if (r)
920 goto out;
921
922 for (i = 0; i < args->count; i++) {
923 hva = gfn_to_hva(kvm, args->start_gfn + i);
924 if (kvm_is_error_hva(hva)) {
925 r = -EFAULT;
926 goto out;
927 }
928
929 /* Lowest order bit is reserved */
930 if (keys[i] & 0x01) {
931 r = -EINVAL;
932 goto out;
933 }
934
935 r = set_guest_storage_key(current->mm, hva,
936 (unsigned long)keys[i], 0);
937 if (r)
938 goto out;
939 }
940 out:
941 kvfree(keys);
942 return r;
943 }
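/*
 * For both storage key ioctls above, struct kvm_s390_skeys describes a
 * guest frame range (start_gfn, count) and a userspace buffer
 * (skeydata_addr) holding one storage key byte per 4K page; flags must be
 * zero and the lowest bit of each key is reserved when setting.
 */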
944
945 long kvm_arch_vm_ioctl(struct file *filp,
946 unsigned int ioctl, unsigned long arg)
947 {
948 struct kvm *kvm = filp->private_data;
949 void __user *argp = (void __user *)arg;
950 struct kvm_device_attr attr;
951 int r;
952
953 switch (ioctl) {
954 case KVM_S390_INTERRUPT: {
955 struct kvm_s390_interrupt s390int;
956
957 r = -EFAULT;
958 if (copy_from_user(&s390int, argp, sizeof(s390int)))
959 break;
960 r = kvm_s390_inject_vm(kvm, &s390int);
961 break;
962 }
963 case KVM_ENABLE_CAP: {
964 struct kvm_enable_cap cap;
965 r = -EFAULT;
966 if (copy_from_user(&cap, argp, sizeof(cap)))
967 break;
968 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
969 break;
970 }
971 case KVM_CREATE_IRQCHIP: {
972 struct kvm_irq_routing_entry routing;
973
974 r = -EINVAL;
975 if (kvm->arch.use_irqchip) {
976 /* Set up dummy routing. */
977 memset(&routing, 0, sizeof(routing));
978 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
979 }
980 break;
981 }
982 case KVM_SET_DEVICE_ATTR: {
983 r = -EFAULT;
984 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
985 break;
986 r = kvm_s390_vm_set_attr(kvm, &attr);
987 break;
988 }
989 case KVM_GET_DEVICE_ATTR: {
990 r = -EFAULT;
991 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
992 break;
993 r = kvm_s390_vm_get_attr(kvm, &attr);
994 break;
995 }
996 case KVM_HAS_DEVICE_ATTR: {
997 r = -EFAULT;
998 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
999 break;
1000 r = kvm_s390_vm_has_attr(kvm, &attr);
1001 break;
1002 }
1003 case KVM_S390_GET_SKEYS: {
1004 struct kvm_s390_skeys args;
1005
1006 r = -EFAULT;
1007 if (copy_from_user(&args, argp,
1008 sizeof(struct kvm_s390_skeys)))
1009 break;
1010 r = kvm_s390_get_skeys(kvm, &args);
1011 break;
1012 }
1013 case KVM_S390_SET_SKEYS: {
1014 struct kvm_s390_skeys args;
1015
1016 r = -EFAULT;
1017 if (copy_from_user(&args, argp,
1018 sizeof(struct kvm_s390_skeys)))
1019 break;
1020 r = kvm_s390_set_skeys(kvm, &args);
1021 break;
1022 }
1023 default:
1024 r = -ENOTTY;
1025 }
1026
1027 return r;
1028 }
1029
1030 static int kvm_s390_query_ap_config(u8 *config)
1031 {
1032 u32 fcn_code = 0x04000000UL;
1033 u32 cc = 0;
1034
1035 memset(config, 0, 128);
1036 asm volatile(
1037 "lgr 0,%1\n"
1038 "lgr 2,%2\n"
1039 ".long 0xb2af0000\n" /* PQAP(QCI) */
1040 "0: ipm %0\n"
1041 "srl %0,28\n"
1042 "1:\n"
1043 EX_TABLE(0b, 1b)
1044 : "+r" (cc)
1045 : "r" (fcn_code), "r" (config)
1046 : "cc", "0", "2", "memory"
1047 );
1048
1049 return cc;
1050 }
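/*
 * The .long 0xb2af0000 above encodes the PQAP instruction, issued here
 * with the QCI function code in gr0 to store 128 bytes of crypto
 * configuration information at the address in gr2; a non-zero condition
 * code means the information could not be retrieved.
 */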
1051
1052 static int kvm_s390_apxa_installed(void)
1053 {
1054 u8 config[128];
1055 int cc;
1056
1057 if (test_facility(12)) {
1058 cc = kvm_s390_query_ap_config(config);
1059
1060 if (cc)
1061 pr_err("PQAP(QCI) failed with cc=%d", cc);
1062 else
1063 return config[0] & 0x40;
1064 }
1065
1066 return 0;
1067 }
1068
1069 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1070 {
1071 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1072
1073 if (kvm_s390_apxa_installed())
1074 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1075 else
1076 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1077 }
1078
1079 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1080 {
1081 get_cpu_id(cpu_id);
1082 cpu_id->version = 0xff;
1083 }
1084
1085 static int kvm_s390_crypto_init(struct kvm *kvm)
1086 {
1087 if (!test_kvm_facility(kvm, 76))
1088 return 0;
1089
1090 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1091 GFP_KERNEL | GFP_DMA);
1092 if (!kvm->arch.crypto.crycb)
1093 return -ENOMEM;
1094
1095 kvm_s390_set_crycb_format(kvm);
1096
1097 /* Enable AES/DEA protected key functions by default */
1098 kvm->arch.crypto.aes_kw = 1;
1099 kvm->arch.crypto.dea_kw = 1;
1100 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1101 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1102 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1103 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1104
1105 return 0;
1106 }
1107
1108 static void sca_dispose(struct kvm *kvm)
1109 {
1110 if (kvm->arch.use_esca)
1111 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1112 else
1113 free_page((unsigned long)(kvm->arch.sca));
1114 kvm->arch.sca = NULL;
1115 }
1116
1117 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1118 {
1119 int i, rc;
1120 char debug_name[16];
1121 static unsigned long sca_offset;
1122
1123 rc = -EINVAL;
1124 #ifdef CONFIG_KVM_S390_UCONTROL
1125 if (type & ~KVM_VM_S390_UCONTROL)
1126 goto out_err;
1127 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1128 goto out_err;
1129 #else
1130 if (type)
1131 goto out_err;
1132 #endif
1133
1134 rc = s390_enable_sie();
1135 if (rc)
1136 goto out_err;
1137
1138 rc = -ENOMEM;
1139
1140 kvm->arch.use_esca = 0; /* start with basic SCA */
1141 rwlock_init(&kvm->arch.sca_lock);
1142 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1143 if (!kvm->arch.sca)
1144 goto out_err;
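	/*
	 * sca_offset staggers the basic SCA within its page from one VM to
	 * the next, presumably so that the frequently written SCA fields of
	 * different guests do not all compete for the same cache lines.
	 */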
1145 spin_lock(&kvm_lock);
1146 sca_offset += 16;
1147 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1148 sca_offset = 0;
1149 kvm->arch.sca = (struct bsca_block *)
1150 ((char *) kvm->arch.sca + sca_offset);
1151 spin_unlock(&kvm_lock);
1152
1153 sprintf(debug_name, "kvm-%u", current->pid);
1154
1155 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1156 if (!kvm->arch.dbf)
1157 goto out_err;
1158
1159 /*
1160 * The architectural maximum amount of facilities is 16 kbit. To store
1161 * this amount, 2 kbyte of memory is required. Thus we need a full
1162 * page to hold the guest facility list (arch.model.fac->list) and the
1163 * facility mask (arch.model.fac->mask). Its address size has to be
1164 * 31 bits and word aligned.
1165 */
1166 kvm->arch.model.fac =
1167 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1168 if (!kvm->arch.model.fac)
1169 goto out_err;
1170
1171 /* Populate the facility mask initially. */
1172 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1173 S390_ARCH_FAC_LIST_SIZE_BYTE);
1174 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1175 if (i < kvm_s390_fac_list_mask_size())
1176 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1177 else
1178 kvm->arch.model.fac->mask[i] = 0UL;
1179 }
1180
1181 /* Populate the facility list initially. */
1182 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1183 S390_ARCH_FAC_LIST_SIZE_BYTE);
1184
1185 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1186 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1187
1188 if (kvm_s390_crypto_init(kvm) < 0)
1189 goto out_err;
1190
1191 spin_lock_init(&kvm->arch.float_int.lock);
1192 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1193 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1194 init_waitqueue_head(&kvm->arch.ipte_wq);
1195 mutex_init(&kvm->arch.ipte_mutex);
1196
1197 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1198 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1199
1200 if (type & KVM_VM_S390_UCONTROL) {
1201 kvm->arch.gmap = NULL;
1202 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1203 } else {
1204 if (sclp.hamax == U64_MAX)
1205 kvm->arch.mem_limit = TASK_MAX_SIZE;
1206 else
1207 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1208 sclp.hamax + 1);
1209 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1210 if (!kvm->arch.gmap)
1211 goto out_err;
1212 kvm->arch.gmap->private = kvm;
1213 kvm->arch.gmap->pfault_enabled = 0;
1214 }
1215
1216 kvm->arch.css_support = 0;
1217 kvm->arch.use_irqchip = 0;
1218 kvm->arch.epoch = 0;
1219
1220 spin_lock_init(&kvm->arch.start_stop_lock);
1221 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1222
1223 return 0;
1224 out_err:
1225 kfree(kvm->arch.crypto.crycb);
1226 free_page((unsigned long)kvm->arch.model.fac);
1227 debug_unregister(kvm->arch.dbf);
1228 sca_dispose(kvm);
1229 KVM_EVENT(3, "creation of vm failed: %d", rc);
1230 return rc;
1231 }
1232
1233 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1234 {
1235 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1236 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1237 kvm_s390_clear_local_irqs(vcpu);
1238 kvm_clear_async_pf_completion_queue(vcpu);
1239 if (!kvm_is_ucontrol(vcpu->kvm))
1240 sca_del_vcpu(vcpu);
1241
1242 if (kvm_is_ucontrol(vcpu->kvm))
1243 gmap_free(vcpu->arch.gmap);
1244
1245 if (vcpu->kvm->arch.use_cmma)
1246 kvm_s390_vcpu_unsetup_cmma(vcpu);
1247 kfree(vcpu->arch.guest_fpregs.fprs);
1248 free_page((unsigned long)(vcpu->arch.sie_block));
1249
1250 kvm_vcpu_uninit(vcpu);
1251 kmem_cache_free(kvm_vcpu_cache, vcpu);
1252 }
1253
1254 static void kvm_free_vcpus(struct kvm *kvm)
1255 {
1256 unsigned int i;
1257 struct kvm_vcpu *vcpu;
1258
1259 kvm_for_each_vcpu(i, vcpu, kvm)
1260 kvm_arch_vcpu_destroy(vcpu);
1261
1262 mutex_lock(&kvm->lock);
1263 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1264 kvm->vcpus[i] = NULL;
1265
1266 atomic_set(&kvm->online_vcpus, 0);
1267 mutex_unlock(&kvm->lock);
1268 }
1269
1270 void kvm_arch_destroy_vm(struct kvm *kvm)
1271 {
1272 kvm_free_vcpus(kvm);
1273 free_page((unsigned long)kvm->arch.model.fac);
1274 sca_dispose(kvm);
1275 debug_unregister(kvm->arch.dbf);
1276 kfree(kvm->arch.crypto.crycb);
1277 if (!kvm_is_ucontrol(kvm))
1278 gmap_free(kvm->arch.gmap);
1279 kvm_s390_destroy_adapters(kvm);
1280 kvm_s390_clear_float_irqs(kvm);
1281 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1282 }
1283
1284 /* Section: vcpu related */
1285 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1286 {
1287 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1288 if (!vcpu->arch.gmap)
1289 return -ENOMEM;
1290 vcpu->arch.gmap->private = vcpu->kvm;
1291
1292 return 0;
1293 }
1294
1295 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1296 {
1297 read_lock(&vcpu->kvm->arch.sca_lock);
1298 if (vcpu->kvm->arch.use_esca) {
1299 struct esca_block *sca = vcpu->kvm->arch.sca;
1300
1301 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1302 sca->cpu[vcpu->vcpu_id].sda = 0;
1303 } else {
1304 struct bsca_block *sca = vcpu->kvm->arch.sca;
1305
1306 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1307 sca->cpu[vcpu->vcpu_id].sda = 0;
1308 }
1309 read_unlock(&vcpu->kvm->arch.sca_lock);
1310 }
1311
1312 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1313 {
1314 read_lock(&vcpu->kvm->arch.sca_lock);
1315 if (vcpu->kvm->arch.use_esca) {
1316 struct esca_block *sca = vcpu->kvm->arch.sca;
1317
1318 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1319 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1320 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1321 vcpu->arch.sie_block->ecb2 |= 0x04U;
1322 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1323 } else {
1324 struct bsca_block *sca = vcpu->kvm->arch.sca;
1325
1326 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1327 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1328 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1329 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1330 }
1331 read_unlock(&vcpu->kvm->arch.sca_lock);
1332 }
1333
1334 /* Basic SCA to Extended SCA data copy routines */
1335 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1336 {
1337 d->sda = s->sda;
1338 d->sigp_ctrl.c = s->sigp_ctrl.c;
1339 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1340 }
1341
1342 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1343 {
1344 int i;
1345
1346 d->ipte_control = s->ipte_control;
1347 d->mcn[0] = s->mcn;
1348 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1349 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1350 }
1351
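/*
 * Grow from the basic SCA (bsca_block) to the extended SCA (esca_block) so
 * that more vCPUs fit. All vCPUs are blocked and sca_lock is held for
 * writing while the entries are copied and every SIE block is repointed
 * (scaoh/scaol plus the ecb2 bit used for the extended format), so no CPU
 * can enter SIE with a stale SCA reference.
 */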
1352 static int sca_switch_to_extended(struct kvm *kvm)
1353 {
1354 struct bsca_block *old_sca = kvm->arch.sca;
1355 struct esca_block *new_sca;
1356 struct kvm_vcpu *vcpu;
1357 unsigned int vcpu_idx;
1358 u32 scaol, scaoh;
1359
1360 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1361 if (!new_sca)
1362 return -ENOMEM;
1363
1364 scaoh = (u32)((u64)(new_sca) >> 32);
1365 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1366
1367 kvm_s390_vcpu_block_all(kvm);
1368 write_lock(&kvm->arch.sca_lock);
1369
1370 sca_copy_b_to_e(new_sca, old_sca);
1371
1372 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1373 vcpu->arch.sie_block->scaoh = scaoh;
1374 vcpu->arch.sie_block->scaol = scaol;
1375 vcpu->arch.sie_block->ecb2 |= 0x04U;
1376 }
1377 kvm->arch.sca = new_sca;
1378 kvm->arch.use_esca = 1;
1379
1380 write_unlock(&kvm->arch.sca_lock);
1381 kvm_s390_vcpu_unblock_all(kvm);
1382
1383 free_page((unsigned long)old_sca);
1384
1385 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1386 old_sca, kvm->arch.sca);
1387 return 0;
1388 }
1389
1390 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1391 {
1392 int rc;
1393
1394 if (id < KVM_S390_BSCA_CPU_SLOTS)
1395 return true;
1396 if (!sclp.has_esca)
1397 return false;
1398
1399 mutex_lock(&kvm->lock);
1400 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1401 mutex_unlock(&kvm->lock);
1402
1403 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1404 }
1405
1406 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1407 {
1408 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1409 kvm_clear_async_pf_completion_queue(vcpu);
1410 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1411 KVM_SYNC_GPRS |
1412 KVM_SYNC_ACRS |
1413 KVM_SYNC_CRS |
1414 KVM_SYNC_ARCH0 |
1415 KVM_SYNC_PFAULT;
1416 if (test_kvm_facility(vcpu->kvm, 64))
1417 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1418 if (test_kvm_facility(vcpu->kvm, 129))
1419 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1420
1421 if (kvm_is_ucontrol(vcpu->kvm))
1422 return __kvm_ucontrol_vcpu_init(vcpu);
1423
1424 return 0;
1425 }
1426
1427 /*
1428 * Backs up the current FP/VX register save area on a particular
1429 * destination. Used to switch between different register save
1430 * areas.
1431 */
1432 static inline void save_fpu_to(struct fpu *dst)
1433 {
1434 dst->fpc = current->thread.fpu.fpc;
1435 dst->regs = current->thread.fpu.regs;
1436 }
1437
1438 /*
1439 * Switches the FP/VX register save area from which to lazy
1440 * restore register contents.
1441 */
1442 static inline void load_fpu_from(struct fpu *from)
1443 {
1444 current->thread.fpu.fpc = from->fpc;
1445 current->thread.fpu.regs = from->regs;
1446 }
1447
1448 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1449 {
1450 /* Save host register state */
1451 save_fpu_regs();
1452 save_fpu_to(&vcpu->arch.host_fpregs);
1453
1454 if (test_kvm_facility(vcpu->kvm, 129)) {
1455 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1456 /*
1457 * Use the register save area in the SIE-control block
1458 * for register restore and save in kvm_arch_vcpu_put()
1459 */
1460 current->thread.fpu.vxrs =
1461 (__vector128 *)&vcpu->run->s.regs.vrs;
1462 } else
1463 load_fpu_from(&vcpu->arch.guest_fpregs);
1464
1465 if (test_fp_ctl(current->thread.fpu.fpc))
1466 /* User space provided an invalid FPC, let's clear it */
1467 current->thread.fpu.fpc = 0;
1468
1469 save_access_regs(vcpu->arch.host_acrs);
1470 restore_access_regs(vcpu->run->s.regs.acrs);
1471 gmap_enable(vcpu->arch.gmap);
1472 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1473 }
1474
1475 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1476 {
1477 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1478 gmap_disable(vcpu->arch.gmap);
1479
1480 save_fpu_regs();
1481
1482 if (test_kvm_facility(vcpu->kvm, 129))
1483 /*
1484 * kvm_arch_vcpu_load() set up the register save area to
1485 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1486 * are already saved. Only the floating-point control must be
1487 * copied.
1488 */
1489 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1490 else
1491 save_fpu_to(&vcpu->arch.guest_fpregs);
1492 load_fpu_from(&vcpu->arch.host_fpregs);
1493
1494 save_access_regs(vcpu->run->s.regs.acrs);
1495 restore_access_regs(vcpu->arch.host_acrs);
1496 }
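/*
 * Together, kvm_arch_vcpu_load() and kvm_arch_vcpu_put() implement a lazy
 * FP/VX switch: on load the host register state is saved and
 * current->thread.fpu is pointed at the guest's save area (directly at
 * vcpu->run->s.regs.vrs when the vector facility 129 is available); on put
 * the guest state is saved back and the host area is restored.
 */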
1497
1498 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1499 {
1500 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1501 vcpu->arch.sie_block->gpsw.mask = 0UL;
1502 vcpu->arch.sie_block->gpsw.addr = 0UL;
1503 kvm_s390_set_prefix(vcpu, 0);
1504 vcpu->arch.sie_block->cputm = 0UL;
1505 vcpu->arch.sie_block->ckc = 0UL;
1506 vcpu->arch.sie_block->todpr = 0;
1507 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1508 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1509 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1510 vcpu->arch.guest_fpregs.fpc = 0;
1511 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1512 vcpu->arch.sie_block->gbea = 1;
1513 vcpu->arch.sie_block->pp = 0;
1514 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1515 kvm_clear_async_pf_completion_queue(vcpu);
1516 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1517 kvm_s390_vcpu_stop(vcpu);
1518 kvm_s390_clear_local_irqs(vcpu);
1519 }
1520
1521 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1522 {
1523 mutex_lock(&vcpu->kvm->lock);
1524 preempt_disable();
1525 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1526 preempt_enable();
1527 mutex_unlock(&vcpu->kvm->lock);
1528 if (!kvm_is_ucontrol(vcpu->kvm)) {
1529 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1530 sca_add_vcpu(vcpu);
1531 }
1532
1533 }
1534
1535 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1536 {
1537 if (!test_kvm_facility(vcpu->kvm, 76))
1538 return;
1539
1540 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1541
1542 if (vcpu->kvm->arch.crypto.aes_kw)
1543 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1544 if (vcpu->kvm->arch.crypto.dea_kw)
1545 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1546
1547 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1548 }
1549
1550 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1551 {
1552 free_page(vcpu->arch.sie_block->cbrlo);
1553 vcpu->arch.sie_block->cbrlo = 0;
1554 }
1555
1556 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1557 {
1558 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1559 if (!vcpu->arch.sie_block->cbrlo)
1560 return -ENOMEM;
1561
1562 vcpu->arch.sie_block->ecb2 |= 0x80;
1563 vcpu->arch.sie_block->ecb2 &= ~0x08;
1564 return 0;
1565 }
1566
1567 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1568 {
1569 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1570
1571 vcpu->arch.cpu_id = model->cpu_id;
1572 vcpu->arch.sie_block->ibc = model->ibc;
1573 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1574 }
1575
1576 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1577 {
1578 int rc = 0;
1579
1580 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1581 CPUSTAT_SM |
1582 CPUSTAT_STOPPED);
1583
1584 if (test_kvm_facility(vcpu->kvm, 78))
1585 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1586 else if (test_kvm_facility(vcpu->kvm, 8))
1587 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1588
1589 kvm_s390_vcpu_setup_model(vcpu);
1590
1591 vcpu->arch.sie_block->ecb = 6;
1592 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1593 vcpu->arch.sie_block->ecb |= 0x10;
1594
1595 vcpu->arch.sie_block->ecb2 = 8;
1596 vcpu->arch.sie_block->eca = 0xC1002000U;
1597 if (sclp.has_siif)
1598 vcpu->arch.sie_block->eca |= 1;
1599 if (sclp.has_sigpif)
1600 vcpu->arch.sie_block->eca |= 0x10000000U;
1601 if (test_kvm_facility(vcpu->kvm, 64))
1602 vcpu->arch.sie_block->ecb3 |= 0x01;
1603 if (test_kvm_facility(vcpu->kvm, 129)) {
1604 vcpu->arch.sie_block->eca |= 0x00020000;
1605 vcpu->arch.sie_block->ecd |= 0x20000000;
1606 }
1607 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1608 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1609
1610 if (vcpu->kvm->arch.use_cmma) {
1611 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1612 if (rc)
1613 return rc;
1614 }
1615 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1616 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1617
1618 kvm_s390_vcpu_crypto_setup(vcpu);
1619
1620 return rc;
1621 }
1622
1623 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1624 unsigned int id)
1625 {
1626 struct kvm_vcpu *vcpu;
1627 struct sie_page *sie_page;
1628 int rc = -EINVAL;
1629
1630 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1631 goto out;
1632
1633 rc = -ENOMEM;
1634
1635 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1636 if (!vcpu)
1637 goto out;
1638
1639 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1640 if (!sie_page)
1641 goto out_free_cpu;
1642
1643 vcpu->arch.sie_block = &sie_page->sie_block;
1644 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1645
1646 vcpu->arch.sie_block->icpua = id;
1647 spin_lock_init(&vcpu->arch.local_int.lock);
1648 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1649 vcpu->arch.local_int.wq = &vcpu->wq;
1650 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1651
1652 /*
1653 * Allocate a save area for floating-point registers. If the vector
1654 * extension is available, register contents are saved in the SIE
1655 * control block. The allocated save area is still required in
1656 * particular places, for example, in kvm_s390_vcpu_store_status().
1657 */
1658 vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1659 GFP_KERNEL);
1660 if (!vcpu->arch.guest_fpregs.fprs)
1661 goto out_free_sie_block;
1662
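/*
 * From this point on, error paths must also release the fprs save area
 * allocated above; a failing kvm_vcpu_init() is handled by the
 * out_free_fprs label below so the buffer is not leaked.
 */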
1663 rc = kvm_vcpu_init(vcpu, kvm, id);
1664 if (rc)
1665 goto out_free_fprs;
1666 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1667 vcpu->arch.sie_block);
1668 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1669
1670 return vcpu;
1671 out_free_fprs:
1672 kfree(vcpu->arch.guest_fpregs.fprs);
1673 out_free_sie_block:
1674 free_page((unsigned long)(vcpu->arch.sie_block));
1675 out_free_cpu:
1676 kmem_cache_free(kvm_vcpu_cache, vcpu);
1677 out:
1678 return ERR_PTR(rc);
1679 }
1680
1681 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1682 {
1683 return kvm_s390_vcpu_has_irq(vcpu, 0);
1684 }
1685
1686 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1687 {
1688 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1689 exit_sie(vcpu);
1690 }
1691
1692 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1693 {
1694 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1695 }
1696
1697 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1698 {
1699 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1700 exit_sie(vcpu);
1701 }
1702
1703 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1704 {
1705 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1706 }
1707
1708 /*
1709 * Kick a guest cpu out of SIE and wait until SIE is not running.
1710 * If the CPU is not running (e.g. waiting as idle) the function will
1711 * return immediately. */
1712 void exit_sie(struct kvm_vcpu *vcpu)
1713 {
1714 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1715 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1716 cpu_relax();
1717 }
1718
1719 /* Kick a guest cpu out of SIE to process a request synchronously */
1720 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1721 {
1722 kvm_make_request(req, vcpu);
1723 kvm_s390_vcpu_request(vcpu);
1724 }
1725
1726 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1727 {
1728 int i;
1729 struct kvm *kvm = gmap->private;
1730 struct kvm_vcpu *vcpu;
1731
1732 kvm_for_each_vcpu(i, vcpu, kvm) {
1733 /* match against both prefix pages */
1734 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1735 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1736 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1737 }
1738 }
1739 }
1740
1741 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1742 {
1743 /* kvm common code refers to this, but never calls it */
1744 BUG();
1745 return 0;
1746 }
1747
1748 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1749 struct kvm_one_reg *reg)
1750 {
1751 int r = -EINVAL;
1752
1753 switch (reg->id) {
1754 case KVM_REG_S390_TODPR:
1755 r = put_user(vcpu->arch.sie_block->todpr,
1756 (u32 __user *)reg->addr);
1757 break;
1758 case KVM_REG_S390_EPOCHDIFF:
1759 r = put_user(vcpu->arch.sie_block->epoch,
1760 (u64 __user *)reg->addr);
1761 break;
1762 case KVM_REG_S390_CPU_TIMER:
1763 r = put_user(vcpu->arch.sie_block->cputm,
1764 (u64 __user *)reg->addr);
1765 break;
1766 case KVM_REG_S390_CLOCK_COMP:
1767 r = put_user(vcpu->arch.sie_block->ckc,
1768 (u64 __user *)reg->addr);
1769 break;
1770 case KVM_REG_S390_PFTOKEN:
1771 r = put_user(vcpu->arch.pfault_token,
1772 (u64 __user *)reg->addr);
1773 break;
1774 case KVM_REG_S390_PFCOMPARE:
1775 r = put_user(vcpu->arch.pfault_compare,
1776 (u64 __user *)reg->addr);
1777 break;
1778 case KVM_REG_S390_PFSELECT:
1779 r = put_user(vcpu->arch.pfault_select,
1780 (u64 __user *)reg->addr);
1781 break;
1782 case KVM_REG_S390_PP:
1783 r = put_user(vcpu->arch.sie_block->pp,
1784 (u64 __user *)reg->addr);
1785 break;
1786 case KVM_REG_S390_GBEA:
1787 r = put_user(vcpu->arch.sie_block->gbea,
1788 (u64 __user *)reg->addr);
1789 break;
1790 default:
1791 break;
1792 }
1793
1794 return r;
1795 }
1796
1797 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1798 struct kvm_one_reg *reg)
1799 {
1800 int r = -EINVAL;
1801
1802 switch (reg->id) {
1803 case KVM_REG_S390_TODPR:
1804 r = get_user(vcpu->arch.sie_block->todpr,
1805 (u32 __user *)reg->addr);
1806 break;
1807 case KVM_REG_S390_EPOCHDIFF:
1808 r = get_user(vcpu->arch.sie_block->epoch,
1809 (u64 __user *)reg->addr);
1810 break;
1811 case KVM_REG_S390_CPU_TIMER:
1812 r = get_user(vcpu->arch.sie_block->cputm,
1813 (u64 __user *)reg->addr);
1814 break;
1815 case KVM_REG_S390_CLOCK_COMP:
1816 r = get_user(vcpu->arch.sie_block->ckc,
1817 (u64 __user *)reg->addr);
1818 break;
1819 case KVM_REG_S390_PFTOKEN:
1820 r = get_user(vcpu->arch.pfault_token,
1821 (u64 __user *)reg->addr);
1822 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1823 kvm_clear_async_pf_completion_queue(vcpu);
1824 break;
1825 case KVM_REG_S390_PFCOMPARE:
1826 r = get_user(vcpu->arch.pfault_compare,
1827 (u64 __user *)reg->addr);
1828 break;
1829 case KVM_REG_S390_PFSELECT:
1830 r = get_user(vcpu->arch.pfault_select,
1831 (u64 __user *)reg->addr);
1832 break;
1833 case KVM_REG_S390_PP:
1834 r = get_user(vcpu->arch.sie_block->pp,
1835 (u64 __user *)reg->addr);
1836 break;
1837 case KVM_REG_S390_GBEA:
1838 r = get_user(vcpu->arch.sie_block->gbea,
1839 (u64 __user *)reg->addr);
1840 break;
1841 default:
1842 break;
1843 }
1844
1845 return r;
1846 }
1847
1848 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1849 {
1850 kvm_s390_vcpu_initial_reset(vcpu);
1851 return 0;
1852 }
1853
1854 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1855 {
1856 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1857 return 0;
1858 }
1859
1860 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1861 {
1862 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1863 return 0;
1864 }
1865
1866 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1867 struct kvm_sregs *sregs)
1868 {
1869 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1870 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1871 restore_access_regs(vcpu->run->s.regs.acrs);
1872 return 0;
1873 }
1874
1875 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1876 struct kvm_sregs *sregs)
1877 {
1878 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1879 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1880 return 0;
1881 }
1882
1883 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1884 {
1885 if (test_fp_ctl(fpu->fpc))
1886 return -EINVAL;
1887 memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1888 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1889 save_fpu_regs();
1890 load_fpu_from(&vcpu->arch.guest_fpregs);
1891 return 0;
1892 }
1893
1894 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1895 {
1896 memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1897 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1898 return 0;
1899 }
1900
1901 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1902 {
1903 int rc = 0;
1904
1905 if (!is_vcpu_stopped(vcpu))
1906 rc = -EBUSY;
1907 else {
1908 vcpu->run->psw_mask = psw.mask;
1909 vcpu->run->psw_addr = psw.addr;
1910 }
1911 return rc;
1912 }
1913
1914 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1915 struct kvm_translation *tr)
1916 {
1917 return -EINVAL; /* not implemented yet */
1918 }
1919
1920 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1921 KVM_GUESTDBG_USE_HW_BP | \
1922 KVM_GUESTDBG_ENABLE)
1923
1924 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1925 struct kvm_guest_debug *dbg)
1926 {
1927 int rc = 0;
1928
1929 vcpu->guest_debug = 0;
1930 kvm_s390_clear_bp_data(vcpu);
1931
1932 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1933 return -EINVAL;
1934
1935 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1936 vcpu->guest_debug = dbg->control;
1937 /* enforce guest PER */
1938 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1939
1940 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1941 rc = kvm_s390_import_bp_data(vcpu, dbg);
1942 } else {
1943 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1944 vcpu->arch.guestdbg.last_bp = 0;
1945 }
1946
1947 if (rc) {
1948 vcpu->guest_debug = 0;
1949 kvm_s390_clear_bp_data(vcpu);
1950 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1951 }
1952
1953 return rc;
1954 }
1955
1956 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1957 struct kvm_mp_state *mp_state)
1958 {
1959 /* CHECK_STOP and LOAD are not supported yet */
1960 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1961 KVM_MP_STATE_OPERATING;
1962 }
1963
1964 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1965 struct kvm_mp_state *mp_state)
1966 {
1967 int rc = 0;
1968
1969 /* user space knows about this interface - let it control the state */
1970 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1971
1972 switch (mp_state->mp_state) {
1973 case KVM_MP_STATE_STOPPED:
1974 kvm_s390_vcpu_stop(vcpu);
1975 break;
1976 case KVM_MP_STATE_OPERATING:
1977 kvm_s390_vcpu_start(vcpu);
1978 break;
1979 case KVM_MP_STATE_LOAD:
1980 case KVM_MP_STATE_CHECK_STOP:
1981 /* fall through - CHECK_STOP and LOAD are not supported yet */
1982 default:
1983 rc = -ENXIO;
1984 }
1985
1986 return rc;
1987 }
1988
1989 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1990 {
1991 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1992 }
1993
1994 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1995 {
1996 retry:
1997 kvm_s390_vcpu_request_handled(vcpu);
1998 if (!vcpu->requests)
1999 return 0;
2000 /*
2001 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2002 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2003 * This ensures that the ipte instruction for this request has
2004 * already finished. We might race against a second unmapper that
2005 * wants to set the blocking bit. Let's just retry the request loop.
2006 */
2007 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2008 int rc;
2009 rc = gmap_ipte_notify(vcpu->arch.gmap,
2010 kvm_s390_get_prefix(vcpu),
2011 PAGE_SIZE * 2);
2012 if (rc)
2013 return rc;
2014 goto retry;
2015 }
2016
2017 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2018 vcpu->arch.sie_block->ihcpu = 0xffff;
2019 goto retry;
2020 }
2021
2022 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2023 if (!ibs_enabled(vcpu)) {
2024 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2025 atomic_or(CPUSTAT_IBS,
2026 &vcpu->arch.sie_block->cpuflags);
2027 }
2028 goto retry;
2029 }
2030
2031 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2032 if (ibs_enabled(vcpu)) {
2033 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2034 atomic_andnot(CPUSTAT_IBS,
2035 &vcpu->arch.sie_block->cpuflags);
2036 }
2037 goto retry;
2038 }
2039
2040 /* nothing to do, just clear the request */
2041 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2042
2043 return 0;
2044 }
2045
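/*
 * Set the guest TOD clock: recompute the epoch delta against the host
 * TOD clock and propagate it to every VCPU while all VCPUs are blocked,
 * so the guest sees a consistent time across its CPUs.
 */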
2046 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2047 {
2048 struct kvm_vcpu *vcpu;
2049 int i;
2050
2051 mutex_lock(&kvm->lock);
2052 preempt_disable();
2053 kvm->arch.epoch = tod - get_tod_clock();
2054 kvm_s390_vcpu_block_all(kvm);
2055 kvm_for_each_vcpu(i, vcpu, kvm)
2056 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2057 kvm_s390_vcpu_unblock_all(kvm);
2058 preempt_enable();
2059 mutex_unlock(&kvm->lock);
2060 }
2061
2062 /**
2063 * kvm_arch_fault_in_page - fault-in guest page if necessary
2064 * @vcpu: The corresponding virtual cpu
2065 * @gpa: Guest physical address
2066 * @writable: Whether the page should be writable or not
2067 *
2068 * Make sure that a guest page has been faulted-in on the host.
2069 *
2070 * Return: Zero on success, negative error code otherwise.
2071 */
2072 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2073 {
2074 return gmap_fault(vcpu->arch.gmap, gpa,
2075 writable ? FAULT_FLAG_WRITE : 0);
2076 }
2077
2078 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2079 unsigned long token)
2080 {
2081 struct kvm_s390_interrupt inti;
2082 struct kvm_s390_irq irq;
2083
2084 if (start_token) {
2085 irq.u.ext.ext_params2 = token;
2086 irq.type = KVM_S390_INT_PFAULT_INIT;
2087 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2088 } else {
2089 inti.type = KVM_S390_INT_PFAULT_DONE;
2090 inti.parm64 = token;
2091 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2092 }
2093 }
2094
2095 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2096 struct kvm_async_pf *work)
2097 {
2098 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2099 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2100 }
2101
2102 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2103 struct kvm_async_pf *work)
2104 {
2105 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2106 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2107 }
2108
2109 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2110 struct kvm_async_pf *work)
2111 {
2112 /* s390 will always inject the page directly */
2113 }
2114
2115 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2116 {
2117 /*
2118 * s390 will always inject the page directly,
2119 * but we still want check_async_completion to clean up
2120 */
2121 return true;
2122 }
2123
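/*
 * Only queue an async pfault if the guest has set up the pfault
 * interface (valid token, matching PSW mask bits, external interrupts
 * and the required subclass bit in CR0 enabled, pfault enabled for the
 * gmap) and no interrupt is already pending; otherwise the fault has
 * to be resolved synchronously.
 */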
2124 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2125 {
2126 hva_t hva;
2127 struct kvm_arch_async_pf arch;
2128 int rc;
2129
2130 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2131 return 0;
2132 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2133 vcpu->arch.pfault_compare)
2134 return 0;
2135 if (psw_extint_disabled(vcpu))
2136 return 0;
2137 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2138 return 0;
2139 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2140 return 0;
2141 if (!vcpu->arch.gmap->pfault_enabled)
2142 return 0;
2143
2144 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2145 hva += current->thread.gmap_addr & ~PAGE_MASK;
2146 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2147 return 0;
2148
2149 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2150 return rc;
2151 }
2152
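/*
 * Per-entry housekeeping before (re-)entering SIE: complete pending
 * async pfaults, stash gprs 14/15 in the SIE block, handle machine
 * checks, deliver pending interrupts, process sync requests and arm
 * guest debugging if it is enabled.
 */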
2153 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2154 {
2155 int rc, cpuflags;
2156
2157 /*
2158 * On s390, notifications for arriving pages will be delivered directly
2159 * to the guest, but the housekeeping for completed pfaults is
2160 * handled outside the worker.
2161 */
2162 kvm_check_async_pf_completion(vcpu);
2163
2164 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2165 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2166
2167 if (need_resched())
2168 schedule();
2169
2170 if (test_cpu_flag(CIF_MCCK_PENDING))
2171 s390_handle_mcck();
2172
2173 if (!kvm_is_ucontrol(vcpu->kvm)) {
2174 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2175 if (rc)
2176 return rc;
2177 }
2178
2179 rc = kvm_s390_handle_requests(vcpu);
2180 if (rc)
2181 return rc;
2182
2183 if (guestdbg_enabled(vcpu)) {
2184 kvm_s390_backup_guest_per_regs(vcpu);
2185 kvm_s390_patch_guest_per_regs(vcpu);
2186 }
2187
2188 vcpu->arch.sie_block->icptcode = 0;
2189 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2190 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2191 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2192
2193 return 0;
2194 }
2195
2196 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2197 {
2198 psw_t *psw = &vcpu->arch.sie_block->gpsw;
2199 u8 opcode;
2200 int rc;
2201
2202 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2203 trace_kvm_s390_sie_fault(vcpu);
2204
2205 /*
2206 * We want to inject an addressing exception, which is defined as a
2207 * suppressing or terminating exception. However, as we came here
2208 * via a DAT access exception, the PSW still points to the faulting
2209 * instruction, because DAT exceptions are nullifying. So we've got
2210 * to look up the current opcode to get the length of the instruction
2211 * to be able to forward the PSW.
2212 */
2213 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2214 if (rc)
2215 return kvm_s390_inject_prog_cond(vcpu, rc);
2216 psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2217
2218 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2219 }
2220
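/*
 * Post-SIE processing: restore gprs 14/15 from the SIE block, handle
 * intercepts in the kernel where possible and forward the rest to
 * userspace as KVM_EXIT_S390_SIEIC; host page faults are either turned
 * into an async pfault or resolved synchronously.
 */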
2221 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2222 {
2223 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2224 vcpu->arch.sie_block->icptcode);
2225 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2226
2227 if (guestdbg_enabled(vcpu))
2228 kvm_s390_restore_guest_per_regs(vcpu);
2229
2230 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2231 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2232
2233 if (vcpu->arch.sie_block->icptcode > 0) {
2234 int rc = kvm_handle_sie_intercept(vcpu);
2235
2236 if (rc != -EOPNOTSUPP)
2237 return rc;
2238 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2239 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2240 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2241 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2242 return -EREMOTE;
2243 } else if (exit_reason != -EFAULT) {
2244 vcpu->stat.exit_null++;
2245 return 0;
2246 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2247 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2248 vcpu->run->s390_ucontrol.trans_exc_code =
2249 current->thread.gmap_addr;
2250 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2251 return -EREMOTE;
2252 } else if (current->thread.gmap_pfault) {
2253 trace_kvm_s390_major_guest_pfault(vcpu);
2254 current->thread.gmap_pfault = 0;
2255 if (kvm_arch_setup_async_pf(vcpu))
2256 return 0;
2257 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2258 }
2259 return vcpu_post_run_fault_in_sie(vcpu);
2260 }
2261
2262 static int __vcpu_run(struct kvm_vcpu *vcpu)
2263 {
2264 int rc, exit_reason;
2265
2266 /*
2267 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2268 * ning the guest), so that memslots (and other stuff) are protected
2269 */
2270 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2271
2272 do {
2273 rc = vcpu_pre_run(vcpu);
2274 if (rc)
2275 break;
2276
2277 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2278 /*
2279 * As PF_VCPU will be used in the fault handler, there must be
2280 * no uaccess between guest_enter and guest_exit.
2281 */
2282 local_irq_disable();
2283 __kvm_guest_enter();
2284 local_irq_enable();
2285 exit_reason = sie64a(vcpu->arch.sie_block,
2286 vcpu->run->s.regs.gprs);
2287 local_irq_disable();
2288 __kvm_guest_exit();
2289 local_irq_enable();
2290 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2291
2292 rc = vcpu_post_run(vcpu, exit_reason);
2293 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2294
2295 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2296 return rc;
2297 }
2298
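/*
 * Copy the register state that userspace flagged as dirty in kvm_run
 * into the SIE control block and the VCPU before entering the guest.
 */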
2299 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2300 {
2301 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2302 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2303 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2304 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2305 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2306 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2307 /* some control register changes require a tlb flush */
2308 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2309 }
2310 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2311 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2312 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2313 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2314 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2315 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2316 }
2317 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2318 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2319 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2320 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2321 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2322 kvm_clear_async_pf_completion_queue(vcpu);
2323 }
2324 kvm_run->kvm_dirty_regs = 0;
2325 }
2326
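/*
 * Mirror the current guest register state back into kvm_run so that
 * userspace sees it when KVM_RUN returns.
 */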
2327 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2328 {
2329 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2330 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2331 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2332 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2333 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2334 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2335 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2336 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2337 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2338 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2339 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2340 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2341 }
2342
2343 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2344 {
2345 int rc;
2346 sigset_t sigsaved;
2347
2348 if (guestdbg_exit_pending(vcpu)) {
2349 kvm_s390_prepare_debug_exit(vcpu);
2350 return 0;
2351 }
2352
2353 if (vcpu->sigset_active)
2354 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2355
2356 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2357 kvm_s390_vcpu_start(vcpu);
2358 } else if (is_vcpu_stopped(vcpu)) {
2359 pr_err_ratelimited("can't run stopped vcpu %d\n",
2360 vcpu->vcpu_id);
2361 return -EINVAL;
2362 }
2363
2364 sync_regs(vcpu, kvm_run);
2365
2366 might_fault();
2367 rc = __vcpu_run(vcpu);
2368
2369 if (signal_pending(current) && !rc) {
2370 kvm_run->exit_reason = KVM_EXIT_INTR;
2371 rc = -EINTR;
2372 }
2373
2374 if (guestdbg_exit_pending(vcpu) && !rc) {
2375 kvm_s390_prepare_debug_exit(vcpu);
2376 rc = 0;
2377 }
2378
2379 if (rc == -EREMOTE) {
2380 /* userspace support is needed, kvm_run has been prepared */
2381 rc = 0;
2382 }
2383
2384 store_regs(vcpu, kvm_run);
2385
2386 if (vcpu->sigset_active)
2387 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2388
2389 vcpu->stat.exit_userspace++;
2390 return rc;
2391 }
2392
2393 /*
2394 * store status at address
2395 * we have two special cases:
2396 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2397 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2398 */
2399 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2400 {
2401 unsigned char archmode = 1;
2402 unsigned int px;
2403 u64 clkcomp;
2404 int rc;
2405
2406 px = kvm_s390_get_prefix(vcpu);
2407 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2408 if (write_guest_abs(vcpu, 163, &archmode, 1))
2409 return -EFAULT;
2410 gpa = 0;
2411 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2412 if (write_guest_real(vcpu, 163, &archmode, 1))
2413 return -EFAULT;
2414 gpa = px;
2415 } else
2416 gpa -= __LC_FPREGS_SAVE_AREA;
2417 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2418 vcpu->arch.guest_fpregs.fprs, 128);
2419 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2420 vcpu->run->s.regs.gprs, 128);
2421 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2422 &vcpu->arch.sie_block->gpsw, 16);
2423 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2424 &px, 4);
2425 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2426 &vcpu->arch.guest_fpregs.fpc, 4);
2427 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2428 &vcpu->arch.sie_block->todpr, 4);
2429 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2430 &vcpu->arch.sie_block->cputm, 8);
2431 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2432 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2433 &clkcomp, 8);
2434 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2435 &vcpu->run->s.regs.acrs, 64);
2436 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2437 &vcpu->arch.sie_block->gcr, 128);
2438 return rc ? -EFAULT : 0;
2439 }
2440
2441 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2442 {
2443 /*
2444 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2445 * copying in vcpu load/put. Let's update our copies before we save
2446 * it into the save area.
2447 */
2448 save_fpu_regs();
2449 if (test_kvm_facility(vcpu->kvm, 129)) {
2450 /*
2451 * If the vector extension is available, the vector registers
2452 * which overlap with the floating-point registers are saved in
2453 * the SIE-control block. Hence, extract the floating-point
2454 * registers and the FPC value and store them in the
2455 * guest_fpregs structure.
2456 */
2457 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2458 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2459 current->thread.fpu.vxrs);
2460 } else
2461 save_fpu_to(&vcpu->arch.guest_fpregs);
2462 save_access_regs(vcpu->run->s.regs.acrs);
2463
2464 return kvm_s390_store_status_unloaded(vcpu, addr);
2465 }
2466
2467 /*
2468 * store additional status at address
2469 */
2470 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2471 unsigned long gpa)
2472 {
2473 /* Only bits 0-53 are used for address formation */
2474 if (!(gpa & ~0x3ff))
2475 return 0;
2476
2477 return write_guest_abs(vcpu, gpa & ~0x3ff,
2478 (void *)&vcpu->run->s.regs.vrs, 512);
2479 }
2480
2481 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2482 {
2483 if (!test_kvm_facility(vcpu->kvm, 129))
2484 return 0;
2485
2486 /*
2487 * The guest VXRS are in the host VXRS due to the lazy
2488 * copying in vcpu load/put. We can simply call save_fpu_regs()
2489 * to save the current register state because we are in the
2490 * middle of a load/put cycle.
2491 *
2492 * Let's update our copies before we save it into the save area.
2493 */
2494 save_fpu_regs();
2495
2496 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2497 }
2498
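/*
 * IBS is an optimization that is only used while a single VCPU is
 * running. It is toggled via synchronous requests: a pending request
 * for the opposite state is consumed first, then the new state is
 * requested so it takes effect before the VCPU re-enters SIE.
 */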
2499 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2500 {
2501 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2502 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2503 }
2504
2505 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2506 {
2507 unsigned int i;
2508 struct kvm_vcpu *vcpu;
2509
2510 kvm_for_each_vcpu(i, vcpu, kvm) {
2511 __disable_ibs_on_vcpu(vcpu);
2512 }
2513 }
2514
2515 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2516 {
2517 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2518 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2519 }
2520
2521 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2522 {
2523 int i, online_vcpus, started_vcpus = 0;
2524
2525 if (!is_vcpu_stopped(vcpu))
2526 return;
2527
2528 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2529 /* Only one cpu at a time may enter/leave the STOPPED state. */
2530 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2531 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2532
2533 for (i = 0; i < online_vcpus; i++) {
2534 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2535 started_vcpus++;
2536 }
2537
2538 if (started_vcpus == 0) {
2539 /* we're the only active VCPU -> speed it up */
2540 __enable_ibs_on_vcpu(vcpu);
2541 } else if (started_vcpus == 1) {
2542 /*
2543 * As we are starting a second VCPU, we have to disable
2544 * the IBS facility on all VCPUs to remove potentially
2545 * outstanding ENABLE requests.
2546 */
2547 __disable_ibs_on_all_vcpus(vcpu->kvm);
2548 }
2549
2550 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2551 /*
2552 * Another VCPU might have used IBS while we were offline.
2553 * Let's play safe and flush the VCPU at startup.
2554 */
2555 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2556 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2557 return;
2558 }
2559
2560 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2561 {
2562 int i, online_vcpus, started_vcpus = 0;
2563 struct kvm_vcpu *started_vcpu = NULL;
2564
2565 if (is_vcpu_stopped(vcpu))
2566 return;
2567
2568 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2569 /* Only one cpu at a time may enter/leave the STOPPED state. */
2570 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2571 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2572
2573 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2574 kvm_s390_clear_stop_irq(vcpu);
2575
2576 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2577 __disable_ibs_on_vcpu(vcpu);
2578
2579 for (i = 0; i < online_vcpus; i++) {
2580 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2581 started_vcpus++;
2582 started_vcpu = vcpu->kvm->vcpus[i];
2583 }
2584 }
2585
2586 if (started_vcpus == 1) {
2587 /*
2588 * As we only have one VCPU left, we want to enable the
2589 * IBS facility for that VCPU to speed it up.
2590 */
2591 __enable_ibs_on_vcpu(started_vcpu);
2592 }
2593
2594 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2595 return;
2596 }
2597
2598 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2599 struct kvm_enable_cap *cap)
2600 {
2601 int r;
2602
2603 if (cap->flags)
2604 return -EINVAL;
2605
2606 switch (cap->cap) {
2607 case KVM_CAP_S390_CSS_SUPPORT:
2608 if (!vcpu->kvm->arch.css_support) {
2609 vcpu->kvm->arch.css_support = 1;
2610 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2611 trace_kvm_s390_enable_css(vcpu->kvm);
2612 }
2613 r = 0;
2614 break;
2615 default:
2616 r = -EINVAL;
2617 break;
2618 }
2619 return r;
2620 }
2621
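/*
 * Handle the KVM_S390_MEM_OP ioctl: read or write guest logical memory
 * through a temporary kernel buffer, or only check the access when
 * KVM_S390_MEMOP_F_CHECK_ONLY is set. A positive return value means a
 * guest access exception; it is injected if the caller asked for it.
 */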
2622 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2623 struct kvm_s390_mem_op *mop)
2624 {
2625 void __user *uaddr = (void __user *)mop->buf;
2626 void *tmpbuf = NULL;
2627 int r, srcu_idx;
2628 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2629 | KVM_S390_MEMOP_F_CHECK_ONLY;
2630
2631 if (mop->flags & ~supported_flags)
2632 return -EINVAL;
2633
2634 if (mop->size > MEM_OP_MAX_SIZE)
2635 return -E2BIG;
2636
2637 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2638 tmpbuf = vmalloc(mop->size);
2639 if (!tmpbuf)
2640 return -ENOMEM;
2641 }
2642
2643 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2644
2645 switch (mop->op) {
2646 case KVM_S390_MEMOP_LOGICAL_READ:
2647 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2648 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2649 break;
2650 }
2651 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2652 if (r == 0) {
2653 if (copy_to_user(uaddr, tmpbuf, mop->size))
2654 r = -EFAULT;
2655 }
2656 break;
2657 case KVM_S390_MEMOP_LOGICAL_WRITE:
2658 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2659 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2660 break;
2661 }
2662 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2663 r = -EFAULT;
2664 break;
2665 }
2666 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2667 break;
2668 default:
2669 r = -EINVAL;
2670 }
2671
2672 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2673
2674 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2675 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2676
2677 vfree(tmpbuf);
2678 return r;
2679 }
2680
2681 long kvm_arch_vcpu_ioctl(struct file *filp,
2682 unsigned int ioctl, unsigned long arg)
2683 {
2684 struct kvm_vcpu *vcpu = filp->private_data;
2685 void __user *argp = (void __user *)arg;
2686 int idx;
2687 long r;
2688
2689 switch (ioctl) {
2690 case KVM_S390_IRQ: {
2691 struct kvm_s390_irq s390irq;
2692
2693 r = -EFAULT;
2694 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2695 break;
2696 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2697 break;
2698 }
2699 case KVM_S390_INTERRUPT: {
2700 struct kvm_s390_interrupt s390int;
2701 struct kvm_s390_irq s390irq;
2702
2703 r = -EFAULT;
2704 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2705 break;
2706 if (s390int_to_s390irq(&s390int, &s390irq))
2707 return -EINVAL;
2708 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2709 break;
2710 }
2711 case KVM_S390_STORE_STATUS:
2712 idx = srcu_read_lock(&vcpu->kvm->srcu);
2713 r = kvm_s390_vcpu_store_status(vcpu, arg);
2714 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2715 break;
2716 case KVM_S390_SET_INITIAL_PSW: {
2717 psw_t psw;
2718
2719 r = -EFAULT;
2720 if (copy_from_user(&psw, argp, sizeof(psw)))
2721 break;
2722 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2723 break;
2724 }
2725 case KVM_S390_INITIAL_RESET:
2726 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2727 break;
2728 case KVM_SET_ONE_REG:
2729 case KVM_GET_ONE_REG: {
2730 struct kvm_one_reg reg;
2731 r = -EFAULT;
2732 if (copy_from_user(&reg, argp, sizeof(reg)))
2733 break;
2734 if (ioctl == KVM_SET_ONE_REG)
2735 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2736 else
2737 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2738 break;
2739 }
2740 #ifdef CONFIG_KVM_S390_UCONTROL
2741 case KVM_S390_UCAS_MAP: {
2742 struct kvm_s390_ucas_mapping ucasmap;
2743
2744 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2745 r = -EFAULT;
2746 break;
2747 }
2748
2749 if (!kvm_is_ucontrol(vcpu->kvm)) {
2750 r = -EINVAL;
2751 break;
2752 }
2753
2754 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2755 ucasmap.vcpu_addr, ucasmap.length);
2756 break;
2757 }
2758 case KVM_S390_UCAS_UNMAP: {
2759 struct kvm_s390_ucas_mapping ucasmap;
2760
2761 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2762 r = -EFAULT;
2763 break;
2764 }
2765
2766 if (!kvm_is_ucontrol(vcpu->kvm)) {
2767 r = -EINVAL;
2768 break;
2769 }
2770
2771 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2772 ucasmap.length);
2773 break;
2774 }
2775 #endif
2776 case KVM_S390_VCPU_FAULT: {
2777 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2778 break;
2779 }
2780 case KVM_ENABLE_CAP:
2781 {
2782 struct kvm_enable_cap cap;
2783 r = -EFAULT;
2784 if (copy_from_user(&cap, argp, sizeof(cap)))
2785 break;
2786 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2787 break;
2788 }
2789 case KVM_S390_MEM_OP: {
2790 struct kvm_s390_mem_op mem_op;
2791
2792 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2793 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2794 else
2795 r = -EFAULT;
2796 break;
2797 }
2798 case KVM_S390_SET_IRQ_STATE: {
2799 struct kvm_s390_irq_state irq_state;
2800
2801 r = -EFAULT;
2802 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2803 break;
2804 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2805 irq_state.len == 0 ||
2806 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2807 r = -EINVAL;
2808 break;
2809 }
2810 r = kvm_s390_set_irq_state(vcpu,
2811 (void __user *) irq_state.buf,
2812 irq_state.len);
2813 break;
2814 }
2815 case KVM_S390_GET_IRQ_STATE: {
2816 struct kvm_s390_irq_state irq_state;
2817
2818 r = -EFAULT;
2819 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2820 break;
2821 if (irq_state.len == 0) {
2822 r = -EINVAL;
2823 break;
2824 }
2825 r = kvm_s390_get_irq_state(vcpu,
2826 (__u8 __user *) irq_state.buf,
2827 irq_state.len);
2828 break;
2829 }
2830 default:
2831 r = -ENOTTY;
2832 }
2833 return r;
2834 }
2835
2836 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2837 {
2838 #ifdef CONFIG_KVM_S390_UCONTROL
2839 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2840 && (kvm_is_ucontrol(vcpu->kvm))) {
2841 vmf->page = virt_to_page(vcpu->arch.sie_block);
2842 get_page(vmf->page);
2843 return 0;
2844 }
2845 #endif
2846 return VM_FAULT_SIGBUS;
2847 }
2848
2849 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2850 unsigned long npages)
2851 {
2852 return 0;
2853 }
2854
2855 /* Section: memory related */
2856 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2857 struct kvm_memory_slot *memslot,
2858 const struct kvm_userspace_memory_region *mem,
2859 enum kvm_mr_change change)
2860 {
2861 /* A few sanity checks. Memory slots have to start and end at a
2862 segment boundary (1MB). The memory in userland may be fragmented into
2863 various different vmas. It is okay to mmap() and munmap() memory in
2864 this slot at any time after this call. */
2865
2866 if (mem->userspace_addr & 0xffffful)
2867 return -EINVAL;
2868
2869 if (mem->memory_size & 0xffffful)
2870 return -EINVAL;
2871
2872 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2873 return -EINVAL;
2874
2875 return 0;
2876 }
2877
2878 void kvm_arch_commit_memory_region(struct kvm *kvm,
2879 const struct kvm_userspace_memory_region *mem,
2880 const struct kvm_memory_slot *old,
2881 const struct kvm_memory_slot *new,
2882 enum kvm_mr_change change)
2883 {
2884 int rc;
2885
2886 /* If the basics of the memslot do not change, we do not want
2887 * to update the gmap. Every update causes several unnecessary
2888 * segment translation exceptions. This is usually handled just
2889 * fine by the normal fault handler + gmap, but it will also
2890 * cause faults on the prefix page of running guest CPUs.
2891 */
2892 if (old->userspace_addr == mem->userspace_addr &&
2893 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2894 old->npages * PAGE_SIZE == mem->memory_size)
2895 return;
2896
2897 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2898 mem->guest_phys_addr, mem->memory_size);
2899 if (rc)
2900 pr_warn("failed to commit memory region\n");
2901 return;
2902 }
2903
2904 static int __init kvm_s390_init(void)
2905 {
2906 if (!sclp.has_sief2) {
2907 pr_info("SIE not available\n");
2908 return -ENODEV;
2909 }
2910
2911 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2912 }
2913
2914 static void __exit kvm_s390_exit(void)
2915 {
2916 kvm_exit();
2917 }
2918
2919 module_init(kvm_s390_init);
2920 module_exit(kvm_s390_exit);
2921
2922 /*
2923 * Enable autoloading of the kvm module.
2924 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2925 * since x86 takes a different approach.
2926 */
2927 #include <linux/miscdevice.h>
2928 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2929 MODULE_ALIAS("devname:kvm");