1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <linux/bitmap.h>
30 #include <asm/asm-offsets.h>
31 #include <asm/lowcore.h>
32 #include <asm/etr.h>
33 #include <asm/pgtable.h>
34 #include <asm/gmap.h>
35 #include <asm/nmi.h>
36 #include <asm/switch_to.h>
37 #include <asm/isc.h>
38 #include <asm/sclp.h>
39 #include <asm/cpacf.h>
41 #include "kvm-s390.h"
42 #include "gaccess.h"
43
44 #define KMSG_COMPONENT "kvm-s390"
45 #undef pr_fmt
46 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
47
48 #define CREATE_TRACE_POINTS
49 #include "trace.h"
50 #include "trace-s390.h"
51
52 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
53 #define LOCAL_IRQS 32
54 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
55 (KVM_MAX_VCPUS + LOCAL_IRQS))
56
57 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
58
59 struct kvm_stats_debugfs_item debugfs_entries[] = {
60 { "userspace_handled", VCPU_STAT(exit_userspace) },
61 { "exit_null", VCPU_STAT(exit_null) },
62 { "exit_validity", VCPU_STAT(exit_validity) },
63 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
64 { "exit_external_request", VCPU_STAT(exit_external_request) },
65 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
66 { "exit_instruction", VCPU_STAT(exit_instruction) },
67 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
68 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
69 { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
70 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
71 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
72 { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
73 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
74 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
75 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
76 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
77 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
78 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
79 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
80 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
81 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
82 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
83 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
84 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
85 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
86 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
87 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
88 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
89 { "instruction_spx", VCPU_STAT(instruction_spx) },
90 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
91 { "instruction_stap", VCPU_STAT(instruction_stap) },
92 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
93 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
94 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
95 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
96 { "instruction_essa", VCPU_STAT(instruction_essa) },
97 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
98 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
99 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
100 { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
101 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
102 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
103 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
104 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
105 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
106 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
107 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
108 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
109 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
110 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
111 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
112 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
113 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
114 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
115 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
116 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
117 { "diagnose_10", VCPU_STAT(diagnose_10) },
118 { "diagnose_44", VCPU_STAT(diagnose_44) },
119 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
120 { "diagnose_258", VCPU_STAT(diagnose_258) },
121 { "diagnose_308", VCPU_STAT(diagnose_308) },
122 { "diagnose_500", VCPU_STAT(diagnose_500) },
123 { NULL }
124 };
125
126 /* upper facilities limit for kvm */
127 unsigned long kvm_s390_fac_list_mask[16] = {
128 0xffe6000000000000UL,
129 0x005e000000000000UL,
130 };
131
132 unsigned long kvm_s390_fac_list_mask_size(void)
133 {
134 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
135 return ARRAY_SIZE(kvm_s390_fac_list_mask);
136 }
137
138 /* available cpu features supported by kvm */
139 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
140 /* available subfunctions indicated via query / "test bit" */
141 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
142
143 static struct gmap_notifier gmap_notifier;
144 debug_info_t *kvm_s390_dbf;
145
146 /* Section: not file related */
147 int kvm_arch_hardware_enable(void)
148 {
149 /* every s390 is virtualization enabled ;-) */
150 return 0;
151 }
152
153 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
154
155 /*
156 * This callback is executed during stop_machine(). All CPUs are therefore
157 * temporarily stopped. In order not to change guest behavior, we have to
158 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
159 * so a CPU won't be stopped while calculating with the epoch.
160 */
161 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
162 void *v)
163 {
164 struct kvm *kvm;
165 struct kvm_vcpu *vcpu;
166 int i;
167 unsigned long long *delta = v;
168
169 list_for_each_entry(kvm, &vm_list, vm_list) {
170 kvm->arch.epoch -= *delta;
171 kvm_for_each_vcpu(i, vcpu, kvm) {
172 vcpu->arch.sie_block->epoch -= *delta;
173 if (vcpu->arch.cputm_enabled)
174 vcpu->arch.cputm_start += *delta;
175 }
176 }
177 return NOTIFY_OK;
178 }
179
180 static struct notifier_block kvm_clock_notifier = {
181 .notifier_call = kvm_clock_sync,
182 };
183
184 int kvm_arch_hardware_setup(void)
185 {
186 gmap_notifier.notifier_call = kvm_gmap_notifier;
187 gmap_register_ipte_notifier(&gmap_notifier);
188 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
189 &kvm_clock_notifier);
190 return 0;
191 }
192
193 void kvm_arch_hardware_unsetup(void)
194 {
195 gmap_unregister_ipte_notifier(&gmap_notifier);
196 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
197 &kvm_clock_notifier);
198 }
199
200 static void allow_cpu_feat(unsigned long nr)
201 {
202 set_bit_inv(nr, kvm_s390_available_cpu_feat);
203 }
204
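/*
 * Query whether a PERFORM LOCKED OPERATION (PLO) subfunction is installed.
 * ORing 0x100 into the function code selects the "test bit" query; a
 * condition code of 0 means the subfunction is available.
 */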
205 static inline int plo_test_bit(unsigned char nr)
206 {
207 register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
208 int cc = 3; /* subfunction not available */
209
210 asm volatile(
211 /* Parameter registers are ignored for "test bit" */
212 " plo 0,0,0,0(0)\n"
213 " ipm %0\n"
214 " srl %0,28\n"
215 : "=d" (cc)
216 : "d" (r0)
217 : "cc");
218 return cc == 0;
219 }
220
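/*
 * Probe the host for the (sub)functions and CPU features that KVM can
 * offer to guests: PLO subfunctions, the PTFF query for TOD-clock
 * steering, the CPACF crypto queries and CPU features such as ESOP.
 */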
221 static void kvm_s390_cpu_feat_init(void)
222 {
223 int i;
224
225 for (i = 0; i < 256; ++i) {
226 if (plo_test_bit(i))
227 kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
228 }
229
230 if (test_facility(28)) /* TOD-clock steering */
231 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
232
233 if (test_facility(17)) { /* MSA */
234 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
235 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
236 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
237 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
238 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
239 }
240 if (test_facility(76)) /* MSA3 */
241 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
242 if (test_facility(77)) { /* MSA4 */
243 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
244 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
245 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
246 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
247 }
248 if (test_facility(57)) /* MSA5 */
249 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
250
251 if (MACHINE_HAS_ESOP)
252 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
253 }
254
255 int kvm_arch_init(void *opaque)
256 {
257 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
258 if (!kvm_s390_dbf)
259 return -ENOMEM;
260
261 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
262 debug_unregister(kvm_s390_dbf);
263 return -ENOMEM;
264 }
265
266 kvm_s390_cpu_feat_init();
267
268 /* Register floating interrupt controller interface. */
269 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
270 }
271
272 void kvm_arch_exit(void)
273 {
274 debug_unregister(kvm_s390_dbf);
275 }
276
277 /* Section: device related */
278 long kvm_arch_dev_ioctl(struct file *filp,
279 unsigned int ioctl, unsigned long arg)
280 {
281 if (ioctl == KVM_S390_ENABLE_SIE)
282 return s390_enable_sie();
283 return -EINVAL;
284 }
285
286 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
287 {
288 int r;
289
290 switch (ext) {
291 case KVM_CAP_S390_PSW:
292 case KVM_CAP_S390_GMAP:
293 case KVM_CAP_SYNC_MMU:
294 #ifdef CONFIG_KVM_S390_UCONTROL
295 case KVM_CAP_S390_UCONTROL:
296 #endif
297 case KVM_CAP_ASYNC_PF:
298 case KVM_CAP_SYNC_REGS:
299 case KVM_CAP_ONE_REG:
300 case KVM_CAP_ENABLE_CAP:
301 case KVM_CAP_S390_CSS_SUPPORT:
302 case KVM_CAP_IOEVENTFD:
303 case KVM_CAP_DEVICE_CTRL:
304 case KVM_CAP_ENABLE_CAP_VM:
305 case KVM_CAP_S390_IRQCHIP:
306 case KVM_CAP_VM_ATTRIBUTES:
307 case KVM_CAP_MP_STATE:
308 case KVM_CAP_S390_INJECT_IRQ:
309 case KVM_CAP_S390_USER_SIGP:
310 case KVM_CAP_S390_USER_STSI:
311 case KVM_CAP_S390_SKEYS:
312 case KVM_CAP_S390_IRQ_STATE:
313 r = 1;
314 break;
315 case KVM_CAP_S390_MEM_OP:
316 r = MEM_OP_MAX_SIZE;
317 break;
318 case KVM_CAP_NR_VCPUS:
319 case KVM_CAP_MAX_VCPUS:
320 r = KVM_S390_BSCA_CPU_SLOTS;
321 if (sclp.has_esca && sclp.has_64bscao)
322 r = KVM_S390_ESCA_CPU_SLOTS;
323 break;
324 case KVM_CAP_NR_MEMSLOTS:
325 r = KVM_USER_MEM_SLOTS;
326 break;
327 case KVM_CAP_S390_COW:
328 r = MACHINE_HAS_ESOP;
329 break;
330 case KVM_CAP_S390_VECTOR_REGISTERS:
331 r = MACHINE_HAS_VX;
332 break;
333 case KVM_CAP_S390_RI:
334 r = test_facility(64);
335 break;
336 default:
337 r = 0;
338 }
339 return r;
340 }
341
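/*
 * Walk all pages of a memory slot, transfer the per-page dirty state from
 * the host page tables into the KVM dirty bitmap, bail out if a fatal
 * signal is pending and reschedule regularly to keep latencies low.
 */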
342 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
343 struct kvm_memory_slot *memslot)
344 {
345 gfn_t cur_gfn, last_gfn;
346 unsigned long address;
347 struct gmap *gmap = kvm->arch.gmap;
348
349 /* Loop over all guest pages */
350 last_gfn = memslot->base_gfn + memslot->npages;
351 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
352 address = gfn_to_hva_memslot(memslot, cur_gfn);
353
354 if (test_and_clear_guest_dirty(gmap->mm, address))
355 mark_page_dirty(kvm, cur_gfn);
356 if (fatal_signal_pending(current))
357 return;
358 cond_resched();
359 }
360 }
361
362 /* Section: vm related */
363 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
364
365 /*
366 * Get (and clear) the dirty memory log for a memory slot.
367 */
368 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
369 struct kvm_dirty_log *log)
370 {
371 int r;
372 unsigned long n;
373 struct kvm_memslots *slots;
374 struct kvm_memory_slot *memslot;
375 int is_dirty = 0;
376
377 mutex_lock(&kvm->slots_lock);
378
379 r = -EINVAL;
380 if (log->slot >= KVM_USER_MEM_SLOTS)
381 goto out;
382
383 slots = kvm_memslots(kvm);
384 memslot = id_to_memslot(slots, log->slot);
385 r = -ENOENT;
386 if (!memslot->dirty_bitmap)
387 goto out;
388
389 kvm_s390_sync_dirty_log(kvm, memslot);
390 r = kvm_get_dirty_log(kvm, log, &is_dirty);
391 if (r)
392 goto out;
393
394 /* Clear the dirty log */
395 if (is_dirty) {
396 n = kvm_dirty_bitmap_bytes(memslot);
397 memset(memslot->dirty_bitmap, 0, n);
398 }
399 r = 0;
400 out:
401 mutex_unlock(&kvm->slots_lock);
402 return r;
403 }
404
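/*
 * Enable an optional VM capability. Capabilities that change the CPU
 * model (vector registers, runtime instrumentation) can only be enabled
 * as long as no VCPU has been created yet.
 */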
405 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
406 {
407 int r;
408
409 if (cap->flags)
410 return -EINVAL;
411
412 switch (cap->cap) {
413 case KVM_CAP_S390_IRQCHIP:
414 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
415 kvm->arch.use_irqchip = 1;
416 r = 0;
417 break;
418 case KVM_CAP_S390_USER_SIGP:
419 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
420 kvm->arch.user_sigp = 1;
421 r = 0;
422 break;
423 case KVM_CAP_S390_VECTOR_REGISTERS:
424 mutex_lock(&kvm->lock);
425 if (atomic_read(&kvm->online_vcpus)) {
426 r = -EBUSY;
427 } else if (MACHINE_HAS_VX) {
428 set_kvm_facility(kvm->arch.model.fac_mask, 129);
429 set_kvm_facility(kvm->arch.model.fac_list, 129);
430 r = 0;
431 } else
432 r = -EINVAL;
433 mutex_unlock(&kvm->lock);
434 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
435 r ? "(not available)" : "(success)");
436 break;
437 case KVM_CAP_S390_RI:
438 r = -EINVAL;
439 mutex_lock(&kvm->lock);
440 if (atomic_read(&kvm->online_vcpus)) {
441 r = -EBUSY;
442 } else if (test_facility(64)) {
443 set_kvm_facility(kvm->arch.model.fac_mask, 64);
444 set_kvm_facility(kvm->arch.model.fac_list, 64);
445 r = 0;
446 }
447 mutex_unlock(&kvm->lock);
448 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
449 r ? "(not available)" : "(success)");
450 break;
451 case KVM_CAP_S390_USER_STSI:
452 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
453 kvm->arch.user_stsi = 1;
454 r = 0;
455 break;
456 default:
457 r = -EINVAL;
458 break;
459 }
460 return r;
461 }
462
463 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
464 {
465 int ret;
466
467 switch (attr->attr) {
468 case KVM_S390_VM_MEM_LIMIT_SIZE:
469 ret = 0;
470 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
471 kvm->arch.mem_limit);
472 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
473 ret = -EFAULT;
474 break;
475 default:
476 ret = -ENXIO;
477 break;
478 }
479 return ret;
480 }
481
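/*
 * Handle the KVM_S390_VM_MEM_CTRL attribute group: enable CMMA, reset the
 * CMMA state, or set a new guest memory limit (the latter replaces the
 * gmap and is only allowed before the first VCPU is created).
 */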
482 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
483 {
484 int ret;
485 unsigned int idx;
486 switch (attr->attr) {
487 case KVM_S390_VM_MEM_ENABLE_CMMA:
488 ret = -ENXIO;
489 if (!sclp.has_cmma)
490 break;
491
492 ret = -EBUSY;
493 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
494 mutex_lock(&kvm->lock);
495 if (atomic_read(&kvm->online_vcpus) == 0) {
496 kvm->arch.use_cmma = 1;
497 ret = 0;
498 }
499 mutex_unlock(&kvm->lock);
500 break;
501 case KVM_S390_VM_MEM_CLR_CMMA:
502 ret = -ENXIO;
503 if (!sclp.has_cmma)
504 break;
505 ret = -EINVAL;
506 if (!kvm->arch.use_cmma)
507 break;
508
509 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
510 mutex_lock(&kvm->lock);
511 idx = srcu_read_lock(&kvm->srcu);
512 s390_reset_cmma(kvm->arch.gmap->mm);
513 srcu_read_unlock(&kvm->srcu, idx);
514 mutex_unlock(&kvm->lock);
515 ret = 0;
516 break;
517 case KVM_S390_VM_MEM_LIMIT_SIZE: {
518 unsigned long new_limit;
519
520 if (kvm_is_ucontrol(kvm))
521 return -EINVAL;
522
523 if (get_user(new_limit, (u64 __user *)attr->addr))
524 return -EFAULT;
525
526 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
527 new_limit > kvm->arch.mem_limit)
528 return -E2BIG;
529
530 if (!new_limit)
531 return -EINVAL;
532
533 /* gmap_alloc takes last usable address */
534 if (new_limit != KVM_S390_NO_MEM_LIMIT)
535 new_limit -= 1;
536
537 ret = -EBUSY;
538 mutex_lock(&kvm->lock);
539 if (atomic_read(&kvm->online_vcpus) == 0) {
540 /* gmap_alloc will round the limit up */
541 struct gmap *new = gmap_alloc(current->mm, new_limit);
542
543 if (!new) {
544 ret = -ENOMEM;
545 } else {
546 gmap_free(kvm->arch.gmap);
547 new->private = kvm;
548 kvm->arch.gmap = new;
549 ret = 0;
550 }
551 }
552 mutex_unlock(&kvm->lock);
553 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
554 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
555 (void *) kvm->arch.gmap->asce);
556 break;
557 }
558 default:
559 ret = -ENXIO;
560 break;
561 }
562 return ret;
563 }
564
565 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
566
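/*
 * Toggle AES/DEA key wrapping for the whole VM. Fresh wrapping key masks
 * are generated when wrapping is enabled; all VCPUs are kicked out of SIE
 * so that they pick up the changed crypto settings.
 */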
567 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 struct kvm_vcpu *vcpu;
570 int i;
571
572 if (!test_kvm_facility(kvm, 76))
573 return -EINVAL;
574
575 mutex_lock(&kvm->lock);
576 switch (attr->attr) {
577 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
578 get_random_bytes(
579 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
580 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
581 kvm->arch.crypto.aes_kw = 1;
582 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
583 break;
584 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
585 get_random_bytes(
586 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
587 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
588 kvm->arch.crypto.dea_kw = 1;
589 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
590 break;
591 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
592 kvm->arch.crypto.aes_kw = 0;
593 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
594 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
595 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
596 break;
597 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
598 kvm->arch.crypto.dea_kw = 0;
599 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
600 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
601 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
602 break;
603 default:
604 mutex_unlock(&kvm->lock);
605 return -ENXIO;
606 }
607
608 kvm_for_each_vcpu(i, vcpu, kvm) {
609 kvm_s390_vcpu_crypto_setup(vcpu);
610 exit_sie(vcpu);
611 }
612 mutex_unlock(&kvm->lock);
613 return 0;
614 }
615
616 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
617 {
618 u8 gtod_high;
619
620 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
621 sizeof(gtod_high)))
622 return -EFAULT;
623
624 if (gtod_high != 0)
625 return -EINVAL;
626 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
627
628 return 0;
629 }
630
631 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
632 {
633 u64 gtod;
634
635 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
636 return -EFAULT;
637
638 kvm_s390_set_tod_clock(kvm, gtod);
639 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
640 return 0;
641 }
642
643 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
644 {
645 int ret;
646
647 if (attr->flags)
648 return -EINVAL;
649
650 switch (attr->attr) {
651 case KVM_S390_VM_TOD_HIGH:
652 ret = kvm_s390_set_tod_high(kvm, attr);
653 break;
654 case KVM_S390_VM_TOD_LOW:
655 ret = kvm_s390_set_tod_low(kvm, attr);
656 break;
657 default:
658 ret = -ENXIO;
659 break;
660 }
661 return ret;
662 }
663
664 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
665 {
666 u8 gtod_high = 0;
667
668 if (copy_to_user((void __user *)attr->addr, &gtod_high,
669 sizeof(gtod_high)))
670 return -EFAULT;
671 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
672
673 return 0;
674 }
675
676 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
677 {
678 u64 gtod;
679
680 gtod = kvm_s390_get_tod_clock_fast(kvm);
681 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
682 return -EFAULT;
683 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
684
685 return 0;
686 }
687
688 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
689 {
690 int ret;
691
692 if (attr->flags)
693 return -EINVAL;
694
695 switch (attr->attr) {
696 case KVM_S390_VM_TOD_HIGH:
697 ret = kvm_s390_get_tod_high(kvm, attr);
698 break;
699 case KVM_S390_VM_TOD_LOW:
700 ret = kvm_s390_get_tod_low(kvm, attr);
701 break;
702 default:
703 ret = -ENXIO;
704 break;
705 }
706 return ret;
707 }
708
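/*
 * Set the guest CPU model from user space: CPU id, IBC value (clamped to
 * the range reported by the SCLP) and the guest facility list. Only
 * allowed before the first VCPU is created.
 */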
709 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
710 {
711 struct kvm_s390_vm_cpu_processor *proc;
712 u16 lowest_ibc, unblocked_ibc;
713 int ret = 0;
714
715 mutex_lock(&kvm->lock);
716 if (atomic_read(&kvm->online_vcpus)) {
717 ret = -EBUSY;
718 goto out;
719 }
720 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
721 if (!proc) {
722 ret = -ENOMEM;
723 goto out;
724 }
725 if (!copy_from_user(proc, (void __user *)attr->addr,
726 sizeof(*proc))) {
727 kvm->arch.model.cpuid = proc->cpuid;
728 lowest_ibc = sclp.ibc >> 16 & 0xfff;
729 unblocked_ibc = sclp.ibc & 0xfff;
730 if (lowest_ibc) {
731 if (proc->ibc > unblocked_ibc)
732 kvm->arch.model.ibc = unblocked_ibc;
733 else if (proc->ibc < lowest_ibc)
734 kvm->arch.model.ibc = lowest_ibc;
735 else
736 kvm->arch.model.ibc = proc->ibc;
737 }
738 memcpy(kvm->arch.model.fac_list, proc->fac_list,
739 S390_ARCH_FAC_LIST_SIZE_BYTE);
740 } else
741 ret = -EFAULT;
742 kfree(proc);
743 out:
744 mutex_unlock(&kvm->lock);
745 return ret;
746 }
747
748 static int kvm_s390_set_processor_feat(struct kvm *kvm,
749 struct kvm_device_attr *attr)
750 {
751 struct kvm_s390_vm_cpu_feat data;
752 int ret = -EBUSY;
753
754 if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
755 return -EFAULT;
756 if (!bitmap_subset((unsigned long *) data.feat,
757 kvm_s390_available_cpu_feat,
758 KVM_S390_VM_CPU_FEAT_NR_BITS))
759 return -EINVAL;
760
761 mutex_lock(&kvm->lock);
762 if (!atomic_read(&kvm->online_vcpus)) {
763 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
764 KVM_S390_VM_CPU_FEAT_NR_BITS);
765 ret = 0;
766 }
767 mutex_unlock(&kvm->lock);
768 return ret;
769 }
770
771 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
772 struct kvm_device_attr *attr)
773 {
774 /*
775 * Once supported by kernel + hw, we have to store the subfunctions
776 * in kvm->arch and remember that user space configured them.
777 */
778 return -ENXIO;
779 }
780
781 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
782 {
783 int ret = -ENXIO;
784
785 switch (attr->attr) {
786 case KVM_S390_VM_CPU_PROCESSOR:
787 ret = kvm_s390_set_processor(kvm, attr);
788 break;
789 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
790 ret = kvm_s390_set_processor_feat(kvm, attr);
791 break;
792 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
793 ret = kvm_s390_set_processor_subfunc(kvm, attr);
794 break;
795 }
796 return ret;
797 }
798
799 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
800 {
801 struct kvm_s390_vm_cpu_processor *proc;
802 int ret = 0;
803
804 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
805 if (!proc) {
806 ret = -ENOMEM;
807 goto out;
808 }
809 proc->cpuid = kvm->arch.model.cpuid;
810 proc->ibc = kvm->arch.model.ibc;
811 memcpy(&proc->fac_list, kvm->arch.model.fac_list,
812 S390_ARCH_FAC_LIST_SIZE_BYTE);
813 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
814 ret = -EFAULT;
815 kfree(proc);
816 out:
817 return ret;
818 }
819
820 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
821 {
822 struct kvm_s390_vm_cpu_machine *mach;
823 int ret = 0;
824
825 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
826 if (!mach) {
827 ret = -ENOMEM;
828 goto out;
829 }
830 get_cpu_id((struct cpuid *) &mach->cpuid);
831 mach->ibc = sclp.ibc;
832 memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
833 S390_ARCH_FAC_LIST_SIZE_BYTE);
834 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
835 S390_ARCH_FAC_LIST_SIZE_BYTE);
836 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
837 ret = -EFAULT;
838 kfree(mach);
839 out:
840 return ret;
841 }
842
843 static int kvm_s390_get_processor_feat(struct kvm *kvm,
844 struct kvm_device_attr *attr)
845 {
846 struct kvm_s390_vm_cpu_feat data;
847
848 bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
849 KVM_S390_VM_CPU_FEAT_NR_BITS);
850 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
851 return -EFAULT;
852 return 0;
853 }
854
855 static int kvm_s390_get_machine_feat(struct kvm *kvm,
856 struct kvm_device_attr *attr)
857 {
858 struct kvm_s390_vm_cpu_feat data;
859
860 bitmap_copy((unsigned long *) data.feat,
861 kvm_s390_available_cpu_feat,
862 KVM_S390_VM_CPU_FEAT_NR_BITS);
863 if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
864 return -EFAULT;
865 return 0;
866 }
867
868 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
869 struct kvm_device_attr *attr)
870 {
871 /*
872 * Once we can actually configure subfunctions (kernel + hw support),
873 * we have to check if they were already set by user space, if so copy
874 * them from kvm->arch.
875 */
876 return -ENXIO;
877 }
878
879 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
880 struct kvm_device_attr *attr)
881 {
882 if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
883 sizeof(struct kvm_s390_vm_cpu_subfunc)))
884 return -EFAULT;
885 return 0;
886 }

887 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
888 {
889 int ret = -ENXIO;
890
891 switch (attr->attr) {
892 case KVM_S390_VM_CPU_PROCESSOR:
893 ret = kvm_s390_get_processor(kvm, attr);
894 break;
895 case KVM_S390_VM_CPU_MACHINE:
896 ret = kvm_s390_get_machine(kvm, attr);
897 break;
898 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
899 ret = kvm_s390_get_processor_feat(kvm, attr);
900 break;
901 case KVM_S390_VM_CPU_MACHINE_FEAT:
902 ret = kvm_s390_get_machine_feat(kvm, attr);
903 break;
904 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
905 ret = kvm_s390_get_processor_subfunc(kvm, attr);
906 break;
907 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
908 ret = kvm_s390_get_machine_subfunc(kvm, attr);
909 break;
910 }
911 return ret;
912 }
913
914 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
915 {
916 int ret;
917
918 switch (attr->group) {
919 case KVM_S390_VM_MEM_CTRL:
920 ret = kvm_s390_set_mem_control(kvm, attr);
921 break;
922 case KVM_S390_VM_TOD:
923 ret = kvm_s390_set_tod(kvm, attr);
924 break;
925 case KVM_S390_VM_CPU_MODEL:
926 ret = kvm_s390_set_cpu_model(kvm, attr);
927 break;
928 case KVM_S390_VM_CRYPTO:
929 ret = kvm_s390_vm_set_crypto(kvm, attr);
930 break;
931 default:
932 ret = -ENXIO;
933 break;
934 }
935
936 return ret;
937 }
938
939 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
940 {
941 int ret;
942
943 switch (attr->group) {
944 case KVM_S390_VM_MEM_CTRL:
945 ret = kvm_s390_get_mem_control(kvm, attr);
946 break;
947 case KVM_S390_VM_TOD:
948 ret = kvm_s390_get_tod(kvm, attr);
949 break;
950 case KVM_S390_VM_CPU_MODEL:
951 ret = kvm_s390_get_cpu_model(kvm, attr);
952 break;
953 default:
954 ret = -ENXIO;
955 break;
956 }
957
958 return ret;
959 }
960
961 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
962 {
963 int ret;
964
965 switch (attr->group) {
966 case KVM_S390_VM_MEM_CTRL:
967 switch (attr->attr) {
968 case KVM_S390_VM_MEM_ENABLE_CMMA:
969 case KVM_S390_VM_MEM_CLR_CMMA:
970 ret = sclp.has_cmma ? 0 : -ENXIO;
971 break;
972 case KVM_S390_VM_MEM_LIMIT_SIZE:
973 ret = 0;
974 break;
975 default:
976 ret = -ENXIO;
977 break;
978 }
979 break;
980 case KVM_S390_VM_TOD:
981 switch (attr->attr) {
982 case KVM_S390_VM_TOD_LOW:
983 case KVM_S390_VM_TOD_HIGH:
984 ret = 0;
985 break;
986 default:
987 ret = -ENXIO;
988 break;
989 }
990 break;
991 case KVM_S390_VM_CPU_MODEL:
992 switch (attr->attr) {
993 case KVM_S390_VM_CPU_PROCESSOR:
994 case KVM_S390_VM_CPU_MACHINE:
995 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
996 case KVM_S390_VM_CPU_MACHINE_FEAT:
997 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
998 ret = 0;
999 break;
1000 /* configuring subfunctions is not supported yet */
1001 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1002 default:
1003 ret = -ENXIO;
1004 break;
1005 }
1006 break;
1007 case KVM_S390_VM_CRYPTO:
1008 switch (attr->attr) {
1009 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1010 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1011 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1012 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1013 ret = 0;
1014 break;
1015 default:
1016 ret = -ENXIO;
1017 break;
1018 }
1019 break;
1020 default:
1021 ret = -ENXIO;
1022 break;
1023 }
1024
1025 return ret;
1026 }
1027
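/*
 * Read the guest storage keys for a range of guest frames into a
 * temporary buffer and copy them to user space. Returns
 * KVM_S390_GET_SKEYS_NONE if the guest does not use storage keys.
 */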
1028 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1029 {
1030 uint8_t *keys;
1031 uint64_t hva;
1032 int i, r = 0;
1033
1034 if (args->flags != 0)
1035 return -EINVAL;
1036
1037 /* Is this guest using storage keys? */
1038 if (!mm_use_skey(current->mm))
1039 return KVM_S390_GET_SKEYS_NONE;
1040
1041 /* Enforce sane limit on memory allocation */
1042 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1043 return -EINVAL;
1044
1045 keys = kmalloc_array(args->count, sizeof(uint8_t),
1046 GFP_KERNEL | __GFP_NOWARN);
1047 if (!keys)
1048 keys = vmalloc(sizeof(uint8_t) * args->count);
1049 if (!keys)
1050 return -ENOMEM;
1051
1052 down_read(&current->mm->mmap_sem);
1053 for (i = 0; i < args->count; i++) {
1054 hva = gfn_to_hva(kvm, args->start_gfn + i);
1055 if (kvm_is_error_hva(hva)) {
1056 r = -EFAULT;
1057 break;
1058 }
1059
1060 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1061 if (r)
1062 break;
1063 }
1064 up_read(&current->mm->mmap_sem);
1065
1066 if (!r) {
1067 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1068 sizeof(uint8_t) * args->count);
1069 if (r)
1070 r = -EFAULT;
1071 }
1072
1073 kvfree(keys);
1074 return r;
1075 }
1076
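/*
 * Write user-supplied storage keys for a range of guest frames. Storage
 * key handling is enabled for the guest on first use; keys with the
 * reserved low-order bit set are rejected.
 */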
1077 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1078 {
1079 uint8_t *keys;
1080 uint64_t hva;
1081 int i, r = 0;
1082
1083 if (args->flags != 0)
1084 return -EINVAL;
1085
1086 /* Enforce sane limit on memory allocation */
1087 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1088 return -EINVAL;
1089
1090 keys = kmalloc_array(args->count, sizeof(uint8_t),
1091 GFP_KERNEL | __GFP_NOWARN);
1092 if (!keys)
1093 keys = vmalloc(sizeof(uint8_t) * args->count);
1094 if (!keys)
1095 return -ENOMEM;
1096
1097 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1098 sizeof(uint8_t) * args->count);
1099 if (r) {
1100 r = -EFAULT;
1101 goto out;
1102 }
1103
1104 /* Enable storage key handling for the guest */
1105 r = s390_enable_skey();
1106 if (r)
1107 goto out;
1108
1109 down_read(&current->mm->mmap_sem);
1110 for (i = 0; i < args->count; i++) {
1111 hva = gfn_to_hva(kvm, args->start_gfn + i);
1112 if (kvm_is_error_hva(hva)) {
1113 r = -EFAULT;
1114 break;
1115 }
1116
1117 /* Lowest order bit is reserved */
1118 if (keys[i] & 0x01) {
1119 r = -EINVAL;
1120 break;
1121 }
1122
1123 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1124 if (r)
1125 break;
1126 }
1127 up_read(&current->mm->mmap_sem);
1128 out:
1129 kvfree(keys);
1130 return r;
1131 }
1132
1133 long kvm_arch_vm_ioctl(struct file *filp,
1134 unsigned int ioctl, unsigned long arg)
1135 {
1136 struct kvm *kvm = filp->private_data;
1137 void __user *argp = (void __user *)arg;
1138 struct kvm_device_attr attr;
1139 int r;
1140
1141 switch (ioctl) {
1142 case KVM_S390_INTERRUPT: {
1143 struct kvm_s390_interrupt s390int;
1144
1145 r = -EFAULT;
1146 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1147 break;
1148 r = kvm_s390_inject_vm(kvm, &s390int);
1149 break;
1150 }
1151 case KVM_ENABLE_CAP: {
1152 struct kvm_enable_cap cap;
1153 r = -EFAULT;
1154 if (copy_from_user(&cap, argp, sizeof(cap)))
1155 break;
1156 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1157 break;
1158 }
1159 case KVM_CREATE_IRQCHIP: {
1160 struct kvm_irq_routing_entry routing;
1161
1162 r = -EINVAL;
1163 if (kvm->arch.use_irqchip) {
1164 /* Set up dummy routing. */
1165 memset(&routing, 0, sizeof(routing));
1166 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1167 }
1168 break;
1169 }
1170 case KVM_SET_DEVICE_ATTR: {
1171 r = -EFAULT;
1172 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1173 break;
1174 r = kvm_s390_vm_set_attr(kvm, &attr);
1175 break;
1176 }
1177 case KVM_GET_DEVICE_ATTR: {
1178 r = -EFAULT;
1179 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1180 break;
1181 r = kvm_s390_vm_get_attr(kvm, &attr);
1182 break;
1183 }
1184 case KVM_HAS_DEVICE_ATTR: {
1185 r = -EFAULT;
1186 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1187 break;
1188 r = kvm_s390_vm_has_attr(kvm, &attr);
1189 break;
1190 }
1191 case KVM_S390_GET_SKEYS: {
1192 struct kvm_s390_skeys args;
1193
1194 r = -EFAULT;
1195 if (copy_from_user(&args, argp,
1196 sizeof(struct kvm_s390_skeys)))
1197 break;
1198 r = kvm_s390_get_skeys(kvm, &args);
1199 break;
1200 }
1201 case KVM_S390_SET_SKEYS: {
1202 struct kvm_s390_skeys args;
1203
1204 r = -EFAULT;
1205 if (copy_from_user(&args, argp,
1206 sizeof(struct kvm_s390_skeys)))
1207 break;
1208 r = kvm_s390_set_skeys(kvm, &args);
1209 break;
1210 }
1211 default:
1212 r = -ENOTTY;
1213 }
1214
1215 return r;
1216 }
1217
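/*
 * Query the Adjunct Processor configuration via PQAP(QCI). The 128 byte
 * config block is filled in on success; the condition code is returned.
 */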
1218 static int kvm_s390_query_ap_config(u8 *config)
1219 {
1220 u32 fcn_code = 0x04000000UL;
1221 u32 cc = 0;
1222
1223 memset(config, 0, 128);
1224 asm volatile(
1225 "lgr 0,%1\n"
1226 "lgr 2,%2\n"
1227 ".long 0xb2af0000\n" /* PQAP(QCI) */
1228 "0: ipm %0\n"
1229 "srl %0,28\n"
1230 "1:\n"
1231 EX_TABLE(0b, 1b)
1232 : "+r" (cc)
1233 : "r" (fcn_code), "r" (config)
1234 : "cc", "0", "2", "memory"
1235 );
1236
1237 return cc;
1238 }
1239
1240 static int kvm_s390_apxa_installed(void)
1241 {
1242 u8 config[128];
1243 int cc;
1244
1245 if (test_facility(12)) {
1246 cc = kvm_s390_query_ap_config(config);
1247
1248 if (cc)
1249 pr_err("PQAP(QCI) failed with cc=%d", cc);
1250 else
1251 return config[0] & 0x40;
1252 }
1253
1254 return 0;
1255 }
1256
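/*
 * Select the crypto control block format: format 2 if the APXA facility
 * is installed, format 1 otherwise.
 */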
1257 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1258 {
1259 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1260
1261 if (kvm_s390_apxa_installed())
1262 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1263 else
1264 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1265 }
1266
1267 static u64 kvm_s390_get_initial_cpuid(void)
1268 {
1269 struct cpuid cpuid;
1270
1271 get_cpu_id(&cpuid);
1272 cpuid.version = 0xff;
1273 return *((u64 *) &cpuid);
1274 }
1275
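/*
 * Initialize the VM crypto state if facility 76 (MSA extension 3) is
 * available: set up the crypto control block and generate initial AES/DEA
 * wrapping key masks.
 */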
1276 static void kvm_s390_crypto_init(struct kvm *kvm)
1277 {
1278 if (!test_kvm_facility(kvm, 76))
1279 return;
1280
1281 kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1282 kvm_s390_set_crycb_format(kvm);
1283
1284 /* Enable AES/DEA protected key functions by default */
1285 kvm->arch.crypto.aes_kw = 1;
1286 kvm->arch.crypto.dea_kw = 1;
1287 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1288 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1289 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1290 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1291 }
1292
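/* Free the (basic or extended) system control area of a VM. */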
1293 static void sca_dispose(struct kvm *kvm)
1294 {
1295 if (kvm->arch.use_esca)
1296 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1297 else
1298 free_page((unsigned long)(kvm->arch.sca));
1299 kvm->arch.sca = NULL;
1300 }
1301
1302 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1303 {
1304 gfp_t alloc_flags = GFP_KERNEL;
1305 int i, rc;
1306 char debug_name[16];
1307 static unsigned long sca_offset;
1308
1309 rc = -EINVAL;
1310 #ifdef CONFIG_KVM_S390_UCONTROL
1311 if (type & ~KVM_VM_S390_UCONTROL)
1312 goto out_err;
1313 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1314 goto out_err;
1315 #else
1316 if (type)
1317 goto out_err;
1318 #endif
1319
1320 rc = s390_enable_sie();
1321 if (rc)
1322 goto out_err;
1323
1324 rc = -ENOMEM;
1325
1326 ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1327
1328 kvm->arch.use_esca = 0; /* start with basic SCA */
1329 if (!sclp.has_64bscao)
1330 alloc_flags |= GFP_DMA;
1331 rwlock_init(&kvm->arch.sca_lock);
1332 kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1333 if (!kvm->arch.sca)
1334 goto out_err;
1335 spin_lock(&kvm_lock);
1336 sca_offset += 16;
1337 if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1338 sca_offset = 0;
1339 kvm->arch.sca = (struct bsca_block *)
1340 ((char *) kvm->arch.sca + sca_offset);
1341 spin_unlock(&kvm_lock);
1342
1343 sprintf(debug_name, "kvm-%u", current->pid);
1344
1345 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1346 if (!kvm->arch.dbf)
1347 goto out_err;
1348
1349 kvm->arch.sie_page2 =
1350 (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1351 if (!kvm->arch.sie_page2)
1352 goto out_err;
1353
1354 /* Populate the facility mask initially. */
1355 memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1356 S390_ARCH_FAC_LIST_SIZE_BYTE);
1357 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1358 if (i < kvm_s390_fac_list_mask_size())
1359 kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1360 else
1361 kvm->arch.model.fac_mask[i] = 0UL;
1362 }
1363
1364 /* Populate the facility list initially. */
1365 kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1366 memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1367 S390_ARCH_FAC_LIST_SIZE_BYTE);
1368
1369 set_kvm_facility(kvm->arch.model.fac_mask, 74);
1370 set_kvm_facility(kvm->arch.model.fac_list, 74);
1371
1372 kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1373 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1374
1375 kvm_s390_crypto_init(kvm);
1376
1377 spin_lock_init(&kvm->arch.float_int.lock);
1378 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1379 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1380 init_waitqueue_head(&kvm->arch.ipte_wq);
1381 mutex_init(&kvm->arch.ipte_mutex);
1382
1383 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1384 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1385
1386 if (type & KVM_VM_S390_UCONTROL) {
1387 kvm->arch.gmap = NULL;
1388 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1389 } else {
1390 if (sclp.hamax == U64_MAX)
1391 kvm->arch.mem_limit = TASK_MAX_SIZE;
1392 else
1393 kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1394 sclp.hamax + 1);
1395 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1396 if (!kvm->arch.gmap)
1397 goto out_err;
1398 kvm->arch.gmap->private = kvm;
1399 kvm->arch.gmap->pfault_enabled = 0;
1400 }
1401
1402 kvm->arch.css_support = 0;
1403 kvm->arch.use_irqchip = 0;
1404 kvm->arch.epoch = 0;
1405
1406 spin_lock_init(&kvm->arch.start_stop_lock);
1407 KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1408
1409 return 0;
1410 out_err:
1411 free_page((unsigned long)kvm->arch.sie_page2);
1412 debug_unregister(kvm->arch.dbf);
1413 sca_dispose(kvm);
1414 KVM_EVENT(3, "creation of vm failed: %d", rc);
1415 return rc;
1416 }
1417
1418 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1419 {
1420 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1421 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1422 kvm_s390_clear_local_irqs(vcpu);
1423 kvm_clear_async_pf_completion_queue(vcpu);
1424 if (!kvm_is_ucontrol(vcpu->kvm))
1425 sca_del_vcpu(vcpu);
1426
1427 if (kvm_is_ucontrol(vcpu->kvm))
1428 gmap_free(vcpu->arch.gmap);
1429
1430 if (vcpu->kvm->arch.use_cmma)
1431 kvm_s390_vcpu_unsetup_cmma(vcpu);
1432 free_page((unsigned long)(vcpu->arch.sie_block));
1433
1434 kvm_vcpu_uninit(vcpu);
1435 kmem_cache_free(kvm_vcpu_cache, vcpu);
1436 }
1437
1438 static void kvm_free_vcpus(struct kvm *kvm)
1439 {
1440 unsigned int i;
1441 struct kvm_vcpu *vcpu;
1442
1443 kvm_for_each_vcpu(i, vcpu, kvm)
1444 kvm_arch_vcpu_destroy(vcpu);
1445
1446 mutex_lock(&kvm->lock);
1447 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1448 kvm->vcpus[i] = NULL;
1449
1450 atomic_set(&kvm->online_vcpus, 0);
1451 mutex_unlock(&kvm->lock);
1452 }
1453
1454 void kvm_arch_destroy_vm(struct kvm *kvm)
1455 {
1456 kvm_free_vcpus(kvm);
1457 sca_dispose(kvm);
1458 debug_unregister(kvm->arch.dbf);
1459 free_page((unsigned long)kvm->arch.sie_page2);
1460 if (!kvm_is_ucontrol(kvm))
1461 gmap_free(kvm->arch.gmap);
1462 kvm_s390_destroy_adapters(kvm);
1463 kvm_s390_clear_float_irqs(kvm);
1464 KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1465 }
1466
1467 /* Section: vcpu related */
1468 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1469 {
1470 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1471 if (!vcpu->arch.gmap)
1472 return -ENOMEM;
1473 vcpu->arch.gmap->private = vcpu->kvm;
1474
1475 return 0;
1476 }
1477
1478 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1479 {
1480 read_lock(&vcpu->kvm->arch.sca_lock);
1481 if (vcpu->kvm->arch.use_esca) {
1482 struct esca_block *sca = vcpu->kvm->arch.sca;
1483
1484 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1485 sca->cpu[vcpu->vcpu_id].sda = 0;
1486 } else {
1487 struct bsca_block *sca = vcpu->kvm->arch.sca;
1488
1489 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1490 sca->cpu[vcpu->vcpu_id].sda = 0;
1491 }
1492 read_unlock(&vcpu->kvm->arch.sca_lock);
1493 }
1494
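/*
 * Hook a VCPU into the VM's system control area: store the SIE block
 * address in the SCA entry, point the SIE block at the SCA and mark the
 * VCPU in the mcn bitmask.
 */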
1495 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1496 {
1497 read_lock(&vcpu->kvm->arch.sca_lock);
1498 if (vcpu->kvm->arch.use_esca) {
1499 struct esca_block *sca = vcpu->kvm->arch.sca;
1500
1501 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1502 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1503 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1504 vcpu->arch.sie_block->ecb2 |= 0x04U;
1505 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1506 } else {
1507 struct bsca_block *sca = vcpu->kvm->arch.sca;
1508
1509 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1510 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1511 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1512 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1513 }
1514 read_unlock(&vcpu->kvm->arch.sca_lock);
1515 }
1516
1517 /* Basic SCA to Extended SCA data copy routines */
1518 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1519 {
1520 d->sda = s->sda;
1521 d->sigp_ctrl.c = s->sigp_ctrl.c;
1522 d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1523 }
1524
1525 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1526 {
1527 int i;
1528
1529 d->ipte_control = s->ipte_control;
1530 d->mcn[0] = s->mcn;
1531 for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1532 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1533 }
1534
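/*
 * Convert a VM from the basic to the extended SCA format. All VCPUs are
 * blocked while the entries are copied and the SIE blocks are rewired to
 * the new SCA.
 */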
1535 static int sca_switch_to_extended(struct kvm *kvm)
1536 {
1537 struct bsca_block *old_sca = kvm->arch.sca;
1538 struct esca_block *new_sca;
1539 struct kvm_vcpu *vcpu;
1540 unsigned int vcpu_idx;
1541 u32 scaol, scaoh;
1542
1543 new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1544 if (!new_sca)
1545 return -ENOMEM;
1546
1547 scaoh = (u32)((u64)(new_sca) >> 32);
1548 scaol = (u32)(u64)(new_sca) & ~0x3fU;
1549
1550 kvm_s390_vcpu_block_all(kvm);
1551 write_lock(&kvm->arch.sca_lock);
1552
1553 sca_copy_b_to_e(new_sca, old_sca);
1554
1555 kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1556 vcpu->arch.sie_block->scaoh = scaoh;
1557 vcpu->arch.sie_block->scaol = scaol;
1558 vcpu->arch.sie_block->ecb2 |= 0x04U;
1559 }
1560 kvm->arch.sca = new_sca;
1561 kvm->arch.use_esca = 1;
1562
1563 write_unlock(&kvm->arch.sca_lock);
1564 kvm_s390_vcpu_unblock_all(kvm);
1565
1566 free_page((unsigned long)old_sca);
1567
1568 VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1569 old_sca, kvm->arch.sca);
1570 return 0;
1571 }
1572
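/*
 * Check whether a VCPU with the given id fits into the SCA, switching to
 * the extended SCA if necessary and supported by the machine.
 */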
1573 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1574 {
1575 int rc;
1576
1577 if (id < KVM_S390_BSCA_CPU_SLOTS)
1578 return true;
1579 if (!sclp.has_esca || !sclp.has_64bscao)
1580 return false;
1581
1582 mutex_lock(&kvm->lock);
1583 rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1584 mutex_unlock(&kvm->lock);
1585
1586 return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1587 }
1588
1589 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1590 {
1591 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1592 kvm_clear_async_pf_completion_queue(vcpu);
1593 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1594 KVM_SYNC_GPRS |
1595 KVM_SYNC_ACRS |
1596 KVM_SYNC_CRS |
1597 KVM_SYNC_ARCH0 |
1598 KVM_SYNC_PFAULT;
1599 if (test_kvm_facility(vcpu->kvm, 64))
1600 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1601 /* fprs can be synchronized via vrs, even if the guest has no vx. With
1602 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1603 */
1604 if (MACHINE_HAS_VX)
1605 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1606 else
1607 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1608
1609 if (kvm_is_ucontrol(vcpu->kvm))
1610 return __kvm_ucontrol_vcpu_init(vcpu);
1611
1612 return 0;
1613 }
1614
1615 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1616 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1617 {
1618 WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1619 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1620 vcpu->arch.cputm_start = get_tod_clock_fast();
1621 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1622 }
1623
1624 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1625 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1626 {
1627 WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1628 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1629 vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1630 vcpu->arch.cputm_start = 0;
1631 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1632 }
1633
1634 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1635 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1636 {
1637 WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1638 vcpu->arch.cputm_enabled = true;
1639 __start_cpu_timer_accounting(vcpu);
1640 }
1641
1642 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1643 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1644 {
1645 WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1646 __stop_cpu_timer_accounting(vcpu);
1647 vcpu->arch.cputm_enabled = false;
1648 }
1649
1650 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1651 {
1652 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1653 __enable_cpu_timer_accounting(vcpu);
1654 preempt_enable();
1655 }
1656
1657 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1658 {
1659 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1660 __disable_cpu_timer_accounting(vcpu);
1661 preempt_enable();
1662 }
1663
1664 /* set the cpu timer - may only be called from the VCPU thread itself */
1665 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1666 {
1667 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1668 raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1669 if (vcpu->arch.cputm_enabled)
1670 vcpu->arch.cputm_start = get_tod_clock_fast();
1671 vcpu->arch.sie_block->cputm = cputm;
1672 raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1673 preempt_enable();
1674 }
1675
1676 /* update and get the cpu timer - can also be called from other VCPU threads */
1677 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1678 {
1679 unsigned int seq;
1680 __u64 value;
1681
1682 if (unlikely(!vcpu->arch.cputm_enabled))
1683 return vcpu->arch.sie_block->cputm;
1684
1685 preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1686 do {
1687 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1688 /*
1689 * If the writer would ever execute a read in the critical
1690 * section, e.g. in irq context, we have a deadlock.
1691 */
1692 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1693 value = vcpu->arch.sie_block->cputm;
1694 /* if cputm_start is 0, accounting is being started/stopped */
1695 if (likely(vcpu->arch.cputm_start))
1696 value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1697 } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1698 preempt_enable();
1699 return value;
1700 }
1701
1702 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1703 {
1704 /* Save host register state */
1705 save_fpu_regs();
1706 vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1707 vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1708
1709 if (MACHINE_HAS_VX)
1710 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1711 else
1712 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1713 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1714 if (test_fp_ctl(current->thread.fpu.fpc))
1715 /* User space provided an invalid FPC, let's clear it */
1716 current->thread.fpu.fpc = 0;
1717
1718 save_access_regs(vcpu->arch.host_acrs);
1719 restore_access_regs(vcpu->run->s.regs.acrs);
1720 gmap_enable(vcpu->arch.gmap);
1721 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1722 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1723 __start_cpu_timer_accounting(vcpu);
1724 vcpu->cpu = cpu;
1725 }
1726
1727 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1728 {
1729 vcpu->cpu = -1;
1730 if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1731 __stop_cpu_timer_accounting(vcpu);
1732 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1733 gmap_disable(vcpu->arch.gmap);
1734
1735 /* Save guest register state */
1736 save_fpu_regs();
1737 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1738
1739 /* Restore host register state */
1740 current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1741 current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1742
1743 save_access_regs(vcpu->run->s.regs.acrs);
1744 restore_access_regs(vcpu->arch.host_acrs);
1745 }
1746
1747 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1748 {
1749 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1750 vcpu->arch.sie_block->gpsw.mask = 0UL;
1751 vcpu->arch.sie_block->gpsw.addr = 0UL;
1752 kvm_s390_set_prefix(vcpu, 0);
1753 kvm_s390_set_cpu_timer(vcpu, 0);
1754 vcpu->arch.sie_block->ckc = 0UL;
1755 vcpu->arch.sie_block->todpr = 0;
1756 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1757 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1758 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1759 /* make sure the new fpc will be lazily loaded */
1760 save_fpu_regs();
1761 current->thread.fpu.fpc = 0;
1762 vcpu->arch.sie_block->gbea = 1;
1763 vcpu->arch.sie_block->pp = 0;
1764 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1765 kvm_clear_async_pf_completion_queue(vcpu);
1766 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1767 kvm_s390_vcpu_stop(vcpu);
1768 kvm_s390_clear_local_irqs(vcpu);
1769 }
1770
1771 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1772 {
1773 mutex_lock(&vcpu->kvm->lock);
1774 preempt_disable();
1775 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1776 preempt_enable();
1777 mutex_unlock(&vcpu->kvm->lock);
1778 if (!kvm_is_ucontrol(vcpu->kvm)) {
1779 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1780 sca_add_vcpu(vcpu);
1781 }
1782
1783 }
1784
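/*
 * Mirror the VM-wide crypto settings (AES/DEA key wrapping and the crypto
 * control block address) into the VCPU's SIE block.
 */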
1785 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1786 {
1787 if (!test_kvm_facility(vcpu->kvm, 76))
1788 return;
1789
1790 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1791
1792 if (vcpu->kvm->arch.crypto.aes_kw)
1793 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1794 if (vcpu->kvm->arch.crypto.dea_kw)
1795 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1796
1797 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1798 }
1799
1800 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1801 {
1802 free_page(vcpu->arch.sie_block->cbrlo);
1803 vcpu->arch.sie_block->cbrlo = 0;
1804 }
1805
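/*
 * Allocate the page referenced by cbrlo and enable CMMA interpretation
 * for this VCPU (ecb2 bit 0x80), with PFMF interpretation disabled.
 */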
1806 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1807 {
1808 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1809 if (!vcpu->arch.sie_block->cbrlo)
1810 return -ENOMEM;
1811
1812 vcpu->arch.sie_block->ecb2 |= 0x80;
1813 vcpu->arch.sie_block->ecb2 &= ~0x08;
1814 return 0;
1815 }
1816
1817 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1818 {
1819 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1820
1821 vcpu->arch.sie_block->ibc = model->ibc;
1822 if (test_kvm_facility(vcpu->kvm, 7))
1823 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1824 }
1825
1826 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1827 {
1828 int rc = 0;
1829
1830 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1831 CPUSTAT_SM |
1832 CPUSTAT_STOPPED);
1833
1834 if (test_kvm_facility(vcpu->kvm, 78))
1835 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1836 else if (test_kvm_facility(vcpu->kvm, 8))
1837 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1838
1839 kvm_s390_vcpu_setup_model(vcpu);
1840
1841 /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1842 if (MACHINE_HAS_ESOP)
1843 vcpu->arch.sie_block->ecb |= 0x02;
1844 if (test_kvm_facility(vcpu->kvm, 9))
1845 vcpu->arch.sie_block->ecb |= 0x04;
1846 if (test_kvm_facility(vcpu->kvm, 73))
1847 vcpu->arch.sie_block->ecb |= 0x10;
1848
1849 if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1850 vcpu->arch.sie_block->ecb2 |= 0x08;
1851 vcpu->arch.sie_block->eca = 0x1002000U;
1852 if (sclp.has_cei)
1853 vcpu->arch.sie_block->eca |= 0x80000000U;
1854 if (sclp.has_ib)
1855 vcpu->arch.sie_block->eca |= 0x40000000U;
1856 if (sclp.has_siif)
1857 vcpu->arch.sie_block->eca |= 1;
1858 if (sclp.has_sigpif)
1859 vcpu->arch.sie_block->eca |= 0x10000000U;
1860 if (test_kvm_facility(vcpu->kvm, 64))
1861 vcpu->arch.sie_block->ecb3 |= 0x01;
1862 if (test_kvm_facility(vcpu->kvm, 129)) {
1863 vcpu->arch.sie_block->eca |= 0x00020000;
1864 vcpu->arch.sie_block->ecd |= 0x20000000;
1865 }
1866 vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1867 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1868 if (test_kvm_facility(vcpu->kvm, 74))
1869 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1870
1871 if (vcpu->kvm->arch.use_cmma) {
1872 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1873 if (rc)
1874 return rc;
1875 }
1876 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1877 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1878
1879 kvm_s390_vcpu_crypto_setup(vcpu);
1880
1881 return rc;
1882 }
1883
1884 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1885 unsigned int id)
1886 {
1887 struct kvm_vcpu *vcpu;
1888 struct sie_page *sie_page;
1889 int rc = -EINVAL;
1890
1891 if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1892 goto out;
1893
1894 rc = -ENOMEM;
1895
1896 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1897 if (!vcpu)
1898 goto out;
1899
1900 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1901 if (!sie_page)
1902 goto out_free_cpu;
1903
1904 vcpu->arch.sie_block = &sie_page->sie_block;
1905 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1906
1907 /* the real guest size will always be smaller than msl */
1908 vcpu->arch.sie_block->mso = 0;
1909 vcpu->arch.sie_block->msl = sclp.hamax;
1910
1911 vcpu->arch.sie_block->icpua = id;
1912 spin_lock_init(&vcpu->arch.local_int.lock);
1913 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1914 vcpu->arch.local_int.wq = &vcpu->wq;
1915 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1916 seqcount_init(&vcpu->arch.cputm_seqcount);
1917
1918 rc = kvm_vcpu_init(vcpu, kvm, id);
1919 if (rc)
1920 goto out_free_sie_block;
1921 VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1922 vcpu->arch.sie_block);
1923 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1924
1925 return vcpu;
1926 out_free_sie_block:
1927 free_page((unsigned long)(vcpu->arch.sie_block));
1928 out_free_cpu:
1929 kmem_cache_free(kvm_vcpu_cache, vcpu);
1930 out:
1931 return ERR_PTR(rc);
1932 }
1933
1934 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1935 {
1936 return kvm_s390_vcpu_has_irq(vcpu, 0);
1937 }
1938
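/* Keep a VCPU from (re)entering SIE until kvm_s390_vcpu_unblock() is called. */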
1939 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1940 {
1941 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1942 exit_sie(vcpu);
1943 }
1944
1945 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1946 {
1947 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1948 }
1949
1950 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1951 {
1952 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1953 exit_sie(vcpu);
1954 }
1955
1956 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1957 {
1958 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1959 }
1960
1961 /*
1962 * Kick a guest cpu out of SIE and wait until SIE is not running.
1963 * If the CPU is not running (e.g. waiting as idle) the function will
1964 * return immediately. */
1965 void exit_sie(struct kvm_vcpu *vcpu)
1966 {
1967 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1968 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1969 cpu_relax();
1970 }
1971
1972 /* Kick a guest cpu out of SIE to process a request synchronously */
1973 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1974 {
1975 kvm_make_request(req, vcpu);
1976 kvm_s390_vcpu_request(vcpu);
1977 }
1978
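/*
 * gmap notifier callback: a protected host page has changed. If it backs
 * a VCPU's prefix area, request a MMU reload so the ipte notifier for the
 * prefix pages gets re-armed.
 */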
1979 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1980 {
1981 int i;
1982 struct kvm *kvm = gmap->private;
1983 struct kvm_vcpu *vcpu;
1984
1985 kvm_for_each_vcpu(i, vcpu, kvm) {
1986 /* match against both prefix pages */
1987 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1988 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1989 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1990 }
1991 }
1992 }
1993
1994 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1995 {
1996 /* kvm common code refers to this, but never calls it */
1997 BUG();
1998 return 0;
1999 }
2000
2001 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2002 struct kvm_one_reg *reg)
2003 {
2004 int r = -EINVAL;
2005
2006 switch (reg->id) {
2007 case KVM_REG_S390_TODPR:
2008 r = put_user(vcpu->arch.sie_block->todpr,
2009 (u32 __user *)reg->addr);
2010 break;
2011 case KVM_REG_S390_EPOCHDIFF:
2012 r = put_user(vcpu->arch.sie_block->epoch,
2013 (u64 __user *)reg->addr);
2014 break;
2015 case KVM_REG_S390_CPU_TIMER:
2016 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2017 (u64 __user *)reg->addr);
2018 break;
2019 case KVM_REG_S390_CLOCK_COMP:
2020 r = put_user(vcpu->arch.sie_block->ckc,
2021 (u64 __user *)reg->addr);
2022 break;
2023 case KVM_REG_S390_PFTOKEN:
2024 r = put_user(vcpu->arch.pfault_token,
2025 (u64 __user *)reg->addr);
2026 break;
2027 case KVM_REG_S390_PFCOMPARE:
2028 r = put_user(vcpu->arch.pfault_compare,
2029 (u64 __user *)reg->addr);
2030 break;
2031 case KVM_REG_S390_PFSELECT:
2032 r = put_user(vcpu->arch.pfault_select,
2033 (u64 __user *)reg->addr);
2034 break;
2035 case KVM_REG_S390_PP:
2036 r = put_user(vcpu->arch.sie_block->pp,
2037 (u64 __user *)reg->addr);
2038 break;
2039 case KVM_REG_S390_GBEA:
2040 r = put_user(vcpu->arch.sie_block->gbea,
2041 (u64 __user *)reg->addr);
2042 break;
2043 default:
2044 break;
2045 }
2046
2047 return r;
2048 }
2049
2050 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2051 struct kvm_one_reg *reg)
2052 {
2053 int r = -EINVAL;
2054 __u64 val;
2055
2056 switch (reg->id) {
2057 case KVM_REG_S390_TODPR:
2058 r = get_user(vcpu->arch.sie_block->todpr,
2059 (u32 __user *)reg->addr);
2060 break;
2061 case KVM_REG_S390_EPOCHDIFF:
2062 r = get_user(vcpu->arch.sie_block->epoch,
2063 (u64 __user *)reg->addr);
2064 break;
2065 case KVM_REG_S390_CPU_TIMER:
2066 r = get_user(val, (u64 __user *)reg->addr);
2067 if (!r)
2068 kvm_s390_set_cpu_timer(vcpu, val);
2069 break;
2070 case KVM_REG_S390_CLOCK_COMP:
2071 r = get_user(vcpu->arch.sie_block->ckc,
2072 (u64 __user *)reg->addr);
2073 break;
2074 case KVM_REG_S390_PFTOKEN:
2075 r = get_user(vcpu->arch.pfault_token,
2076 (u64 __user *)reg->addr);
2077 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2078 kvm_clear_async_pf_completion_queue(vcpu);
2079 break;
2080 case KVM_REG_S390_PFCOMPARE:
2081 r = get_user(vcpu->arch.pfault_compare,
2082 (u64 __user *)reg->addr);
2083 break;
2084 case KVM_REG_S390_PFSELECT:
2085 r = get_user(vcpu->arch.pfault_select,
2086 (u64 __user *)reg->addr);
2087 break;
2088 case KVM_REG_S390_PP:
2089 r = get_user(vcpu->arch.sie_block->pp,
2090 (u64 __user *)reg->addr);
2091 break;
2092 case KVM_REG_S390_GBEA:
2093 r = get_user(vcpu->arch.sie_block->gbea,
2094 (u64 __user *)reg->addr);
2095 break;
2096 default:
2097 break;
2098 }
2099
2100 return r;
2101 }
2102
2103 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2104 {
2105 kvm_s390_vcpu_initial_reset(vcpu);
2106 return 0;
2107 }
2108
2109 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2110 {
2111 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2112 return 0;
2113 }
2114
2115 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2116 {
2117 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2118 return 0;
2119 }
2120
2121 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2122 struct kvm_sregs *sregs)
2123 {
2124 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2125 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2126 restore_access_regs(vcpu->run->s.regs.acrs);
2127 return 0;
2128 }
2129
2130 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2131 struct kvm_sregs *sregs)
2132 {
2133 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2134 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2135 return 0;
2136 }
2137
2138 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2139 {
2140 /* make sure the new values will be lazily loaded */
2141 save_fpu_regs();
2142 if (test_fp_ctl(fpu->fpc))
2143 return -EINVAL;
2144 current->thread.fpu.fpc = fpu->fpc;
2145 if (MACHINE_HAS_VX)
2146 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2147 else
2148 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2149 return 0;
2150 }
2151
2152 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2153 {
2154 /* make sure we have the latest values */
2155 save_fpu_regs();
2156 if (MACHINE_HAS_VX)
2157 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2158 else
2159 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2160 fpu->fpc = current->thread.fpu.fpc;
2161 return 0;
2162 }
2163
2164 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2165 {
2166 int rc = 0;
2167
2168 if (!is_vcpu_stopped(vcpu))
2169 rc = -EBUSY;
2170 else {
2171 vcpu->run->psw_mask = psw.mask;
2172 vcpu->run->psw_addr = psw.addr;
2173 }
2174 return rc;
2175 }
2176
2177 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2178 struct kvm_translation *tr)
2179 {
2180 return -EINVAL; /* not implemented yet */
2181 }
2182
2183 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2184 KVM_GUESTDBG_USE_HW_BP | \
2185 KVM_GUESTDBG_ENABLE)
2186
2187 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2188 struct kvm_guest_debug *dbg)
2189 {
2190 int rc = 0;
2191
2192 vcpu->guest_debug = 0;
2193 kvm_s390_clear_bp_data(vcpu);
2194
2195 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2196 return -EINVAL;
2197 if (!sclp.has_gpere)
2198 return -EINVAL;
2199
2200 if (dbg->control & KVM_GUESTDBG_ENABLE) {
2201 vcpu->guest_debug = dbg->control;
2202 /* enforce guest PER */
2203 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2204
2205 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2206 rc = kvm_s390_import_bp_data(vcpu, dbg);
2207 } else {
2208 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2209 vcpu->arch.guestdbg.last_bp = 0;
2210 }
2211
2212 if (rc) {
2213 vcpu->guest_debug = 0;
2214 kvm_s390_clear_bp_data(vcpu);
2215 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2216 }
2217
2218 return rc;
2219 }
2220
2221 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2222 struct kvm_mp_state *mp_state)
2223 {
2224 /* CHECK_STOP and LOAD are not supported yet */
2225 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2226 KVM_MP_STATE_OPERATING;
2227 }
2228
2229 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2230 struct kvm_mp_state *mp_state)
2231 {
2232 int rc = 0;
2233
2234 /* user space knows about this interface - let it control the state */
2235 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2236
2237 switch (mp_state->mp_state) {
2238 case KVM_MP_STATE_STOPPED:
2239 kvm_s390_vcpu_stop(vcpu);
2240 break;
2241 case KVM_MP_STATE_OPERATING:
2242 kvm_s390_vcpu_start(vcpu);
2243 break;
2244 case KVM_MP_STATE_LOAD:
2245 case KVM_MP_STATE_CHECK_STOP:
2246 /* fall through - CHECK_STOP and LOAD are not supported yet */
2247 default:
2248 rc = -ENXIO;
2249 }
2250
2251 return rc;
2252 }
2253
2254 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2255 {
2256 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2257 }
2258
2259 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2260 {
2261 retry:
2262 kvm_s390_vcpu_request_handled(vcpu);
2263 if (!vcpu->requests)
2264 return 0;
2265 /*
2266 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2267 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2268 * This ensures that the ipte instruction for this request has
2269 * already finished. We might race against a second unmapper that
2270 * wants to set the blocking bit. Let's just retry the request loop.
2271 */
2272 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2273 int rc;
2274 rc = gmap_ipte_notify(vcpu->arch.gmap,
2275 kvm_s390_get_prefix(vcpu),
2276 PAGE_SIZE * 2);
2277 if (rc)
2278 return rc;
2279 goto retry;
2280 }
2281
2282 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2283 vcpu->arch.sie_block->ihcpu = 0xffff;
2284 goto retry;
2285 }
2286
2287 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2288 if (!ibs_enabled(vcpu)) {
2289 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2290 atomic_or(CPUSTAT_IBS,
2291 &vcpu->arch.sie_block->cpuflags);
2292 }
2293 goto retry;
2294 }
2295
2296 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2297 if (ibs_enabled(vcpu)) {
2298 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2299 atomic_andnot(CPUSTAT_IBS,
2300 &vcpu->arch.sie_block->cpuflags);
2301 }
2302 goto retry;
2303 }
2304
2305 /* nothing to do, just clear the request */
2306 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2307
2308 return 0;
2309 }
2310
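/*
 * Set the guest TOD clock: derive the new epoch from the host TOD and
 * propagate it to all VCPUs while they are blocked out of SIE.
 */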
2311 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2312 {
2313 struct kvm_vcpu *vcpu;
2314 int i;
2315
2316 mutex_lock(&kvm->lock);
2317 preempt_disable();
2318 kvm->arch.epoch = tod - get_tod_clock();
2319 kvm_s390_vcpu_block_all(kvm);
2320 kvm_for_each_vcpu(i, vcpu, kvm)
2321 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2322 kvm_s390_vcpu_unblock_all(kvm);
2323 preempt_enable();
2324 mutex_unlock(&kvm->lock);
2325 }
2326
2327 /**
2328 * kvm_arch_fault_in_page - fault-in guest page if necessary
2329 * @vcpu: The corresponding virtual cpu
2330 * @gpa: Guest physical address
2331 * @writable: Whether the page should be writable or not
2332 *
2333 * Make sure that a guest page has been faulted-in on the host.
2334 *
2335 * Return: Zero on success, negative error code otherwise.
2336 */
2337 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2338 {
2339 return gmap_fault(vcpu->arch.gmap, gpa,
2340 writable ? FAULT_FLAG_WRITE : 0);
2341 }
2342
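/* Notify the guest about a started (init) or completed (done) pfault. */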
2343 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2344 unsigned long token)
2345 {
2346 struct kvm_s390_interrupt inti;
2347 struct kvm_s390_irq irq;
2348
2349 if (start_token) {
2350 irq.u.ext.ext_params2 = token;
2351 irq.type = KVM_S390_INT_PFAULT_INIT;
2352 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2353 } else {
2354 inti.type = KVM_S390_INT_PFAULT_DONE;
2355 inti.parm64 = token;
2356 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2357 }
2358 }
2359
2360 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2361 struct kvm_async_pf *work)
2362 {
2363 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2364 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2365 }
2366
2367 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2368 struct kvm_async_pf *work)
2369 {
2370 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2371 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2372 }
2373
2374 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2375 struct kvm_async_pf *work)
2376 {
2377 /* s390 will always inject the page directly */
2378 }
2379
2380 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2381 {
2382 /*
2383 * s390 will always inject the page directly,
2384 * but we still want check_async_completion to clean up
2385 */
2386 return true;
2387 }
2388
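/*
 * Check whether the current host fault may be handled asynchronously via
 * the pfault mechanism; if so, queue an async page fault for the faulting
 * guest address.
 */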
2389 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2390 {
2391 hva_t hva;
2392 struct kvm_arch_async_pf arch;
2393 int rc;
2394
2395 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2396 return 0;
2397 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2398 vcpu->arch.pfault_compare)
2399 return 0;
2400 if (psw_extint_disabled(vcpu))
2401 return 0;
2402 if (kvm_s390_vcpu_has_irq(vcpu, 0))
2403 return 0;
2404 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2405 return 0;
2406 if (!vcpu->arch.gmap->pfault_enabled)
2407 return 0;
2408
2409 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2410 hva += current->thread.gmap_addr & ~PAGE_MASK;
2411 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2412 return 0;
2413
2414 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2415 return rc;
2416 }
2417
2418 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2419 {
2420 int rc, cpuflags;
2421
2422 /*
2423 * On s390 notifications for arriving pages will be delivered directly
2424 * to the guest but the housekeeping for completed pfaults is
2425 * handled outside the worker.
2426 */
2427 kvm_check_async_pf_completion(vcpu);
2428
2429 vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2430 vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2431
2432 if (need_resched())
2433 schedule();
2434
2435 if (test_cpu_flag(CIF_MCCK_PENDING))
2436 s390_handle_mcck();
2437
2438 if (!kvm_is_ucontrol(vcpu->kvm)) {
2439 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2440 if (rc)
2441 return rc;
2442 }
2443
2444 rc = kvm_s390_handle_requests(vcpu);
2445 if (rc)
2446 return rc;
2447
2448 if (guestdbg_enabled(vcpu)) {
2449 kvm_s390_backup_guest_per_regs(vcpu);
2450 kvm_s390_patch_guest_per_regs(vcpu);
2451 }
2452
2453 vcpu->arch.sie_block->icptcode = 0;
2454 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2455 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2456 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2457
2458 return 0;
2459 }
2460
2461 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2462 {
2463 struct kvm_s390_pgm_info pgm_info = {
2464 .code = PGM_ADDRESSING,
2465 };
2466 u8 opcode, ilen;
2467 int rc;
2468
2469 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2470 trace_kvm_s390_sie_fault(vcpu);
2471
2472 /*
2473 * We want to inject an addressing exception, which is defined as a
2474 * suppressing or terminating exception. However, since we came here
2475 * by a DAT access exception, the PSW still points to the faulting
2476 * instruction since DAT exceptions are nullifying. So we've got
2477 * to look up the current opcode to get the length of the instruction
2478 * to be able to forward the PSW.
2479 */
2480 rc = read_guest_instr(vcpu, &opcode, 1);
2481 ilen = insn_length(opcode);
2482 if (rc < 0) {
2483 return rc;
2484 } else if (rc) {
2485 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2486 * Forward by arbitrary ilc, injection will take care of
2487 * nullification if necessary.
2488 */
2489 pgm_info = vcpu->arch.pgm;
2490 ilen = 4;
2491 }
2492 pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2493 kvm_s390_forward_psw(vcpu, ilen);
2494 return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2495 }
2496
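/* Process a SIE exit and decide whether control has to go back to userspace. */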
2497 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2498 {
2499 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2500 vcpu->arch.sie_block->icptcode);
2501 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2502
2503 if (guestdbg_enabled(vcpu))
2504 kvm_s390_restore_guest_per_regs(vcpu);
2505
2506 vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2507 vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2508
2509 if (vcpu->arch.sie_block->icptcode > 0) {
2510 int rc = kvm_handle_sie_intercept(vcpu);
2511
2512 if (rc != -EOPNOTSUPP)
2513 return rc;
2514 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2515 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2516 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2517 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2518 return -EREMOTE;
2519 } else if (exit_reason != -EFAULT) {
2520 vcpu->stat.exit_null++;
2521 return 0;
2522 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2523 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2524 vcpu->run->s390_ucontrol.trans_exc_code =
2525 current->thread.gmap_addr;
2526 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2527 return -EREMOTE;
2528 } else if (current->thread.gmap_pfault) {
2529 trace_kvm_s390_major_guest_pfault(vcpu);
2530 current->thread.gmap_pfault = 0;
2531 if (kvm_arch_setup_async_pf(vcpu))
2532 return 0;
2533 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2534 }
2535 return vcpu_post_run_fault_in_sie(vcpu);
2536 }
2537
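/*
 * Inner run loop: deliver interrupts, handle requests, enter SIE and
 * process the exit, until an error occurs, a signal is pending or a
 * guest debug exit is requested.
 */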
2538 static int __vcpu_run(struct kvm_vcpu *vcpu)
2539 {
2540 int rc, exit_reason;
2541
2542 /*
2543 * We try to hold kvm->srcu during most of vcpu_run (except when run-
2544 * ning the guest), so that memslots (and other stuff) are protected
2545 */
2546 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2547
2548 do {
2549 rc = vcpu_pre_run(vcpu);
2550 if (rc)
2551 break;
2552
2553 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2554 /*
2555 * As PF_VCPU will be used in the fault handler, there must be
2556 * no uaccess between guest_enter and guest_exit.
2557 */
2558 local_irq_disable();
2559 __kvm_guest_enter();
2560 __disable_cpu_timer_accounting(vcpu);
2561 local_irq_enable();
2562 exit_reason = sie64a(vcpu->arch.sie_block,
2563 vcpu->run->s.regs.gprs);
2564 local_irq_disable();
2565 __enable_cpu_timer_accounting(vcpu);
2566 __kvm_guest_exit();
2567 local_irq_enable();
2568 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2569
2570 rc = vcpu_post_run(vcpu, exit_reason);
2571 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2572
2573 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2574 return rc;
2575 }
2576
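/* Copy the register state that userspace marked dirty in kvm_run into the VCPU. */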
2577 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2578 {
2579 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2580 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2581 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2582 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2583 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2584 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2585 /* some control register changes require a tlb flush */
2586 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2587 }
2588 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2589 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2590 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2591 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2592 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2593 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2594 }
2595 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2596 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2597 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2598 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2599 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2600 kvm_clear_async_pf_completion_queue(vcpu);
2601 }
2602 kvm_run->kvm_dirty_regs = 0;
2603 }
2604
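/* Copy the current VCPU register state back into kvm_run for userspace. */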
2605 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2606 {
2607 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2608 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2609 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2610 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2611 kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2612 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2613 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2614 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2615 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2616 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2617 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2618 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2619 }
2620
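/* The KVM_RUN ioctl: sync registers, run the guest and report the result. */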
2621 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2622 {
2623 int rc;
2624 sigset_t sigsaved;
2625
2626 if (guestdbg_exit_pending(vcpu)) {
2627 kvm_s390_prepare_debug_exit(vcpu);
2628 return 0;
2629 }
2630
2631 if (vcpu->sigset_active)
2632 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2633
2634 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2635 kvm_s390_vcpu_start(vcpu);
2636 } else if (is_vcpu_stopped(vcpu)) {
2637 pr_err_ratelimited("can't run stopped vcpu %d\n",
2638 vcpu->vcpu_id);
2639 return -EINVAL;
2640 }
2641
2642 sync_regs(vcpu, kvm_run);
2643 enable_cpu_timer_accounting(vcpu);
2644
2645 might_fault();
2646 rc = __vcpu_run(vcpu);
2647
2648 if (signal_pending(current) && !rc) {
2649 kvm_run->exit_reason = KVM_EXIT_INTR;
2650 rc = -EINTR;
2651 }
2652
2653 if (guestdbg_exit_pending(vcpu) && !rc) {
2654 kvm_s390_prepare_debug_exit(vcpu);
2655 rc = 0;
2656 }
2657
2658 if (rc == -EREMOTE) {
2659 /* userspace support is needed, kvm_run has been prepared */
2660 rc = 0;
2661 }
2662
2663 disable_cpu_timer_accounting(vcpu);
2664 store_regs(vcpu, kvm_run);
2665
2666 if (vcpu->sigset_active)
2667 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2668
2669 vcpu->stat.exit_userspace++;
2670 return rc;
2671 }
2672
2673 /*
2674 * store status at address
2675 * we have two special cases:
2676 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2677 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2678 */
2679 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2680 {
2681 unsigned char archmode = 1;
2682 freg_t fprs[NUM_FPRS];
2683 unsigned int px;
2684 u64 clkcomp, cputm;
2685 int rc;
2686
2687 px = kvm_s390_get_prefix(vcpu);
2688 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2689 if (write_guest_abs(vcpu, 163, &archmode, 1))
2690 return -EFAULT;
2691 gpa = 0;
2692 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2693 if (write_guest_real(vcpu, 163, &archmode, 1))
2694 return -EFAULT;
2695 gpa = px;
2696 } else
2697 gpa -= __LC_FPREGS_SAVE_AREA;
2698
2699 /* manually convert vector registers if necessary */
2700 if (MACHINE_HAS_VX) {
2701 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2702 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2703 fprs, 128);
2704 } else {
2705 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2706 vcpu->run->s.regs.fprs, 128);
2707 }
2708 rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2709 vcpu->run->s.regs.gprs, 128);
2710 rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2711 &vcpu->arch.sie_block->gpsw, 16);
2712 rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2713 &px, 4);
2714 rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2715 &vcpu->run->s.regs.fpc, 4);
2716 rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2717 &vcpu->arch.sie_block->todpr, 4);
2718 cputm = kvm_s390_get_cpu_timer(vcpu);
2719 rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2720 &cputm, 8);
2721 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2722 rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2723 &clkcomp, 8);
2724 rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2725 &vcpu->run->s.regs.acrs, 64);
2726 rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2727 &vcpu->arch.sie_block->gcr, 128);
2728 return rc ? -EFAULT : 0;
2729 }
2730
2731 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2732 {
2733 /*
2734 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2735 * copying in vcpu load/put. Let's update our copies before we save
2736 * it into the save area
2737 */
2738 save_fpu_regs();
2739 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2740 save_access_regs(vcpu->run->s.regs.acrs);
2741
2742 return kvm_s390_store_status_unloaded(vcpu, addr);
2743 }
2744
2745 /*
2746 * store additional status at address
2747 */
2748 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2749 unsigned long gpa)
2750 {
2751 /* Only bits 0-53 are used for address formation */
2752 if (!(gpa & ~0x3ff))
2753 return 0;
2754
2755 return write_guest_abs(vcpu, gpa & ~0x3ff,
2756 (void *)&vcpu->run->s.regs.vrs, 512);
2757 }
2758
2759 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2760 {
2761 if (!test_kvm_facility(vcpu->kvm, 129))
2762 return 0;
2763
2764 /*
2765 * The guest VXRS are in the host VXRS due to the lazy
2766 * copying in vcpu load/put. We can simply call save_fpu_regs()
2767 * to save the current register state because we are in the
2768 * middle of a load/put cycle.
2769 *
2770 * Let's update our copies before we save it into the save area.
2771 */
2772 save_fpu_regs();
2773
2774 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2775 }
2776
2777 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2778 {
2779 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2780 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2781 }
2782
2783 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2784 {
2785 unsigned int i;
2786 struct kvm_vcpu *vcpu;
2787
2788 kvm_for_each_vcpu(i, vcpu, kvm) {
2789 __disable_ibs_on_vcpu(vcpu);
2790 }
2791 }
2792
2793 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2794 {
2795 if (!sclp.has_ibs)
2796 return;
2797 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2798 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2799 }
2800
2801 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2802 {
2803 int i, online_vcpus, started_vcpus = 0;
2804
2805 if (!is_vcpu_stopped(vcpu))
2806 return;
2807
2808 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2809 /* Only one cpu at a time may enter/leave the STOPPED state. */
2810 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2811 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2812
2813 for (i = 0; i < online_vcpus; i++) {
2814 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2815 started_vcpus++;
2816 }
2817
2818 if (started_vcpus == 0) {
2819 /* we're the only active VCPU -> speed it up */
2820 __enable_ibs_on_vcpu(vcpu);
2821 } else if (started_vcpus == 1) {
2822 /*
2823 * As we are starting a second VCPU, we have to disable
2824 * the IBS facility on all VCPUs to remove potentially
2825 * outstanding ENABLE requests.
2826 */
2827 __disable_ibs_on_all_vcpus(vcpu->kvm);
2828 }
2829
2830 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2831 /*
2832 * Another VCPU might have used IBS while we were offline.
2833 * Let's play safe and flush the VCPU at startup.
2834 */
2835 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2836 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2837 return;
2838 }
2839
2840 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2841 {
2842 int i, online_vcpus, started_vcpus = 0;
2843 struct kvm_vcpu *started_vcpu = NULL;
2844
2845 if (is_vcpu_stopped(vcpu))
2846 return;
2847
2848 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2849 /* Only one cpu at a time may enter/leave the STOPPED state. */
2850 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2851 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2852
2853 /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2854 kvm_s390_clear_stop_irq(vcpu);
2855
2856 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2857 __disable_ibs_on_vcpu(vcpu);
2858
2859 for (i = 0; i < online_vcpus; i++) {
2860 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2861 started_vcpus++;
2862 started_vcpu = vcpu->kvm->vcpus[i];
2863 }
2864 }
2865
2866 if (started_vcpus == 1) {
2867 /*
2868 * As we only have one VCPU left, we want to enable the
2869 * IBS facility for that VCPU to speed it up.
2870 */
2871 __enable_ibs_on_vcpu(started_vcpu);
2872 }
2873
2874 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2875 return;
2876 }
2877
2878 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2879 struct kvm_enable_cap *cap)
2880 {
2881 int r;
2882
2883 if (cap->flags)
2884 return -EINVAL;
2885
2886 switch (cap->cap) {
2887 case KVM_CAP_S390_CSS_SUPPORT:
2888 if (!vcpu->kvm->arch.css_support) {
2889 vcpu->kvm->arch.css_support = 1;
2890 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2891 trace_kvm_s390_enable_css(vcpu->kvm);
2892 }
2893 r = 0;
2894 break;
2895 default:
2896 r = -EINVAL;
2897 break;
2898 }
2899 return r;
2900 }
2901
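/*
 * Handle the KVM_S390_MEM_OP ioctl: read or write guest logical memory via
 * a temporary kernel buffer, or only check accessibility when the
 * KVM_S390_MEMOP_F_CHECK_ONLY flag is set.
 */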
2902 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2903 struct kvm_s390_mem_op *mop)
2904 {
2905 void __user *uaddr = (void __user *)mop->buf;
2906 void *tmpbuf = NULL;
2907 int r, srcu_idx;
2908 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2909 | KVM_S390_MEMOP_F_CHECK_ONLY;
2910
2911 if (mop->flags & ~supported_flags)
2912 return -EINVAL;
2913
2914 if (mop->size > MEM_OP_MAX_SIZE)
2915 return -E2BIG;
2916
2917 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2918 tmpbuf = vmalloc(mop->size);
2919 if (!tmpbuf)
2920 return -ENOMEM;
2921 }
2922
2923 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2924
2925 switch (mop->op) {
2926 case KVM_S390_MEMOP_LOGICAL_READ:
2927 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2928 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2929 mop->size, GACC_FETCH);
2930 break;
2931 }
2932 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2933 if (r == 0) {
2934 if (copy_to_user(uaddr, tmpbuf, mop->size))
2935 r = -EFAULT;
2936 }
2937 break;
2938 case KVM_S390_MEMOP_LOGICAL_WRITE:
2939 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2940 r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2941 mop->size, GACC_STORE);
2942 break;
2943 }
2944 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2945 r = -EFAULT;
2946 break;
2947 }
2948 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2949 break;
2950 default:
2951 r = -EINVAL;
2952 }
2953
2954 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2955
2956 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2957 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2958
2959 vfree(tmpbuf);
2960 return r;
2961 }
2962
2963 long kvm_arch_vcpu_ioctl(struct file *filp,
2964 unsigned int ioctl, unsigned long arg)
2965 {
2966 struct kvm_vcpu *vcpu = filp->private_data;
2967 void __user *argp = (void __user *)arg;
2968 int idx;
2969 long r;
2970
2971 switch (ioctl) {
2972 case KVM_S390_IRQ: {
2973 struct kvm_s390_irq s390irq;
2974
2975 r = -EFAULT;
2976 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2977 break;
2978 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2979 break;
2980 }
2981 case KVM_S390_INTERRUPT: {
2982 struct kvm_s390_interrupt s390int;
2983 struct kvm_s390_irq s390irq;
2984
2985 r = -EFAULT;
2986 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2987 break;
2988 if (s390int_to_s390irq(&s390int, &s390irq))
2989 return -EINVAL;
2990 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2991 break;
2992 }
2993 case KVM_S390_STORE_STATUS:
2994 idx = srcu_read_lock(&vcpu->kvm->srcu);
2995 r = kvm_s390_vcpu_store_status(vcpu, arg);
2996 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2997 break;
2998 case KVM_S390_SET_INITIAL_PSW: {
2999 psw_t psw;
3000
3001 r = -EFAULT;
3002 if (copy_from_user(&psw, argp, sizeof(psw)))
3003 break;
3004 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3005 break;
3006 }
3007 case KVM_S390_INITIAL_RESET:
3008 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3009 break;
3010 case KVM_SET_ONE_REG:
3011 case KVM_GET_ONE_REG: {
3012 struct kvm_one_reg reg;
3013 r = -EFAULT;
3014 if (copy_from_user(&reg, argp, sizeof(reg)))
3015 break;
3016 if (ioctl == KVM_SET_ONE_REG)
3017 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3018 else
3019 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3020 break;
3021 }
3022 #ifdef CONFIG_KVM_S390_UCONTROL
3023 case KVM_S390_UCAS_MAP: {
3024 struct kvm_s390_ucas_mapping ucasmap;
3025
3026 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3027 r = -EFAULT;
3028 break;
3029 }
3030
3031 if (!kvm_is_ucontrol(vcpu->kvm)) {
3032 r = -EINVAL;
3033 break;
3034 }
3035
3036 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3037 ucasmap.vcpu_addr, ucasmap.length);
3038 break;
3039 }
3040 case KVM_S390_UCAS_UNMAP: {
3041 struct kvm_s390_ucas_mapping ucasmap;
3042
3043 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3044 r = -EFAULT;
3045 break;
3046 }
3047
3048 if (!kvm_is_ucontrol(vcpu->kvm)) {
3049 r = -EINVAL;
3050 break;
3051 }
3052
3053 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3054 ucasmap.length);
3055 break;
3056 }
3057 #endif
3058 case KVM_S390_VCPU_FAULT: {
3059 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3060 break;
3061 }
3062 case KVM_ENABLE_CAP:
3063 {
3064 struct kvm_enable_cap cap;
3065 r = -EFAULT;
3066 if (copy_from_user(&cap, argp, sizeof(cap)))
3067 break;
3068 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3069 break;
3070 }
3071 case KVM_S390_MEM_OP: {
3072 struct kvm_s390_mem_op mem_op;
3073
3074 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3075 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3076 else
3077 r = -EFAULT;
3078 break;
3079 }
3080 case KVM_S390_SET_IRQ_STATE: {
3081 struct kvm_s390_irq_state irq_state;
3082
3083 r = -EFAULT;
3084 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3085 break;
3086 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3087 irq_state.len == 0 ||
3088 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3089 r = -EINVAL;
3090 break;
3091 }
3092 r = kvm_s390_set_irq_state(vcpu,
3093 (void __user *) irq_state.buf,
3094 irq_state.len);
3095 break;
3096 }
3097 case KVM_S390_GET_IRQ_STATE: {
3098 struct kvm_s390_irq_state irq_state;
3099
3100 r = -EFAULT;
3101 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3102 break;
3103 if (irq_state.len == 0) {
3104 r = -EINVAL;
3105 break;
3106 }
3107 r = kvm_s390_get_irq_state(vcpu,
3108 (__u8 __user *) irq_state.buf,
3109 irq_state.len);
3110 break;
3111 }
3112 default:
3113 r = -ENOTTY;
3114 }
3115 return r;
3116 }
3117
3118 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3119 {
3120 #ifdef CONFIG_KVM_S390_UCONTROL
3121 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3122 && (kvm_is_ucontrol(vcpu->kvm))) {
3123 vmf->page = virt_to_page(vcpu->arch.sie_block);
3124 get_page(vmf->page);
3125 return 0;
3126 }
3127 #endif
3128 return VM_FAULT_SIGBUS;
3129 }
3130
3131 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3132 unsigned long npages)
3133 {
3134 return 0;
3135 }
3136
3137 /* Section: memory related */
3138 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3139 struct kvm_memory_slot *memslot,
3140 const struct kvm_userspace_memory_region *mem,
3141 enum kvm_mr_change change)
3142 {
3143 /* A few sanity checks. We can have memory slots which have to start
3144 and end at a segment boundary (1 MB). The memory in userland may be
3145 fragmented into various different vmas. It is okay to mmap() and
3146 munmap() stuff in this slot after doing this call at any time */
3147
3148 if (mem->userspace_addr & 0xffffful)
3149 return -EINVAL;
3150
3151 if (mem->memory_size & 0xffffful)
3152 return -EINVAL;
3153
3154 if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3155 return -EINVAL;
3156
3157 return 0;
3158 }
3159
3160 void kvm_arch_commit_memory_region(struct kvm *kvm,
3161 const struct kvm_userspace_memory_region *mem,
3162 const struct kvm_memory_slot *old,
3163 const struct kvm_memory_slot *new,
3164 enum kvm_mr_change change)
3165 {
3166 int rc;
3167
3168 /* If the basics of the memslot do not change, we do not want
3169 * to update the gmap. Every update causes several unnecessary
3170 * segment translation exceptions. This is usually handled just
3171 * fine by the normal fault handler + gmap, but it will also
3172 * cause faults on the prefix page of running guest CPUs.
3173 */
3174 if (old->userspace_addr == mem->userspace_addr &&
3175 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3176 old->npages * PAGE_SIZE == mem->memory_size)
3177 return;
3178
3179 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3180 mem->guest_phys_addr, mem->memory_size);
3181 if (rc)
3182 pr_warn("failed to commit memory region\n");
3183 return;
3184 }
3185
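/*
 * Build the mask of facility bits in doubleword i that may be exposed to
 * guests, derived from the SCLP hmfai field; used to trim the facility
 * list mask at module init time.
 */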
3186 static inline unsigned long nonhyp_mask(int i)
3187 {
3188 unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3189
3190 return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3191 }
3192
3193 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3194 {
3195 vcpu->valid_wakeup = false;
3196 }
3197
3198 static int __init kvm_s390_init(void)
3199 {
3200 int i;
3201
3202 if (!sclp.has_sief2) {
3203 pr_info("SIE not available\n");
3204 return -ENODEV;
3205 }
3206
3207 for (i = 0; i < 16; i++)
3208 kvm_s390_fac_list_mask[i] |=
3209 S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3210
3211 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3212 }
3213
3214 static void __exit kvm_s390_exit(void)
3215 {
3216 kvm_exit();
3217 }
3218
3219 module_init(kvm_s390_init);
3220 module_exit(kvm_s390_exit);
3221
3222 /*
3223 * Enable autoloading of the kvm module.
3224 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3225 * since x86 takes a different approach.
3226 */
3227 #include <linux/miscdevice.h>
3228 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3229 MODULE_ALIAS("devname:kvm");