arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <linux/bitmap.h>
  30 #include <asm/asm-offsets.h>
  31 #include <asm/lowcore.h>
  32 #include <asm/etr.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/gmap.h>
  35 #include <asm/nmi.h>
  36 #include <asm/switch_to.h>
  37 #include <asm/isc.h>
  38 #include <asm/sclp.h>
  39 #include <asm/cpacf.h>
  40 #include <asm/etr.h>
  41 #include "kvm-s390.h"
  42 #include "gaccess.h"
  43
  44 #define KMSG_COMPONENT "kvm-s390"
  45 #undef pr_fmt
  46 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  47
  48 #define CREATE_TRACE_POINTS
  49 #include "trace.h"
  50 #include "trace-s390.h"
  51
  52 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  53 #define LOCAL_IRQS 32
  54 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  55                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  56
  57 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  58
  59 struct kvm_stats_debugfs_item debugfs_entries[] = {
  60         { "userspace_handled", VCPU_STAT(exit_userspace) },
  61         { "exit_null", VCPU_STAT(exit_null) },
  62         { "exit_validity", VCPU_STAT(exit_validity) },
  63         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  64         { "exit_external_request", VCPU_STAT(exit_external_request) },
  65         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  66         { "exit_instruction", VCPU_STAT(exit_instruction) },
  67         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  68         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  69         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
  70         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  71         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  72         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  73         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  74         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  75         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  76         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  77         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  78         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  79         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  80         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  81         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  82         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  83         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  84         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  85         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  86         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  87         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  88         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  89         { "instruction_spx", VCPU_STAT(instruction_spx) },
  90         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  91         { "instruction_stap", VCPU_STAT(instruction_stap) },
  92         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  93         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  94         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  95         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  96         { "instruction_essa", VCPU_STAT(instruction_essa) },
  97         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  98         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  99         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
 100         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
 101         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
 102         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 103         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 104         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 105         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 106         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 107         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 108         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 109         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 110         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 111         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 112         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 113         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 114         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 115         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 116         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 117         { "diagnose_10", VCPU_STAT(diagnose_10) },
 118         { "diagnose_44", VCPU_STAT(diagnose_44) },
 119         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 120         { "diagnose_258", VCPU_STAT(diagnose_258) },
 121         { "diagnose_308", VCPU_STAT(diagnose_308) },
 122         { "diagnose_500", VCPU_STAT(diagnose_500) },
 123         { NULL }
 124 };
 125
 126 /* upper facilities limit for kvm */
 127 unsigned long kvm_s390_fac_list_mask[16] = {
 128         0xffe6000000000000UL,
 129         0x005e000000000000UL,
 130 };
 131
 132 unsigned long kvm_s390_fac_list_mask_size(void)
 133 {
 134         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 135         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 136 }
 137
 138 /* available cpu features supported by kvm */
 139 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 140 /* available subfunctions indicated via query / "test bit" */
 141 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
 142
 143 static struct gmap_notifier gmap_notifier;
 144 debug_info_t *kvm_s390_dbf;
 145
 146 /* Section: not file related */
 147 int kvm_arch_hardware_enable(void)
 148 {
 149         /* every s390 is virtualization enabled ;-) */
 150         return 0;
 151 }
 152
 153 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 154
 155 /*
 156  * This callback is executed during stop_machine(). All CPUs are therefore
 157  * temporarily stopped. In order not to change guest behavior, we have to
 158  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 159  * so a CPU won't be stopped while calculating with the epoch.
 160  */
 161 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 162                           void *v)
 163 {
 164         struct kvm *kvm;
 165         struct kvm_vcpu *vcpu;
 166         int i;
 167         unsigned long long *delta = v;
 168
 169         list_for_each_entry(kvm, &vm_list, vm_list) {
 170                 kvm->arch.epoch -= *delta;
 171                 kvm_for_each_vcpu(i, vcpu, kvm) {
 172                         vcpu->arch.sie_block->epoch -= *delta;
 173                         if (vcpu->arch.cputm_enabled)
 174                                 vcpu->arch.cputm_start += *delta;
 175                 }
 176         }
 177         return NOTIFY_OK;
 178 }
 179
 180 static struct notifier_block kvm_clock_notifier = {
 181         .notifier_call = kvm_clock_sync,
 182 };
 183
 184 int kvm_arch_hardware_setup(void)
 185 {
 186         gmap_notifier.notifier_call = kvm_gmap_notifier;
 187         gmap_register_ipte_notifier(&gmap_notifier);
 188         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 189                                        &kvm_clock_notifier);
 190         return 0;
 191 }
 192
 193 void kvm_arch_hardware_unsetup(void)
 194 {
 195         gmap_unregister_ipte_notifier(&gmap_notifier);
 196         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 197                                          &kvm_clock_notifier);
 198 }
 199
 200 static void allow_cpu_feat(unsigned long nr)
 201 {
 202         set_bit_inv(nr, kvm_s390_available_cpu_feat);
 203 }
 204
 205 static inline int plo_test_bit(unsigned char nr)
 206 {
 207         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
 208         int cc = 3; /* subfunction not available */
 209
 210         asm volatile(
 211                 /* Parameter registers are ignored for "test bit" */
 212                 "       plo     0,0,0,0(0)\n"
 213                 "       ipm     %0\n"
 214                 "       srl     %0,28\n"
 215                 : "=d" (cc)
 216                 : "d" (r0)
 217                 : "cc");
 218         return cc == 0;
 219 }
 220
 221 static void kvm_s390_cpu_feat_init(void)
 222 {
 223         int i;
 224
 225         for (i = 0; i < 256; ++i) {
 226                 if (plo_test_bit(i))
 227                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
 228         }
 229
 230         if (test_facility(28)) /* TOD-clock steering */
 231                 etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
 232
 233         if (test_facility(17)) { /* MSA */
 234                 __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
 235                 __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
 236                 __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
 237                 __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
 238                 __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
 239         }
 240         if (test_facility(76)) /* MSA3 */
 241                 __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
 242         if (test_facility(77)) { /* MSA4 */
 243                 __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
 244                 __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
 245                 __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
 246                 __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
 247         }
 248         if (test_facility(57)) /* MSA5 */
 249                 __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
 250
 251         if (MACHINE_HAS_ESOP)
 252                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
 253 }
 254
 255 int kvm_arch_init(void *opaque)
 256 {
 257         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 258         if (!kvm_s390_dbf)
 259                 return -ENOMEM;
 260
 261         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 262                 debug_unregister(kvm_s390_dbf);
 263                 return -ENOMEM;
 264         }
 265
 266         kvm_s390_cpu_feat_init();
 267
 268         /* Register floating interrupt controller interface. */
 269         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 270 }
 271
 272 void kvm_arch_exit(void)
 273 {
 274         debug_unregister(kvm_s390_dbf);
 275 }
 276
 277 /* Section: device related */
 278 long kvm_arch_dev_ioctl(struct file *filp,
 279                         unsigned int ioctl, unsigned long arg)
 280 {
 281         if (ioctl == KVM_S390_ENABLE_SIE)
 282                 return s390_enable_sie();
 283         return -EINVAL;
 284 }
 285
 286 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 287 {
 288         int r;
 289
 290         switch (ext) {
 291         case KVM_CAP_S390_PSW:
 292         case KVM_CAP_S390_GMAP:
 293         case KVM_CAP_SYNC_MMU:
 294 #ifdef CONFIG_KVM_S390_UCONTROL
 295         case KVM_CAP_S390_UCONTROL:
 296 #endif
 297         case KVM_CAP_ASYNC_PF:
 298         case KVM_CAP_SYNC_REGS:
 299         case KVM_CAP_ONE_REG:
 300         case KVM_CAP_ENABLE_CAP:
 301         case KVM_CAP_S390_CSS_SUPPORT:
 302         case KVM_CAP_IOEVENTFD:
 303         case KVM_CAP_DEVICE_CTRL:
 304         case KVM_CAP_ENABLE_CAP_VM:
 305         case KVM_CAP_S390_IRQCHIP:
 306         case KVM_CAP_VM_ATTRIBUTES:
 307         case KVM_CAP_MP_STATE:
 308         case KVM_CAP_S390_INJECT_IRQ:
 309         case KVM_CAP_S390_USER_SIGP:
 310         case KVM_CAP_S390_USER_STSI:
 311         case KVM_CAP_S390_SKEYS:
 312         case KVM_CAP_S390_IRQ_STATE:
 313                 r = 1;
 314                 break;
 315         case KVM_CAP_S390_MEM_OP:
 316                 r = MEM_OP_MAX_SIZE;
 317                 break;
 318         case KVM_CAP_NR_VCPUS:
 319         case KVM_CAP_MAX_VCPUS:
 320                 r = KVM_S390_BSCA_CPU_SLOTS;
 321                 if (sclp.has_esca && sclp.has_64bscao)
 322                         r = KVM_S390_ESCA_CPU_SLOTS;
 323                 break;
 324         case KVM_CAP_NR_MEMSLOTS:
 325                 r = KVM_USER_MEM_SLOTS;
 326                 break;
 327         case KVM_CAP_S390_COW:
 328                 r = MACHINE_HAS_ESOP;
 329                 break;
 330         case KVM_CAP_S390_VECTOR_REGISTERS:
 331                 r = MACHINE_HAS_VX;
 332                 break;
 333         case KVM_CAP_S390_RI:
 334                 r = test_facility(64);
 335                 break;
 336         default:
 337                 r = 0;
 338         }
 339         return r;
 340 }
 341
 342 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 343                                         struct kvm_memory_slot *memslot)
 344 {
 345         gfn_t cur_gfn, last_gfn;
 346         unsigned long address;
 347         struct gmap *gmap = kvm->arch.gmap;
 348
 349         /* Loop over all guest pages */
 350         last_gfn = memslot->base_gfn + memslot->npages;
 351         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 352                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 353
 354                 if (test_and_clear_guest_dirty(gmap->mm, address))
 355                         mark_page_dirty(kvm, cur_gfn);
 356                 if (fatal_signal_pending(current))
 357                         return;
 358                 cond_resched();
 359         }
 360 }
 361
 362 /* Section: vm related */
 363 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 364
 365 /*
 366  * Get (and clear) the dirty memory log for a memory slot.
 367  */
 368 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 369                                struct kvm_dirty_log *log)
 370 {
 371         int r;
 372         unsigned long n;
 373         struct kvm_memslots *slots;
 374         struct kvm_memory_slot *memslot;
 375         int is_dirty = 0;
 376
 377         mutex_lock(&kvm->slots_lock);
 378
 379         r = -EINVAL;
 380         if (log->slot >= KVM_USER_MEM_SLOTS)
 381                 goto out;
 382
 383         slots = kvm_memslots(kvm);
 384         memslot = id_to_memslot(slots, log->slot);
 385         r = -ENOENT;
 386         if (!memslot->dirty_bitmap)
 387                 goto out;
 388
 389         kvm_s390_sync_dirty_log(kvm, memslot);
 390         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 391         if (r)
 392                 goto out;
 393
 394         /* Clear the dirty log */
 395         if (is_dirty) {
 396                 n = kvm_dirty_bitmap_bytes(memslot);
 397                 memset(memslot->dirty_bitmap, 0, n);
 398         }
 399         r = 0;
 400 out:
 401         mutex_unlock(&kvm->slots_lock);
 402         return r;
 403 }
 404
 405 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 406 {
 407         int r;
 408
 409         if (cap->flags)
 410                 return -EINVAL;
 411
 412         switch (cap->cap) {
 413         case KVM_CAP_S390_IRQCHIP:
 414                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 415                 kvm->arch.use_irqchip = 1;
 416                 r = 0;
 417                 break;
 418         case KVM_CAP_S390_USER_SIGP:
 419                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 420                 kvm->arch.user_sigp = 1;
 421                 r = 0;
 422                 break;
 423         case KVM_CAP_S390_VECTOR_REGISTERS:
 424                 mutex_lock(&kvm->lock);
 425                 if (atomic_read(&kvm->online_vcpus)) {
 426                         r = -EBUSY;
 427                 } else if (MACHINE_HAS_VX) {
 428                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 429                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 430                         r = 0;
 431                 } else
 432                         r = -EINVAL;
 433                 mutex_unlock(&kvm->lock);
 434                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 435                          r ? "(not available)" : "(success)");
 436                 break;
 437         case KVM_CAP_S390_RI:
 438                 r = -EINVAL;
 439                 mutex_lock(&kvm->lock);
 440                 if (atomic_read(&kvm->online_vcpus)) {
 441                         r = -EBUSY;
 442                 } else if (test_facility(64)) {
 443                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 444                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 445                         r = 0;
 446                 }
 447                 mutex_unlock(&kvm->lock);
 448                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 449                          r ? "(not available)" : "(success)");
 450                 break;
 451         case KVM_CAP_S390_USER_STSI:
 452                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 453                 kvm->arch.user_stsi = 1;
 454                 r = 0;
 455                 break;
 456         default:
 457                 r = -EINVAL;
 458                 break;
 459         }
 460         return r;
 461 }
 462
 463 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 464 {
 465         int ret;
 466
 467         switch (attr->attr) {
 468         case KVM_S390_VM_MEM_LIMIT_SIZE:
 469                 ret = 0;
 470                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 471                          kvm->arch.mem_limit);
 472                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 473                         ret = -EFAULT;
 474                 break;
 475         default:
 476                 ret = -ENXIO;
 477                 break;
 478         }
 479         return ret;
 480 }
 481
 482 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 483 {
 484         int ret;
 485         unsigned int idx;
 486         switch (attr->attr) {
 487         case KVM_S390_VM_MEM_ENABLE_CMMA:
 488                 /* enable CMMA only for z10 and later (EDAT_1) */
 489                 ret = -EINVAL;
 490                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 491                         break;
 492
 493                 ret = -EBUSY;
 494                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 495                 mutex_lock(&kvm->lock);
 496                 if (atomic_read(&kvm->online_vcpus) == 0) {
 497                         kvm->arch.use_cmma = 1;
 498                         ret = 0;
 499                 }
 500                 mutex_unlock(&kvm->lock);
 501                 break;
 502         case KVM_S390_VM_MEM_CLR_CMMA:
 503                 ret = -EINVAL;
 504                 if (!kvm->arch.use_cmma)
 505                         break;
 506
 507                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 508                 mutex_lock(&kvm->lock);
 509                 idx = srcu_read_lock(&kvm->srcu);
 510                 s390_reset_cmma(kvm->arch.gmap->mm);
 511                 srcu_read_unlock(&kvm->srcu, idx);
 512                 mutex_unlock(&kvm->lock);
 513                 ret = 0;
 514                 break;
 515         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 516                 unsigned long new_limit;
 517
 518                 if (kvm_is_ucontrol(kvm))
 519                         return -EINVAL;
 520
 521                 if (get_user(new_limit, (u64 __user *)attr->addr))
 522                         return -EFAULT;
 523
 524                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 525                     new_limit > kvm->arch.mem_limit)
 526                         return -E2BIG;
 527
 528                 if (!new_limit)
 529                         return -EINVAL;
 530
 531                 /* gmap_alloc takes last usable address */
 532                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 533                         new_limit -= 1;
 534
 535                 ret = -EBUSY;
 536                 mutex_lock(&kvm->lock);
 537                 if (atomic_read(&kvm->online_vcpus) == 0) {
 538                         /* gmap_alloc will round the limit up */
 539                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 540
 541                         if (!new) {
 542                                 ret = -ENOMEM;
 543                         } else {
 544                                 gmap_free(kvm->arch.gmap);
 545                                 new->private = kvm;
 546                                 kvm->arch.gmap = new;
 547                                 ret = 0;
 548                         }
 549                 }
 550                 mutex_unlock(&kvm->lock);
 551                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 552                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 553                          (void *) kvm->arch.gmap->asce);
 554                 break;
 555         }
 556         default:
 557                 ret = -ENXIO;
 558                 break;
 559         }
 560         return ret;
 561 }
 562
 563 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 564
 565 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 566 {
 567         struct kvm_vcpu *vcpu;
 568         int i;
 569
 570         if (!test_kvm_facility(kvm, 76))
 571                 return -EINVAL;
 572
 573         mutex_lock(&kvm->lock);
 574         switch (attr->attr) {
 575         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 576                 get_random_bytes(
 577                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 578                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 579                 kvm->arch.crypto.aes_kw = 1;
 580                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 581                 break;
 582         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 583                 get_random_bytes(
 584                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 585                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 586                 kvm->arch.crypto.dea_kw = 1;
 587                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 588                 break;
 589         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 590                 kvm->arch.crypto.aes_kw = 0;
 591                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 592                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 593                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 594                 break;
 595         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 596                 kvm->arch.crypto.dea_kw = 0;
 597                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 598                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 599                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 600                 break;
 601         default:
 602                 mutex_unlock(&kvm->lock);
 603                 return -ENXIO;
 604         }
 605
 606         kvm_for_each_vcpu(i, vcpu, kvm) {
 607                 kvm_s390_vcpu_crypto_setup(vcpu);
 608                 exit_sie(vcpu);
 609         }
 610         mutex_unlock(&kvm->lock);
 611         return 0;
 612 }
 613
 614 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 615 {
 616         u8 gtod_high;
 617
 618         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 619                                            sizeof(gtod_high)))
 620                 return -EFAULT;
 621
 622         if (gtod_high != 0)
 623                 return -EINVAL;
 624         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 625
 626         return 0;
 627 }
 628
 629 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 630 {
 631         u64 gtod;
 632
 633         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 634                 return -EFAULT;
 635
 636         kvm_s390_set_tod_clock(kvm, gtod);
 637         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 638         return 0;
 639 }
 640
 641 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 642 {
 643         int ret;
 644
 645         if (attr->flags)
 646                 return -EINVAL;
 647
 648         switch (attr->attr) {
 649         case KVM_S390_VM_TOD_HIGH:
 650                 ret = kvm_s390_set_tod_high(kvm, attr);
 651                 break;
 652         case KVM_S390_VM_TOD_LOW:
 653                 ret = kvm_s390_set_tod_low(kvm, attr);
 654                 break;
 655         default:
 656                 ret = -ENXIO;
 657                 break;
 658         }
 659         return ret;
 660 }
 661
 662 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 663 {
 664         u8 gtod_high = 0;
 665
 666         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 667                                          sizeof(gtod_high)))
 668                 return -EFAULT;
 669         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 670
 671         return 0;
 672 }
 673
 674 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 675 {
 676         u64 gtod;
 677
 678         gtod = kvm_s390_get_tod_clock_fast(kvm);
 679         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 680                 return -EFAULT;
 681         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 682
 683         return 0;
 684 }
 685
 686 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 687 {
 688         int ret;
 689
 690         if (attr->flags)
 691                 return -EINVAL;
 692
 693         switch (attr->attr) {
 694         case KVM_S390_VM_TOD_HIGH:
 695                 ret = kvm_s390_get_tod_high(kvm, attr);
 696                 break;
 697         case KVM_S390_VM_TOD_LOW:
 698                 ret = kvm_s390_get_tod_low(kvm, attr);
 699                 break;
 700         default:
 701                 ret = -ENXIO;
 702                 break;
 703         }
 704         return ret;
 705 }
 706
 707 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 708 {
 709         struct kvm_s390_vm_cpu_processor *proc;
 710         u16 lowest_ibc, unblocked_ibc;
 711         int ret = 0;
 712
 713         mutex_lock(&kvm->lock);
 714         if (atomic_read(&kvm->online_vcpus)) {
 715                 ret = -EBUSY;
 716                 goto out;
 717         }
 718         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 719         if (!proc) {
 720                 ret = -ENOMEM;
 721                 goto out;
 722         }
 723         if (!copy_from_user(proc, (void __user *)attr->addr,
 724                             sizeof(*proc))) {
 725                 kvm->arch.model.cpuid = proc->cpuid;
 726                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 727                 unblocked_ibc = sclp.ibc & 0xfff;
 728                 if (lowest_ibc) {
 729                         if (proc->ibc > unblocked_ibc)
 730                                 kvm->arch.model.ibc = unblocked_ibc;
 731                         else if (proc->ibc < lowest_ibc)
 732                                 kvm->arch.model.ibc = lowest_ibc;
 733                         else
 734                                 kvm->arch.model.ibc = proc->ibc;
 735                 }
 736                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 737                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 738         } else
 739                 ret = -EFAULT;
 740         kfree(proc);
 741 out:
 742         mutex_unlock(&kvm->lock);
 743         return ret;
 744 }
 745
 746 static int kvm_s390_set_processor_feat(struct kvm *kvm,
 747                                        struct kvm_device_attr *attr)
 748 {
 749         struct kvm_s390_vm_cpu_feat data;
 750         int ret = -EBUSY;
 751
 752         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
 753                 return -EFAULT;
 754         if (!bitmap_subset((unsigned long *) data.feat,
 755                            kvm_s390_available_cpu_feat,
 756                            KVM_S390_VM_CPU_FEAT_NR_BITS))
 757                 return -EINVAL;
 758
 759         mutex_lock(&kvm->lock);
 760         if (!atomic_read(&kvm->online_vcpus)) {
 761                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
 762                             KVM_S390_VM_CPU_FEAT_NR_BITS);
 763                 ret = 0;
 764         }
 765         mutex_unlock(&kvm->lock);
 766         return ret;
 767 }
 768
 769 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
 770                                           struct kvm_device_attr *attr)
 771 {
 772         /*
 773          * Once supported by kernel + hw, we have to store the subfunctions
 774          * in kvm->arch and remember that user space configured them.
 775          */
 776         return -ENXIO;
 777 }
 778
 779 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 780 {
 781         int ret = -ENXIO;
 782
 783         switch (attr->attr) {
 784         case KVM_S390_VM_CPU_PROCESSOR:
 785                 ret = kvm_s390_set_processor(kvm, attr);
 786                 break;
 787         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 788                 ret = kvm_s390_set_processor_feat(kvm, attr);
 789                 break;
 790         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 791                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
 792                 break;
 793         }
 794         return ret;
 795 }
 796
 797 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 798 {
 799         struct kvm_s390_vm_cpu_processor *proc;
 800         int ret = 0;
 801
 802         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 803         if (!proc) {
 804                 ret = -ENOMEM;
 805                 goto out;
 806         }
 807         proc->cpuid = kvm->arch.model.cpuid;
 808         proc->ibc = kvm->arch.model.ibc;
 809         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 810                S390_ARCH_FAC_LIST_SIZE_BYTE);
 811         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 812                 ret = -EFAULT;
 813         kfree(proc);
 814 out:
 815         return ret;
 816 }
 817
 818 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 819 {
 820         struct kvm_s390_vm_cpu_machine *mach;
 821         int ret = 0;
 822
 823         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 824         if (!mach) {
 825                 ret = -ENOMEM;
 826                 goto out;
 827         }
 828         get_cpu_id((struct cpuid *) &mach->cpuid);
 829         mach->ibc = sclp.ibc;
 830         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 831                S390_ARCH_FAC_LIST_SIZE_BYTE);
 832         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 833                S390_ARCH_FAC_LIST_SIZE_BYTE);
 834         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 835                 ret = -EFAULT;
 836         kfree(mach);
 837 out:
 838         return ret;
 839 }
 840
 841 static int kvm_s390_get_processor_feat(struct kvm *kvm,
 842                                        struct kvm_device_attr *attr)
 843 {
 844         struct kvm_s390_vm_cpu_feat data;
 845
 846         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
 847                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 848         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 849                 return -EFAULT;
 850         return 0;
 851 }
 852
 853 static int kvm_s390_get_machine_feat(struct kvm *kvm,
 854                                      struct kvm_device_attr *attr)
 855 {
 856         struct kvm_s390_vm_cpu_feat data;
 857
 858         bitmap_copy((unsigned long *) data.feat,
 859                     kvm_s390_available_cpu_feat,
 860                     KVM_S390_VM_CPU_FEAT_NR_BITS);
 861         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
 862                 return -EFAULT;
 863         return 0;
 864 }
 865
 866 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
 867                                           struct kvm_device_attr *attr)
 868 {
 869         /*
 870          * Once we can actually configure subfunctions (kernel + hw support),
 871          * we have to check if they were already set by user space, if so copy
 872          * them from kvm->arch.
 873          */
 874         return -ENXIO;
 875 }
 876
 877 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
 878                                         struct kvm_device_attr *attr)
 879 {
 880         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
 881             sizeof(struct kvm_s390_vm_cpu_subfunc)))
 882                 return -EFAULT;
 883         return 0;
 884 }
 885 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 886 {
 887         int ret = -ENXIO;
 888
 889         switch (attr->attr) {
 890         case KVM_S390_VM_CPU_PROCESSOR:
 891                 ret = kvm_s390_get_processor(kvm, attr);
 892                 break;
 893         case KVM_S390_VM_CPU_MACHINE:
 894                 ret = kvm_s390_get_machine(kvm, attr);
 895                 break;
 896         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 897                 ret = kvm_s390_get_processor_feat(kvm, attr);
 898                 break;
 899         case KVM_S390_VM_CPU_MACHINE_FEAT:
 900                 ret = kvm_s390_get_machine_feat(kvm, attr);
 901                 break;
 902         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 903                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
 904                 break;
 905         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 906                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
 907                 break;
 908         }
 909         return ret;
 910 }
 911
 912 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 913 {
 914         int ret;
 915
 916         switch (attr->group) {
 917         case KVM_S390_VM_MEM_CTRL:
 918                 ret = kvm_s390_set_mem_control(kvm, attr);
 919                 break;
 920         case KVM_S390_VM_TOD:
 921                 ret = kvm_s390_set_tod(kvm, attr);
 922                 break;
 923         case KVM_S390_VM_CPU_MODEL:
 924                 ret = kvm_s390_set_cpu_model(kvm, attr);
 925                 break;
 926         case KVM_S390_VM_CRYPTO:
 927                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 928                 break;
 929         default:
 930                 ret = -ENXIO;
 931                 break;
 932         }
 933
 934         return ret;
 935 }
 936
 937 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 938 {
 939         int ret;
 940
 941         switch (attr->group) {
 942         case KVM_S390_VM_MEM_CTRL:
 943                 ret = kvm_s390_get_mem_control(kvm, attr);
 944                 break;
 945         case KVM_S390_VM_TOD:
 946                 ret = kvm_s390_get_tod(kvm, attr);
 947                 break;
 948         case KVM_S390_VM_CPU_MODEL:
 949                 ret = kvm_s390_get_cpu_model(kvm, attr);
 950                 break;
 951         default:
 952                 ret = -ENXIO;
 953                 break;
 954         }
 955
 956         return ret;
 957 }
 958
 959 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 960 {
 961         int ret;
 962
 963         switch (attr->group) {
 964         case KVM_S390_VM_MEM_CTRL:
 965                 switch (attr->attr) {
 966                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 967                 case KVM_S390_VM_MEM_CLR_CMMA:
 968                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 969                         ret = 0;
 970                         break;
 971                 default:
 972                         ret = -ENXIO;
 973                         break;
 974                 }
 975                 break;
 976         case KVM_S390_VM_TOD:
 977                 switch (attr->attr) {
 978                 case KVM_S390_VM_TOD_LOW:
 979                 case KVM_S390_VM_TOD_HIGH:
 980                         ret = 0;
 981                         break;
 982                 default:
 983                         ret = -ENXIO;
 984                         break;
 985                 }
 986                 break;
 987         case KVM_S390_VM_CPU_MODEL:
 988                 switch (attr->attr) {
 989                 case KVM_S390_VM_CPU_PROCESSOR:
 990                 case KVM_S390_VM_CPU_MACHINE:
 991                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
 992                 case KVM_S390_VM_CPU_MACHINE_FEAT:
 993                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
 994                         ret = 0;
 995                         break;
 996                 /* configuring subfunctions is not supported yet */
 997                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
 998                 default:
 999                         ret = -ENXIO;
1000                         break;
1001                 }
1002                 break;
1003         case KVM_S390_VM_CRYPTO:
1004                 switch (attr->attr) {
1005                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1006                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1007                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1008                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1009                         ret = 0;
1010                         break;
1011                 default:
1012                         ret = -ENXIO;
1013                         break;
1014                 }
1015                 break;
1016         default:
1017                 ret = -ENXIO;
1018                 break;
1019         }
1020
1021         return ret;
1022 }
1023
1024 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1025 {
1026         uint8_t *keys;
1027         uint64_t hva;
1028         unsigned long curkey;
1029         int i, r = 0;
1030
1031         if (args->flags != 0)
1032                 return -EINVAL;
1033
1034         /* Is this guest using storage keys? */
1035         if (!mm_use_skey(current->mm))
1036                 return KVM_S390_GET_SKEYS_NONE;
1037
1038         /* Enforce sane limit on memory allocation */
1039         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1040                 return -EINVAL;
1041
1042         keys = kmalloc_array(args->count, sizeof(uint8_t),
1043                              GFP_KERNEL | __GFP_NOWARN);
1044         if (!keys)
1045                 keys = vmalloc(sizeof(uint8_t) * args->count);
1046         if (!keys)
1047                 return -ENOMEM;
1048
1049         for (i = 0; i < args->count; i++) {
1050                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1051                 if (kvm_is_error_hva(hva)) {
1052                         r = -EFAULT;
1053                         goto out;
1054                 }
1055
1056                 curkey = get_guest_storage_key(current->mm, hva);
1057                 if (IS_ERR_VALUE(curkey)) {
1058                         r = curkey;
1059                         goto out;
1060                 }
1061                 keys[i] = curkey;
1062         }
1063
1064         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1065                          sizeof(uint8_t) * args->count);
1066         if (r)
1067                 r = -EFAULT;
1068 out:
1069         kvfree(keys);
1070         return r;
1071 }
1072
1073 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1074 {
1075         uint8_t *keys;
1076         uint64_t hva;
1077         int i, r = 0;
1078
1079         if (args->flags != 0)
1080                 return -EINVAL;
1081
1082         /* Enforce sane limit on memory allocation */
1083         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1084                 return -EINVAL;
1085
1086         keys = kmalloc_array(args->count, sizeof(uint8_t),
1087                              GFP_KERNEL | __GFP_NOWARN);
1088         if (!keys)
1089                 keys = vmalloc(sizeof(uint8_t) * args->count);
1090         if (!keys)
1091                 return -ENOMEM;
1092
1093         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1094                            sizeof(uint8_t) * args->count);
1095         if (r) {
1096                 r = -EFAULT;
1097                 goto out;
1098         }
1099
1100         /* Enable storage key handling for the guest */
1101         r = s390_enable_skey();
1102         if (r)
1103                 goto out;
1104
1105         for (i = 0; i < args->count; i++) {
1106                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1107                 if (kvm_is_error_hva(hva)) {
1108                         r = -EFAULT;
1109                         goto out;
1110                 }
1111
1112                 /* Lowest order bit is reserved */
1113                 if (keys[i] & 0x01) {
1114                         r = -EINVAL;
1115                         goto out;
1116                 }
1117
1118                 r = set_guest_storage_key(current->mm, hva,
1119                                           (unsigned long)keys[i], 0);
1120                 if (r)
1121                         goto out;
1122         }
1123 out:
1124         kvfree(keys);
1125         return r;
1126 }
1127
1128 long kvm_arch_vm_ioctl(struct file *filp,
1129                        unsigned int ioctl, unsigned long arg)
1130 {
1131         struct kvm *kvm = filp->private_data;
1132         void __user *argp = (void __user *)arg;
1133         struct kvm_device_attr attr;
1134         int r;
1135
1136         switch (ioctl) {
1137         case KVM_S390_INTERRUPT: {
1138                 struct kvm_s390_interrupt s390int;
1139
1140                 r = -EFAULT;
1141                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1142                         break;
1143                 r = kvm_s390_inject_vm(kvm, &s390int);
1144                 break;
1145         }
1146         case KVM_ENABLE_CAP: {
1147                 struct kvm_enable_cap cap;
1148                 r = -EFAULT;
1149                 if (copy_from_user(&cap, argp, sizeof(cap)))
1150                         break;
1151                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1152                 break;
1153         }
1154         case KVM_CREATE_IRQCHIP: {
1155                 struct kvm_irq_routing_entry routing;
1156
1157                 r = -EINVAL;
1158                 if (kvm->arch.use_irqchip) {
1159                         /* Set up dummy routing. */
1160                         memset(&routing, 0, sizeof(routing));
1161                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1162                 }
1163                 break;
1164         }
1165         case KVM_SET_DEVICE_ATTR: {
1166                 r = -EFAULT;
1167                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1168                         break;
1169                 r = kvm_s390_vm_set_attr(kvm, &attr);
1170                 break;
1171         }
1172         case KVM_GET_DEVICE_ATTR: {
1173                 r = -EFAULT;
1174                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1175                         break;
1176                 r = kvm_s390_vm_get_attr(kvm, &attr);
1177                 break;
1178         }
1179         case KVM_HAS_DEVICE_ATTR: {
1180                 r = -EFAULT;
1181                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1182                         break;
1183                 r = kvm_s390_vm_has_attr(kvm, &attr);
1184                 break;
1185         }
1186         case KVM_S390_GET_SKEYS: {
1187                 struct kvm_s390_skeys args;
1188
1189                 r = -EFAULT;
1190                 if (copy_from_user(&args, argp,
1191                                    sizeof(struct kvm_s390_skeys)))
1192                         break;
1193                 r = kvm_s390_get_skeys(kvm, &args);
1194                 break;
1195         }
1196         case KVM_S390_SET_SKEYS: {
1197                 struct kvm_s390_skeys args;
1198
1199                 r = -EFAULT;
1200                 if (copy_from_user(&args, argp,
1201                                    sizeof(struct kvm_s390_skeys)))
1202                         break;
1203                 r = kvm_s390_set_skeys(kvm, &args);
1204                 break;
1205         }
1206         default:
1207                 r = -ENOTTY;
1208         }
1209
1210         return r;
1211 }
1212
1213 static int kvm_s390_query_ap_config(u8 *config)
1214 {
1215         u32 fcn_code = 0x04000000UL;
1216         u32 cc = 0;
1217
1218         memset(config, 0, 128);
1219         asm volatile(
1220                 "lgr 0,%1\n"
1221                 "lgr 2,%2\n"
1222                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1223                 "0: ipm %0\n"
1224                 "srl %0,28\n"
1225                 "1:\n"
1226                 EX_TABLE(0b, 1b)
1227                 : "+r" (cc)
1228                 : "r" (fcn_code), "r" (config)
1229                 : "cc", "0", "2", "memory"
1230         );
1231
1232         return cc;
1233 }
1234
1235 static int kvm_s390_apxa_installed(void)
1236 {
1237         u8 config[128];
1238         int cc;
1239
1240         if (test_facility(12)) {
1241                 cc = kvm_s390_query_ap_config(config);
1242
1243                 if (cc)
1244                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1245                 else
1246                         return config[0] & 0x40;
1247         }
1248
1249         return 0;
1250 }
1251
1252 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1253 {
1254         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1255
1256         if (kvm_s390_apxa_installed())
1257                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1258         else
1259                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1260 }
1261
1262 static u64 kvm_s390_get_initial_cpuid(void)
1263 {
1264         struct cpuid cpuid;
1265
1266         get_cpu_id(&cpuid);
1267         cpuid.version = 0xff;
1268         return *((u64 *) &cpuid);
1269 }
1270
1271 static void kvm_s390_crypto_init(struct kvm *kvm)
1272 {
1273         if (!test_kvm_facility(kvm, 76))
1274                 return;
1275
1276         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1277         kvm_s390_set_crycb_format(kvm);
1278
1279         /* Enable AES/DEA protected key functions by default */
1280         kvm->arch.crypto.aes_kw = 1;
1281         kvm->arch.crypto.dea_kw = 1;
1282         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1283                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1284         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1285                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1286 }
1287
1288 static void sca_dispose(struct kvm *kvm)
1289 {
1290         if (kvm->arch.use_esca)
1291                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1292         else
1293                 free_page((unsigned long)(kvm->arch.sca));
1294         kvm->arch.sca = NULL;
1295 }
1296
1297 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1298 {
1299         gfp_t alloc_flags = GFP_KERNEL;
1300         int i, rc;
1301         char debug_name[16];
1302         static unsigned long sca_offset;
1303
1304         rc = -EINVAL;
1305 #ifdef CONFIG_KVM_S390_UCONTROL
1306         if (type & ~KVM_VM_S390_UCONTROL)
1307                 goto out_err;
1308         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1309                 goto out_err;
1310 #else
1311         if (type)
1312                 goto out_err;
1313 #endif
1314
1315         rc = s390_enable_sie();
1316         if (rc)
1317                 goto out_err;
1318
1319         rc = -ENOMEM;
1320
1321         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1322
1323         kvm->arch.use_esca = 0; /* start with basic SCA */
1324         if (!sclp.has_64bscao)
1325                 alloc_flags |= GFP_DMA;
1326         rwlock_init(&kvm->arch.sca_lock);
1327         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1328         if (!kvm->arch.sca)
1329                 goto out_err;
1330         spin_lock(&kvm_lock);
1331         sca_offset += 16;
1332         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1333                 sca_offset = 0;
1334         kvm->arch.sca = (struct bsca_block *)
1335                         ((char *) kvm->arch.sca + sca_offset);
1336         spin_unlock(&kvm_lock);
1337
1338         sprintf(debug_name, "kvm-%u", current->pid);
1339
1340         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1341         if (!kvm->arch.dbf)
1342                 goto out_err;
1343
1344         kvm->arch.sie_page2 =
1345              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1346         if (!kvm->arch.sie_page2)
1347                 goto out_err;
1348
1349         /* Populate the facility mask initially. */
1350         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1351                S390_ARCH_FAC_LIST_SIZE_BYTE);
1352         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1353                 if (i < kvm_s390_fac_list_mask_size())
1354                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1355                 else
1356                         kvm->arch.model.fac_mask[i] = 0UL;
1357         }
1358
1359         /* Populate the facility list initially. */
1360         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1361         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1362                S390_ARCH_FAC_LIST_SIZE_BYTE);
1363
1364         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1365         set_kvm_facility(kvm->arch.model.fac_list, 74);
1366
1367         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1368         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1369
1370         kvm_s390_crypto_init(kvm);
1371
1372         spin_lock_init(&kvm->arch.float_int.lock);
1373         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1374                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1375         init_waitqueue_head(&kvm->arch.ipte_wq);
1376         mutex_init(&kvm->arch.ipte_mutex);
1377
1378         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1379         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1380
1381         if (type & KVM_VM_S390_UCONTROL) {
1382                 kvm->arch.gmap = NULL;
1383                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1384         } else {
1385                 if (sclp.hamax == U64_MAX)
1386                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1387                 else
1388                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1389                                                     sclp.hamax + 1);
1390                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1391                 if (!kvm->arch.gmap)
1392                         goto out_err;
1393                 kvm->arch.gmap->private = kvm;
1394                 kvm->arch.gmap->pfault_enabled = 0;
1395         }
1396
1397         kvm->arch.css_support = 0;
1398         kvm->arch.use_irqchip = 0;
1399         kvm->arch.epoch = 0;
1400
1401         spin_lock_init(&kvm->arch.start_stop_lock);
1402         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1403
1404         return 0;
1405 out_err:
1406         free_page((unsigned long)kvm->arch.sie_page2);
1407         debug_unregister(kvm->arch.dbf);
1408         sca_dispose(kvm);
1409         KVM_EVENT(3, "creation of vm failed: %d", rc);
1410         return rc;
1411 }
1412
1413 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1414 {
1415         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1416         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1417         kvm_s390_clear_local_irqs(vcpu);
1418         kvm_clear_async_pf_completion_queue(vcpu);
1419         if (!kvm_is_ucontrol(vcpu->kvm))
1420                 sca_del_vcpu(vcpu);
1421
1422         if (kvm_is_ucontrol(vcpu->kvm))
1423                 gmap_free(vcpu->arch.gmap);
1424
1425         if (vcpu->kvm->arch.use_cmma)
1426                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1427         free_page((unsigned long)(vcpu->arch.sie_block));
1428
1429         kvm_vcpu_uninit(vcpu);
1430         kmem_cache_free(kvm_vcpu_cache, vcpu);
1431 }
1432
1433 static void kvm_free_vcpus(struct kvm *kvm)
1434 {
1435         unsigned int i;
1436         struct kvm_vcpu *vcpu;
1437
1438         kvm_for_each_vcpu(i, vcpu, kvm)
1439                 kvm_arch_vcpu_destroy(vcpu);
1440
1441         mutex_lock(&kvm->lock);
1442         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1443                 kvm->vcpus[i] = NULL;
1444
1445         atomic_set(&kvm->online_vcpus, 0);
1446         mutex_unlock(&kvm->lock);
1447 }
1448
1449 void kvm_arch_destroy_vm(struct kvm *kvm)
1450 {
1451         kvm_free_vcpus(kvm);
1452         sca_dispose(kvm);
1453         debug_unregister(kvm->arch.dbf);
1454         free_page((unsigned long)kvm->arch.sie_page2);
1455         if (!kvm_is_ucontrol(kvm))
1456                 gmap_free(kvm->arch.gmap);
1457         kvm_s390_destroy_adapters(kvm);
1458         kvm_s390_clear_float_irqs(kvm);
1459         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1460 }
1461
1462 /* Section: vcpu related */
1463 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1464 {
1465         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1466         if (!vcpu->arch.gmap)
1467                 return -ENOMEM;
1468         vcpu->arch.gmap->private = vcpu->kvm;
1469
1470         return 0;
1471 }
1472
1473 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1474 {
1475         read_lock(&vcpu->kvm->arch.sca_lock);
1476         if (vcpu->kvm->arch.use_esca) {
1477                 struct esca_block *sca = vcpu->kvm->arch.sca;
1478
1479                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1480                 sca->cpu[vcpu->vcpu_id].sda = 0;
1481         } else {
1482                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1483
1484                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1485                 sca->cpu[vcpu->vcpu_id].sda = 0;
1486         }
1487         read_unlock(&vcpu->kvm->arch.sca_lock);
1488 }
1489
1490 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1491 {
1492         read_lock(&vcpu->kvm->arch.sca_lock);
1493         if (vcpu->kvm->arch.use_esca) {
1494                 struct esca_block *sca = vcpu->kvm->arch.sca;
1495
1496                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1497                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1498                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1499                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1500                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1501         } else {
1502                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1503
1504                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1505                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1506                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1507                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1508         }
1509         read_unlock(&vcpu->kvm->arch.sca_lock);
1510 }
1511
1512 /* Basic SCA to Extended SCA data copy routines */
1513 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1514 {
1515         d->sda = s->sda;
1516         d->sigp_ctrl.c = s->sigp_ctrl.c;
1517         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1518 }
1519
1520 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1521 {
1522         int i;
1523
1524         d->ipte_control = s->ipte_control;
1525         d->mcn[0] = s->mcn;
1526         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1527                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1528 }
1529
1530 static int sca_switch_to_extended(struct kvm *kvm)
1531 {
1532         struct bsca_block *old_sca = kvm->arch.sca;
1533         struct esca_block *new_sca;
1534         struct kvm_vcpu *vcpu;
1535         unsigned int vcpu_idx;
1536         u32 scaol, scaoh;
1537
1538         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1539         if (!new_sca)
1540                 return -ENOMEM;
1541
1542         scaoh = (u32)((u64)(new_sca) >> 32);
1543         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1544
1545         kvm_s390_vcpu_block_all(kvm);
1546         write_lock(&kvm->arch.sca_lock);
1547
1548         sca_copy_b_to_e(new_sca, old_sca);
1549
1550         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1551                 vcpu->arch.sie_block->scaoh = scaoh;
1552                 vcpu->arch.sie_block->scaol = scaol;
1553                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1554         }
1555         kvm->arch.sca = new_sca;
1556         kvm->arch.use_esca = 1;
1557
1558         write_unlock(&kvm->arch.sca_lock);
1559         kvm_s390_vcpu_unblock_all(kvm);
1560
1561         free_page((unsigned long)old_sca);
1562
1563         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1564                  old_sca, kvm->arch.sca);
1565         return 0;
1566 }
1567
1568 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1569 {
1570         int rc;
1571
1572         if (id < KVM_S390_BSCA_CPU_SLOTS)
1573                 return true;
1574         if (!sclp.has_esca || !sclp.has_64bscao)
1575                 return false;
1576
1577         mutex_lock(&kvm->lock);
1578         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1579         mutex_unlock(&kvm->lock);
1580
1581         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1582 }
1583
1584 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1585 {
1586         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1587         kvm_clear_async_pf_completion_queue(vcpu);
1588         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1589                                     KVM_SYNC_GPRS |
1590                                     KVM_SYNC_ACRS |
1591                                     KVM_SYNC_CRS |
1592                                     KVM_SYNC_ARCH0 |
1593                                     KVM_SYNC_PFAULT;
1594         if (test_kvm_facility(vcpu->kvm, 64))
1595                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1596         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1597          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1598          */
1599         if (MACHINE_HAS_VX)
1600                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1601         else
1602                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1603
1604         if (kvm_is_ucontrol(vcpu->kvm))
1605                 return __kvm_ucontrol_vcpu_init(vcpu);
1606
1607         return 0;
1608 }
1609
1610 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1611 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1612 {
1613         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1614         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1615         vcpu->arch.cputm_start = get_tod_clock_fast();
1616         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1617 }
1618
1619 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1620 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1621 {
1622         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1623         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1624         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1625         vcpu->arch.cputm_start = 0;
1626         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1627 }
1628
1629 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1630 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1631 {
1632         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1633         vcpu->arch.cputm_enabled = true;
1634         __start_cpu_timer_accounting(vcpu);
1635 }
1636
1637 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1638 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1639 {
1640         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1641         __stop_cpu_timer_accounting(vcpu);
1642         vcpu->arch.cputm_enabled = false;
1643 }
1644
1645 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1646 {
1647         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1648         __enable_cpu_timer_accounting(vcpu);
1649         preempt_enable();
1650 }
1651
1652 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1653 {
1654         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1655         __disable_cpu_timer_accounting(vcpu);
1656         preempt_enable();
1657 }
1658
1659 /* set the cpu timer - may only be called from the VCPU thread itself */
1660 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1661 {
1662         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1663         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1664         if (vcpu->arch.cputm_enabled)
1665                 vcpu->arch.cputm_start = get_tod_clock_fast();
1666         vcpu->arch.sie_block->cputm = cputm;
1667         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1668         preempt_enable();
1669 }
1670
1671 /* update and get the cpu timer - can also be called from other VCPU threads */
1672 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1673 {
1674         unsigned int seq;
1675         __u64 value;
1676
1677         if (unlikely(!vcpu->arch.cputm_enabled))
1678                 return vcpu->arch.sie_block->cputm;
1679
1680         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1681         do {
1682                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1683                 /*
1684                  * If the writer would ever execute a read in the critical
1685                  * section, e.g. in irq context, we have a deadlock.
1686                  */
1687                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1688                 value = vcpu->arch.sie_block->cputm;
1689                 /* if cputm_start is 0, accounting is being started/stopped */
1690                 if (likely(vcpu->arch.cputm_start))
1691                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1692         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1693         preempt_enable();
1694         return value;
1695 }
1696
1697 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1698 {
1699         /* Save host register state */
1700         save_fpu_regs();
1701         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1702         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1703
1704         if (MACHINE_HAS_VX)
1705                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1706         else
1707                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1708         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1709         if (test_fp_ctl(current->thread.fpu.fpc))
1710                 /* User space provided an invalid FPC, let's clear it */
1711                 current->thread.fpu.fpc = 0;
1712
1713         save_access_regs(vcpu->arch.host_acrs);
1714         restore_access_regs(vcpu->run->s.regs.acrs);
1715         gmap_enable(vcpu->arch.gmap);
1716         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1717         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1718                 __start_cpu_timer_accounting(vcpu);
1719         vcpu->cpu = cpu;
1720 }
1721
1722 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1723 {
1724         vcpu->cpu = -1;
1725         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1726                 __stop_cpu_timer_accounting(vcpu);
1727         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1728         gmap_disable(vcpu->arch.gmap);
1729
1730         /* Save guest register state */
1731         save_fpu_regs();
1732         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1733
1734         /* Restore host register state */
1735         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1736         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1737
1738         save_access_regs(vcpu->run->s.regs.acrs);
1739         restore_access_regs(vcpu->arch.host_acrs);
1740 }
1741
1742 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1743 {
1744         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1745         vcpu->arch.sie_block->gpsw.mask = 0UL;
1746         vcpu->arch.sie_block->gpsw.addr = 0UL;
1747         kvm_s390_set_prefix(vcpu, 0);
1748         kvm_s390_set_cpu_timer(vcpu, 0);
1749         vcpu->arch.sie_block->ckc       = 0UL;
1750         vcpu->arch.sie_block->todpr     = 0;
1751         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1752         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1753         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1754         /* make sure the new fpc will be lazily loaded */
1755         save_fpu_regs();
1756         current->thread.fpu.fpc = 0;
1757         vcpu->arch.sie_block->gbea = 1;
1758         vcpu->arch.sie_block->pp = 0;
1759         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1760         kvm_clear_async_pf_completion_queue(vcpu);
1761         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1762                 kvm_s390_vcpu_stop(vcpu);
1763         kvm_s390_clear_local_irqs(vcpu);
1764 }
1765
1766 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1767 {
1768         mutex_lock(&vcpu->kvm->lock);
1769         preempt_disable();
1770         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1771         preempt_enable();
1772         mutex_unlock(&vcpu->kvm->lock);
1773         if (!kvm_is_ucontrol(vcpu->kvm)) {
1774                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1775                 sca_add_vcpu(vcpu);
1776         }
1777
1778 }
1779
1780 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1781 {
1782         if (!test_kvm_facility(vcpu->kvm, 76))
1783                 return;
1784
1785         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1786
1787         if (vcpu->kvm->arch.crypto.aes_kw)
1788                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1789         if (vcpu->kvm->arch.crypto.dea_kw)
1790                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1791
1792         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1793 }
1794
1795 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1796 {
1797         free_page(vcpu->arch.sie_block->cbrlo);
1798         vcpu->arch.sie_block->cbrlo = 0;
1799 }
1800
1801 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1802 {
1803         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1804         if (!vcpu->arch.sie_block->cbrlo)
1805                 return -ENOMEM;
1806
1807         vcpu->arch.sie_block->ecb2 |= 0x80;
1808         vcpu->arch.sie_block->ecb2 &= ~0x08;
1809         return 0;
1810 }
1811
1812 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1813 {
1814         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1815
1816         vcpu->arch.sie_block->ibc = model->ibc;
1817         if (test_kvm_facility(vcpu->kvm, 7))
1818                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1819 }
1820
1821 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1822 {
1823         int rc = 0;
1824
1825         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1826                                                     CPUSTAT_SM |
1827                                                     CPUSTAT_STOPPED);
1828
1829         if (test_kvm_facility(vcpu->kvm, 78))
1830                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1831         else if (test_kvm_facility(vcpu->kvm, 8))
1832                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1833
1834         kvm_s390_vcpu_setup_model(vcpu);
1835
1836         vcpu->arch.sie_block->ecb = 0x02;
1837         if (test_kvm_facility(vcpu->kvm, 9))
1838                 vcpu->arch.sie_block->ecb |= 0x04;
1839         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1840                 vcpu->arch.sie_block->ecb |= 0x10;
1841
1842         if (test_kvm_facility(vcpu->kvm, 8))
1843                 vcpu->arch.sie_block->ecb2 |= 0x08;
1844         vcpu->arch.sie_block->eca   = 0xC1002000U;
1845         if (sclp.has_siif)
1846                 vcpu->arch.sie_block->eca |= 1;
1847         if (sclp.has_sigpif)
1848                 vcpu->arch.sie_block->eca |= 0x10000000U;
1849         if (test_kvm_facility(vcpu->kvm, 64))
1850                 vcpu->arch.sie_block->ecb3 |= 0x01;
1851         if (test_kvm_facility(vcpu->kvm, 129)) {
1852                 vcpu->arch.sie_block->eca |= 0x00020000;
1853                 vcpu->arch.sie_block->ecd |= 0x20000000;
1854         }
1855         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1856         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1857         if (test_kvm_facility(vcpu->kvm, 74))
1858                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1859
1860         if (vcpu->kvm->arch.use_cmma) {
1861                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1862                 if (rc)
1863                         return rc;
1864         }
1865         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1866         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1867
1868         kvm_s390_vcpu_crypto_setup(vcpu);
1869
1870         return rc;
1871 }
1872
1873 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1874                                       unsigned int id)
1875 {
1876         struct kvm_vcpu *vcpu;
1877         struct sie_page *sie_page;
1878         int rc = -EINVAL;
1879
1880         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1881                 goto out;
1882
1883         rc = -ENOMEM;
1884
1885         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1886         if (!vcpu)
1887                 goto out;
1888
1889         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1890         if (!sie_page)
1891                 goto out_free_cpu;
1892
1893         vcpu->arch.sie_block = &sie_page->sie_block;
1894         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1895
1896         vcpu->arch.sie_block->icpua = id;
1897         spin_lock_init(&vcpu->arch.local_int.lock);
1898         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1899         vcpu->arch.local_int.wq = &vcpu->wq;
1900         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1901         seqcount_init(&vcpu->arch.cputm_seqcount);
1902
1903         rc = kvm_vcpu_init(vcpu, kvm, id);
1904         if (rc)
1905                 goto out_free_sie_block;
1906         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1907                  vcpu->arch.sie_block);
1908         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1909
1910         return vcpu;
1911 out_free_sie_block:
1912         free_page((unsigned long)(vcpu->arch.sie_block));
1913 out_free_cpu:
1914         kmem_cache_free(kvm_vcpu_cache, vcpu);
1915 out:
1916         return ERR_PTR(rc);
1917 }
1918
1919 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1920 {
1921         return kvm_s390_vcpu_has_irq(vcpu, 0);
1922 }
1923
1924 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1925 {
1926         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1927         exit_sie(vcpu);
1928 }
1929
1930 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1931 {
1932         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1933 }
1934
1935 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1936 {
1937         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1938         exit_sie(vcpu);
1939 }
1940
1941 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1942 {
1943         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1944 }
1945
1946 /*
1947  * Kick a guest cpu out of SIE and wait until SIE is not running.
1948  * If the CPU is not running (e.g. waiting as idle) the function will
1949  * return immediately. */
1950 void exit_sie(struct kvm_vcpu *vcpu)
1951 {
1952         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1953         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1954                 cpu_relax();
1955 }
1956
1957 /* Kick a guest cpu out of SIE to process a request synchronously */
1958 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1959 {
1960         kvm_make_request(req, vcpu);
1961         kvm_s390_vcpu_request(vcpu);
1962 }
1963
1964 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1965 {
1966         int i;
1967         struct kvm *kvm = gmap->private;
1968         struct kvm_vcpu *vcpu;
1969
1970         kvm_for_each_vcpu(i, vcpu, kvm) {
1971                 /* match against both prefix pages */
1972                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1973                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1974                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1975                 }
1976         }
1977 }
1978
1979 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1980 {
1981         /* kvm common code refers to this, but never calls it */
1982         BUG();
1983         return 0;
1984 }
1985
1986 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1987                                            struct kvm_one_reg *reg)
1988 {
1989         int r = -EINVAL;
1990
1991         switch (reg->id) {
1992         case KVM_REG_S390_TODPR:
1993                 r = put_user(vcpu->arch.sie_block->todpr,
1994                              (u32 __user *)reg->addr);
1995                 break;
1996         case KVM_REG_S390_EPOCHDIFF:
1997                 r = put_user(vcpu->arch.sie_block->epoch,
1998                              (u64 __user *)reg->addr);
1999                 break;
2000         case KVM_REG_S390_CPU_TIMER:
2001                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2002                              (u64 __user *)reg->addr);
2003                 break;
2004         case KVM_REG_S390_CLOCK_COMP:
2005                 r = put_user(vcpu->arch.sie_block->ckc,
2006                              (u64 __user *)reg->addr);
2007                 break;
2008         case KVM_REG_S390_PFTOKEN:
2009                 r = put_user(vcpu->arch.pfault_token,
2010                              (u64 __user *)reg->addr);
2011                 break;
2012         case KVM_REG_S390_PFCOMPARE:
2013                 r = put_user(vcpu->arch.pfault_compare,
2014                              (u64 __user *)reg->addr);
2015                 break;
2016         case KVM_REG_S390_PFSELECT:
2017                 r = put_user(vcpu->arch.pfault_select,
2018                              (u64 __user *)reg->addr);
2019                 break;
2020         case KVM_REG_S390_PP:
2021                 r = put_user(vcpu->arch.sie_block->pp,
2022                              (u64 __user *)reg->addr);
2023                 break;
2024         case KVM_REG_S390_GBEA:
2025                 r = put_user(vcpu->arch.sie_block->gbea,
2026                              (u64 __user *)reg->addr);
2027                 break;
2028         default:
2029                 break;
2030         }
2031
2032         return r;
2033 }
2034
2035 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2036                                            struct kvm_one_reg *reg)
2037 {
2038         int r = -EINVAL;
2039         __u64 val;
2040
2041         switch (reg->id) {
2042         case KVM_REG_S390_TODPR:
2043                 r = get_user(vcpu->arch.sie_block->todpr,
2044                              (u32 __user *)reg->addr);
2045                 break;
2046         case KVM_REG_S390_EPOCHDIFF:
2047                 r = get_user(vcpu->arch.sie_block->epoch,
2048                              (u64 __user *)reg->addr);
2049                 break;
2050         case KVM_REG_S390_CPU_TIMER:
2051                 r = get_user(val, (u64 __user *)reg->addr);
2052                 if (!r)
2053                         kvm_s390_set_cpu_timer(vcpu, val);
2054                 break;
2055         case KVM_REG_S390_CLOCK_COMP:
2056                 r = get_user(vcpu->arch.sie_block->ckc,
2057                              (u64 __user *)reg->addr);
2058                 break;
2059         case KVM_REG_S390_PFTOKEN:
2060                 r = get_user(vcpu->arch.pfault_token,
2061                              (u64 __user *)reg->addr);
2062                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2063                         kvm_clear_async_pf_completion_queue(vcpu);
2064                 break;
2065         case KVM_REG_S390_PFCOMPARE:
2066                 r = get_user(vcpu->arch.pfault_compare,
2067                              (u64 __user *)reg->addr);
2068                 break;
2069         case KVM_REG_S390_PFSELECT:
2070                 r = get_user(vcpu->arch.pfault_select,
2071                              (u64 __user *)reg->addr);
2072                 break;
2073         case KVM_REG_S390_PP:
2074                 r = get_user(vcpu->arch.sie_block->pp,
2075                              (u64 __user *)reg->addr);
2076                 break;
2077         case KVM_REG_S390_GBEA:
2078                 r = get_user(vcpu->arch.sie_block->gbea,
2079                              (u64 __user *)reg->addr);
2080                 break;
2081         default:
2082                 break;
2083         }
2084
2085         return r;
2086 }
2087
2088 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2089 {
2090         kvm_s390_vcpu_initial_reset(vcpu);
2091         return 0;
2092 }
2093
2094 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2095 {
2096         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2097         return 0;
2098 }
2099
2100 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2101 {
2102         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2103         return 0;
2104 }
2105
2106 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2107                                   struct kvm_sregs *sregs)
2108 {
2109         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2110         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2111         restore_access_regs(vcpu->run->s.regs.acrs);
2112         return 0;
2113 }
2114
2115 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2116                                   struct kvm_sregs *sregs)
2117 {
2118         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2119         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2120         return 0;
2121 }
2122
2123 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2124 {
2125         /* make sure the new values will be lazily loaded */
2126         save_fpu_regs();
2127         if (test_fp_ctl(fpu->fpc))
2128                 return -EINVAL;
2129         current->thread.fpu.fpc = fpu->fpc;
2130         if (MACHINE_HAS_VX)
2131                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
2132         else
2133                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
2134         return 0;
2135 }
2136
2137 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2138 {
2139         /* make sure we have the latest values */
2140         save_fpu_regs();
2141         if (MACHINE_HAS_VX)
2142                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
2143         else
2144                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
2145         fpu->fpc = current->thread.fpu.fpc;
2146         return 0;
2147 }
2148
2149 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2150 {
2151         int rc = 0;
2152
2153         if (!is_vcpu_stopped(vcpu))
2154                 rc = -EBUSY;
2155         else {
2156                 vcpu->run->psw_mask = psw.mask;
2157                 vcpu->run->psw_addr = psw.addr;
2158         }
2159         return rc;
2160 }
2161
2162 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2163                                   struct kvm_translation *tr)
2164 {
2165         return -EINVAL; /* not implemented yet */
2166 }
2167
2168 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2169                               KVM_GUESTDBG_USE_HW_BP | \
2170                               KVM_GUESTDBG_ENABLE)
2171
2172 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2173                                         struct kvm_guest_debug *dbg)
2174 {
2175         int rc = 0;
2176
2177         vcpu->guest_debug = 0;
2178         kvm_s390_clear_bp_data(vcpu);
2179
2180         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2181                 return -EINVAL;
2182         if (!sclp.has_gpere)
2183                 return -EINVAL;
2184
2185         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2186                 vcpu->guest_debug = dbg->control;
2187                 /* enforce guest PER */
2188                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2189
2190                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2191                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2192         } else {
2193                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2194                 vcpu->arch.guestdbg.last_bp = 0;
2195         }
2196
2197         if (rc) {
2198                 vcpu->guest_debug = 0;
2199                 kvm_s390_clear_bp_data(vcpu);
2200                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2201         }
2202
2203         return rc;
2204 }
2205
2206 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2207                                     struct kvm_mp_state *mp_state)
2208 {
2209         /* CHECK_STOP and LOAD are not supported yet */
2210         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2211                                        KVM_MP_STATE_OPERATING;
2212 }
2213
2214 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2215                                     struct kvm_mp_state *mp_state)
2216 {
2217         int rc = 0;
2218
2219         /* user space knows about this interface - let it control the state */
2220         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2221
2222         switch (mp_state->mp_state) {
2223         case KVM_MP_STATE_STOPPED:
2224                 kvm_s390_vcpu_stop(vcpu);
2225                 break;
2226         case KVM_MP_STATE_OPERATING:
2227                 kvm_s390_vcpu_start(vcpu);
2228                 break;
2229         case KVM_MP_STATE_LOAD:
2230         case KVM_MP_STATE_CHECK_STOP:
2231                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2232         default:
2233                 rc = -ENXIO;
2234         }
2235
2236         return rc;
2237 }
2238
2239 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2240 {
2241         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2242 }
2243
2244 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2245 {
2246 retry:
2247         kvm_s390_vcpu_request_handled(vcpu);
2248         if (!vcpu->requests)
2249                 return 0;
2250         /*
2251          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2252          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2253          * This ensures that the ipte instruction for this request has
2254          * already finished. We might race against a second unmapper that
2255          * wants to set the blocking bit. Lets just retry the request loop.
2256          */
2257         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2258                 int rc;
2259                 rc = gmap_ipte_notify(vcpu->arch.gmap,
2260                                       kvm_s390_get_prefix(vcpu),
2261                                       PAGE_SIZE * 2);
2262                 if (rc)
2263                         return rc;
2264                 goto retry;
2265         }
2266
2267         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2268                 vcpu->arch.sie_block->ihcpu = 0xffff;
2269                 goto retry;
2270         }
2271
2272         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2273                 if (!ibs_enabled(vcpu)) {
2274                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2275                         atomic_or(CPUSTAT_IBS,
2276                                         &vcpu->arch.sie_block->cpuflags);
2277                 }
2278                 goto retry;
2279         }
2280
2281         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2282                 if (ibs_enabled(vcpu)) {
2283                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2284                         atomic_andnot(CPUSTAT_IBS,
2285                                           &vcpu->arch.sie_block->cpuflags);
2286                 }
2287                 goto retry;
2288         }
2289
2290         /* nothing to do, just clear the request */
2291         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2292
2293         return 0;
2294 }
2295
2296 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2297 {
2298         struct kvm_vcpu *vcpu;
2299         int i;
2300
2301         mutex_lock(&kvm->lock);
2302         preempt_disable();
2303         kvm->arch.epoch = tod - get_tod_clock();
2304         kvm_s390_vcpu_block_all(kvm);
2305         kvm_for_each_vcpu(i, vcpu, kvm)
2306                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2307         kvm_s390_vcpu_unblock_all(kvm);
2308         preempt_enable();
2309         mutex_unlock(&kvm->lock);
2310 }
2311
2312 /**
2313  * kvm_arch_fault_in_page - fault-in guest page if necessary
2314  * @vcpu: The corresponding virtual cpu
2315  * @gpa: Guest physical address
2316  * @writable: Whether the page should be writable or not
2317  *
2318  * Make sure that a guest page has been faulted-in on the host.
2319  *
2320  * Return: Zero on success, negative error code otherwise.
2321  */
2322 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2323 {
2324         return gmap_fault(vcpu->arch.gmap, gpa,
2325                           writable ? FAULT_FLAG_WRITE : 0);
2326 }
2327
2328 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2329                                       unsigned long token)
2330 {
2331         struct kvm_s390_interrupt inti;
2332         struct kvm_s390_irq irq;
2333
2334         if (start_token) {
2335                 irq.u.ext.ext_params2 = token;
2336                 irq.type = KVM_S390_INT_PFAULT_INIT;
2337                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2338         } else {
2339                 inti.type = KVM_S390_INT_PFAULT_DONE;
2340                 inti.parm64 = token;
2341                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2342         }
2343 }
2344
2345 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2346                                      struct kvm_async_pf *work)
2347 {
2348         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2349         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2350 }
2351
2352 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2353                                  struct kvm_async_pf *work)
2354 {
2355         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2356         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2357 }
2358
2359 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2360                                struct kvm_async_pf *work)
2361 {
2362         /* s390 will always inject the page directly */
2363 }
2364
2365 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2366 {
2367         /*
2368          * s390 will always inject the page directly,
2369          * but we still want check_async_completion to cleanup
2370          */
2371         return true;
2372 }
2373
2374 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2375 {
2376         hva_t hva;
2377         struct kvm_arch_async_pf arch;
2378         int rc;
2379
2380         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2381                 return 0;
2382         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2383             vcpu->arch.pfault_compare)
2384                 return 0;
2385         if (psw_extint_disabled(vcpu))
2386                 return 0;
2387         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2388                 return 0;
2389         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2390                 return 0;
2391         if (!vcpu->arch.gmap->pfault_enabled)
2392                 return 0;
2393
2394         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2395         hva += current->thread.gmap_addr & ~PAGE_MASK;
2396         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2397                 return 0;
2398
2399         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2400         return rc;
2401 }
2402
2403 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2404 {
2405         int rc, cpuflags;
2406
2407         /*
2408          * On s390 notifications for arriving pages will be delivered directly
2409          * to the guest but the house keeping for completed pfaults is
2410          * handled outside the worker.
2411          */
2412         kvm_check_async_pf_completion(vcpu);
2413
2414         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2415         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2416
2417         if (need_resched())
2418                 schedule();
2419
2420         if (test_cpu_flag(CIF_MCCK_PENDING))
2421                 s390_handle_mcck();
2422
2423         if (!kvm_is_ucontrol(vcpu->kvm)) {
2424                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2425                 if (rc)
2426                         return rc;
2427         }
2428
2429         rc = kvm_s390_handle_requests(vcpu);
2430         if (rc)
2431                 return rc;
2432
2433         if (guestdbg_enabled(vcpu)) {
2434                 kvm_s390_backup_guest_per_regs(vcpu);
2435                 kvm_s390_patch_guest_per_regs(vcpu);
2436         }
2437
2438         vcpu->arch.sie_block->icptcode = 0;
2439         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2440         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2441         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2442
2443         return 0;
2444 }
2445
2446 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2447 {
2448         struct kvm_s390_pgm_info pgm_info = {
2449                 .code = PGM_ADDRESSING,
2450         };
2451         u8 opcode, ilen;
2452         int rc;
2453
2454         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2455         trace_kvm_s390_sie_fault(vcpu);
2456
2457         /*
2458          * We want to inject an addressing exception, which is defined as a
2459          * suppressing or terminating exception. However, since we came here
2460          * by a DAT access exception, the PSW still points to the faulting
2461          * instruction since DAT exceptions are nullifying. So we've got
2462          * to look up the current opcode to get the length of the instruction
2463          * to be able to forward the PSW.
2464          */
2465         rc = read_guest_instr(vcpu, &opcode, 1);
2466         ilen = insn_length(opcode);
2467         if (rc < 0) {
2468                 return rc;
2469         } else if (rc) {
2470                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2471                  * Forward by arbitrary ilc, injection will take care of
2472                  * nullification if necessary.
2473                  */
2474                 pgm_info = vcpu->arch.pgm;
2475                 ilen = 4;
2476         }
2477         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2478         kvm_s390_forward_psw(vcpu, ilen);
2479         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2480 }
2481
2482 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2483 {
2484         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2485                    vcpu->arch.sie_block->icptcode);
2486         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2487
2488         if (guestdbg_enabled(vcpu))
2489                 kvm_s390_restore_guest_per_regs(vcpu);
2490
2491         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2492         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2493
2494         if (vcpu->arch.sie_block->icptcode > 0) {
2495                 int rc = kvm_handle_sie_intercept(vcpu);
2496
2497                 if (rc != -EOPNOTSUPP)
2498                         return rc;
2499                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2500                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2501                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2502                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2503                 return -EREMOTE;
2504         } else if (exit_reason != -EFAULT) {
2505                 vcpu->stat.exit_null++;
2506                 return 0;
2507         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2508                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2509                 vcpu->run->s390_ucontrol.trans_exc_code =
2510                                                 current->thread.gmap_addr;
2511                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2512                 return -EREMOTE;
2513         } else if (current->thread.gmap_pfault) {
2514                 trace_kvm_s390_major_guest_pfault(vcpu);
2515                 current->thread.gmap_pfault = 0;
2516                 if (kvm_arch_setup_async_pf(vcpu))
2517                         return 0;
2518                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2519         }
2520         return vcpu_post_run_fault_in_sie(vcpu);
2521 }
2522
2523 static int __vcpu_run(struct kvm_vcpu *vcpu)
2524 {
2525         int rc, exit_reason;
2526
2527         /*
2528          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2529          * ning the guest), so that memslots (and other stuff) are protected
2530          */
2531         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2532
2533         do {
2534                 rc = vcpu_pre_run(vcpu);
2535                 if (rc)
2536                         break;
2537
2538                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2539                 /*
2540                  * As PF_VCPU will be used in fault handler, between
2541                  * guest_enter and guest_exit should be no uaccess.
2542                  */
2543                 local_irq_disable();
2544                 __kvm_guest_enter();
2545                 __disable_cpu_timer_accounting(vcpu);
2546                 local_irq_enable();
2547                 exit_reason = sie64a(vcpu->arch.sie_block,
2548                                      vcpu->run->s.regs.gprs);
2549                 local_irq_disable();
2550                 __enable_cpu_timer_accounting(vcpu);
2551                 __kvm_guest_exit();
2552                 local_irq_enable();
2553                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2554
2555                 rc = vcpu_post_run(vcpu, exit_reason);
2556         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2557
2558         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2559         return rc;
2560 }
2561
2562 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2563 {
2564         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2565         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2566         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2567                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2568         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2569                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2570                 /* some control register changes require a tlb flush */
2571                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2572         }
2573         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2574                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2575                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2576                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2577                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2578                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2579         }
2580         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2581                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2582                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2583                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2584                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2585                         kvm_clear_async_pf_completion_queue(vcpu);
2586         }
2587         kvm_run->kvm_dirty_regs = 0;
2588 }
2589
2590 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2591 {
2592         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2593         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2594         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2595         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2596         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2597         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2598         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2599         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2600         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2601         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2602         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2603         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2604 }
2605
2606 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2607 {
2608         int rc;
2609         sigset_t sigsaved;
2610
2611         if (guestdbg_exit_pending(vcpu)) {
2612                 kvm_s390_prepare_debug_exit(vcpu);
2613                 return 0;
2614         }
2615
2616         if (vcpu->sigset_active)
2617                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2618
2619         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2620                 kvm_s390_vcpu_start(vcpu);
2621         } else if (is_vcpu_stopped(vcpu)) {
2622                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2623                                    vcpu->vcpu_id);
2624                 return -EINVAL;
2625         }
2626
2627         sync_regs(vcpu, kvm_run);
2628         enable_cpu_timer_accounting(vcpu);
2629
2630         might_fault();
2631         rc = __vcpu_run(vcpu);
2632
2633         if (signal_pending(current) && !rc) {
2634                 kvm_run->exit_reason = KVM_EXIT_INTR;
2635                 rc = -EINTR;
2636         }
2637
2638         if (guestdbg_exit_pending(vcpu) && !rc)  {
2639                 kvm_s390_prepare_debug_exit(vcpu);
2640                 rc = 0;
2641         }
2642
2643         if (rc == -EREMOTE) {
2644                 /* userspace support is needed, kvm_run has been prepared */
2645                 rc = 0;
2646         }
2647
2648         disable_cpu_timer_accounting(vcpu);
2649         store_regs(vcpu, kvm_run);
2650
2651         if (vcpu->sigset_active)
2652                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2653
2654         vcpu->stat.exit_userspace++;
2655         return rc;
2656 }
2657
2658 /*
2659  * store status at address
2660  * we use have two special cases:
2661  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2662  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2663  */
2664 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2665 {
2666         unsigned char archmode = 1;
2667         freg_t fprs[NUM_FPRS];
2668         unsigned int px;
2669         u64 clkcomp, cputm;
2670         int rc;
2671
2672         px = kvm_s390_get_prefix(vcpu);
2673         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2674                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2675                         return -EFAULT;
2676                 gpa = 0;
2677         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2678                 if (write_guest_real(vcpu, 163, &archmode, 1))
2679                         return -EFAULT;
2680                 gpa = px;
2681         } else
2682                 gpa -= __LC_FPREGS_SAVE_AREA;
2683
2684         /* manually convert vector registers if necessary */
2685         if (MACHINE_HAS_VX) {
2686                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2687                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2688                                      fprs, 128);
2689         } else {
2690                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2691                                      vcpu->run->s.regs.fprs, 128);
2692         }
2693         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2694                               vcpu->run->s.regs.gprs, 128);
2695         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2696                               &vcpu->arch.sie_block->gpsw, 16);
2697         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2698                               &px, 4);
2699         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2700                               &vcpu->run->s.regs.fpc, 4);
2701         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2702                               &vcpu->arch.sie_block->todpr, 4);
2703         cputm = kvm_s390_get_cpu_timer(vcpu);
2704         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2705                               &cputm, 8);
2706         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2707         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2708                               &clkcomp, 8);
2709         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2710                               &vcpu->run->s.regs.acrs, 64);
2711         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2712                               &vcpu->arch.sie_block->gcr, 128);
2713         return rc ? -EFAULT : 0;
2714 }
2715
2716 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2717 {
2718         /*
2719          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2720          * copying in vcpu load/put. Lets update our copies before we save
2721          * it into the save area
2722          */
2723         save_fpu_regs();
2724         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2725         save_access_regs(vcpu->run->s.regs.acrs);
2726
2727         return kvm_s390_store_status_unloaded(vcpu, addr);
2728 }
2729
2730 /*
2731  * store additional status at address
2732  */
2733 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2734                                         unsigned long gpa)
2735 {
2736         /* Only bits 0-53 are used for address formation */
2737         if (!(gpa & ~0x3ff))
2738                 return 0;
2739
2740         return write_guest_abs(vcpu, gpa & ~0x3ff,
2741                                (void *)&vcpu->run->s.regs.vrs, 512);
2742 }
2743
2744 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2745 {
2746         if (!test_kvm_facility(vcpu->kvm, 129))
2747                 return 0;
2748
2749         /*
2750          * The guest VXRS are in the host VXRs due to the lazy
2751          * copying in vcpu load/put. We can simply call save_fpu_regs()
2752          * to save the current register state because we are in the
2753          * middle of a load/put cycle.
2754          *
2755          * Let's update our copies before we save it into the save area.
2756          */
2757         save_fpu_regs();
2758
2759         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2760 }
2761
2762 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2763 {
2764         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2765         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2766 }
2767
2768 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2769 {
2770         unsigned int i;
2771         struct kvm_vcpu *vcpu;
2772
2773         kvm_for_each_vcpu(i, vcpu, kvm) {
2774                 __disable_ibs_on_vcpu(vcpu);
2775         }
2776 }
2777
2778 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2779 {
2780         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2781         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2782 }
2783
2784 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2785 {
2786         int i, online_vcpus, started_vcpus = 0;
2787
2788         if (!is_vcpu_stopped(vcpu))
2789                 return;
2790
2791         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2792         /* Only one cpu at a time may enter/leave the STOPPED state. */
2793         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2794         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2795
2796         for (i = 0; i < online_vcpus; i++) {
2797                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2798                         started_vcpus++;
2799         }
2800
2801         if (started_vcpus == 0) {
2802                 /* we're the only active VCPU -> speed it up */
2803                 __enable_ibs_on_vcpu(vcpu);
2804         } else if (started_vcpus == 1) {
2805                 /*
2806                  * As we are starting a second VCPU, we have to disable
2807                  * the IBS facility on all VCPUs to remove potentially
2808                  * oustanding ENABLE requests.
2809                  */
2810                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2811         }
2812
2813         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2814         /*
2815          * Another VCPU might have used IBS while we were offline.
2816          * Let's play safe and flush the VCPU at startup.
2817          */
2818         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2819         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2820         return;
2821 }
2822
2823 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2824 {
2825         int i, online_vcpus, started_vcpus = 0;
2826         struct kvm_vcpu *started_vcpu = NULL;
2827
2828         if (is_vcpu_stopped(vcpu))
2829                 return;
2830
2831         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2832         /* Only one cpu at a time may enter/leave the STOPPED state. */
2833         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2834         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2835
2836         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2837         kvm_s390_clear_stop_irq(vcpu);
2838
2839         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2840         __disable_ibs_on_vcpu(vcpu);
2841
2842         for (i = 0; i < online_vcpus; i++) {
2843                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2844                         started_vcpus++;
2845                         started_vcpu = vcpu->kvm->vcpus[i];
2846                 }
2847         }
2848
2849         if (started_vcpus == 1) {
2850                 /*
2851                  * As we only have one VCPU left, we want to enable the
2852                  * IBS facility for that VCPU to speed it up.
2853                  */
2854                 __enable_ibs_on_vcpu(started_vcpu);
2855         }
2856
2857         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2858         return;
2859 }
2860
2861 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2862                                      struct kvm_enable_cap *cap)
2863 {
2864         int r;
2865
2866         if (cap->flags)
2867                 return -EINVAL;
2868
2869         switch (cap->cap) {
2870         case KVM_CAP_S390_CSS_SUPPORT:
2871                 if (!vcpu->kvm->arch.css_support) {
2872                         vcpu->kvm->arch.css_support = 1;
2873                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2874                         trace_kvm_s390_enable_css(vcpu->kvm);
2875                 }
2876                 r = 0;
2877                 break;
2878         default:
2879                 r = -EINVAL;
2880                 break;
2881         }
2882         return r;
2883 }
2884
2885 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2886                                   struct kvm_s390_mem_op *mop)
2887 {
2888         void __user *uaddr = (void __user *)mop->buf;
2889         void *tmpbuf = NULL;
2890         int r, srcu_idx;
2891         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2892                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2893
2894         if (mop->flags & ~supported_flags)
2895                 return -EINVAL;
2896
2897         if (mop->size > MEM_OP_MAX_SIZE)
2898                 return -E2BIG;
2899
2900         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2901                 tmpbuf = vmalloc(mop->size);
2902                 if (!tmpbuf)
2903                         return -ENOMEM;
2904         }
2905
2906         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2907
2908         switch (mop->op) {
2909         case KVM_S390_MEMOP_LOGICAL_READ:
2910                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2911                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2912                                             mop->size, GACC_FETCH);
2913                         break;
2914                 }
2915                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2916                 if (r == 0) {
2917                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2918                                 r = -EFAULT;
2919                 }
2920                 break;
2921         case KVM_S390_MEMOP_LOGICAL_WRITE:
2922                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2923                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2924                                             mop->size, GACC_STORE);
2925                         break;
2926                 }
2927                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2928                         r = -EFAULT;
2929                         break;
2930                 }
2931                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2932                 break;
2933         default:
2934                 r = -EINVAL;
2935         }
2936
2937         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2938
2939         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2940                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2941
2942         vfree(tmpbuf);
2943         return r;
2944 }
2945
2946 long kvm_arch_vcpu_ioctl(struct file *filp,
2947                          unsigned int ioctl, unsigned long arg)
2948 {
2949         struct kvm_vcpu *vcpu = filp->private_data;
2950         void __user *argp = (void __user *)arg;
2951         int idx;
2952         long r;
2953
2954         switch (ioctl) {
2955         case KVM_S390_IRQ: {
2956                 struct kvm_s390_irq s390irq;
2957
2958                 r = -EFAULT;
2959                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2960                         break;
2961                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2962                 break;
2963         }
2964         case KVM_S390_INTERRUPT: {
2965                 struct kvm_s390_interrupt s390int;
2966                 struct kvm_s390_irq s390irq;
2967
2968                 r = -EFAULT;
2969                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2970                         break;
2971                 if (s390int_to_s390irq(&s390int, &s390irq))
2972                         return -EINVAL;
2973                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2974                 break;
2975         }
2976         case KVM_S390_STORE_STATUS:
2977                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2978                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2979                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2980                 break;
2981         case KVM_S390_SET_INITIAL_PSW: {
2982                 psw_t psw;
2983
2984                 r = -EFAULT;
2985                 if (copy_from_user(&psw, argp, sizeof(psw)))
2986                         break;
2987                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2988                 break;
2989         }
2990         case KVM_S390_INITIAL_RESET:
2991                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2992                 break;
2993         case KVM_SET_ONE_REG:
2994         case KVM_GET_ONE_REG: {
2995                 struct kvm_one_reg reg;
2996                 r = -EFAULT;
2997                 if (copy_from_user(&reg, argp, sizeof(reg)))
2998                         break;
2999                 if (ioctl == KVM_SET_ONE_REG)
3000                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3001                 else
3002                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3003                 break;
3004         }
3005 #ifdef CONFIG_KVM_S390_UCONTROL
3006         case KVM_S390_UCAS_MAP: {
3007                 struct kvm_s390_ucas_mapping ucasmap;
3008
3009                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3010                         r = -EFAULT;
3011                         break;
3012                 }
3013
3014                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3015                         r = -EINVAL;
3016                         break;
3017                 }
3018
3019                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3020                                      ucasmap.vcpu_addr, ucasmap.length);
3021                 break;
3022         }
3023         case KVM_S390_UCAS_UNMAP: {
3024                 struct kvm_s390_ucas_mapping ucasmap;
3025
3026                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3027                         r = -EFAULT;
3028                         break;
3029                 }
3030
3031                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3032                         r = -EINVAL;
3033                         break;
3034                 }
3035
3036                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3037                         ucasmap.length);
3038                 break;
3039         }
3040 #endif
3041         case KVM_S390_VCPU_FAULT: {
3042                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3043                 break;
3044         }
3045         case KVM_ENABLE_CAP:
3046         {
3047                 struct kvm_enable_cap cap;
3048                 r = -EFAULT;
3049                 if (copy_from_user(&cap, argp, sizeof(cap)))
3050                         break;
3051                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3052                 break;
3053         }
3054         case KVM_S390_MEM_OP: {
3055                 struct kvm_s390_mem_op mem_op;
3056
3057                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3058                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3059                 else
3060                         r = -EFAULT;
3061                 break;
3062         }
3063         case KVM_S390_SET_IRQ_STATE: {
3064                 struct kvm_s390_irq_state irq_state;
3065
3066                 r = -EFAULT;
3067                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3068                         break;
3069                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3070                     irq_state.len == 0 ||
3071                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3072                         r = -EINVAL;
3073                         break;
3074                 }
3075                 r = kvm_s390_set_irq_state(vcpu,
3076                                            (void __user *) irq_state.buf,
3077                                            irq_state.len);
3078                 break;
3079         }
3080         case KVM_S390_GET_IRQ_STATE: {
3081                 struct kvm_s390_irq_state irq_state;
3082
3083                 r = -EFAULT;
3084                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3085                         break;
3086                 if (irq_state.len == 0) {
3087                         r = -EINVAL;
3088                         break;
3089                 }
3090                 r = kvm_s390_get_irq_state(vcpu,
3091                                            (__u8 __user *)  irq_state.buf,
3092                                            irq_state.len);
3093                 break;
3094         }
3095         default:
3096                 r = -ENOTTY;
3097         }
3098         return r;
3099 }
3100
3101 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3102 {
3103 #ifdef CONFIG_KVM_S390_UCONTROL
3104         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3105                  && (kvm_is_ucontrol(vcpu->kvm))) {
3106                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3107                 get_page(vmf->page);
3108                 return 0;
3109         }
3110 #endif
3111         return VM_FAULT_SIGBUS;
3112 }
3113
3114 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3115                             unsigned long npages)
3116 {
3117         return 0;
3118 }
3119
3120 /* Section: memory related */
3121 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3122                                    struct kvm_memory_slot *memslot,
3123                                    const struct kvm_userspace_memory_region *mem,
3124                                    enum kvm_mr_change change)
3125 {
3126         /* A few sanity checks. We can have memory slots which have to be
3127            located/ended at a segment boundary (1MB). The memory in userland is
3128            ok to be fragmented into various different vmas. It is okay to mmap()
3129            and munmap() stuff in this slot after doing this call at any time */
3130
3131         if (mem->userspace_addr & 0xffffful)
3132                 return -EINVAL;
3133
3134         if (mem->memory_size & 0xffffful)
3135                 return -EINVAL;
3136
3137         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3138                 return -EINVAL;
3139
3140         return 0;
3141 }
3142
3143 void kvm_arch_commit_memory_region(struct kvm *kvm,
3144                                 const struct kvm_userspace_memory_region *mem,
3145                                 const struct kvm_memory_slot *old,
3146                                 const struct kvm_memory_slot *new,
3147                                 enum kvm_mr_change change)
3148 {
3149         int rc;
3150
3151         /* If the basics of the memslot do not change, we do not want
3152          * to update the gmap. Every update causes several unnecessary
3153          * segment translation exceptions. This is usually handled just
3154          * fine by the normal fault handler + gmap, but it will also
3155          * cause faults on the prefix page of running guest CPUs.
3156          */
3157         if (old->userspace_addr == mem->userspace_addr &&
3158             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3159             old->npages * PAGE_SIZE == mem->memory_size)
3160                 return;
3161
3162         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3163                 mem->guest_phys_addr, mem->memory_size);
3164         if (rc)
3165                 pr_warn("failed to commit memory region\n");
3166         return;
3167 }
3168
3169 static inline unsigned long nonhyp_mask(int i)
3170 {
3171         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3172
3173         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3174 }
3175
3176 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3177 {
3178         vcpu->valid_wakeup = false;
3179 }
3180
3181 static int __init kvm_s390_init(void)
3182 {
3183         int i;
3184
3185         if (!sclp.has_sief2) {
3186                 pr_info("SIE not available\n");
3187                 return -ENODEV;
3188         }
3189
3190         for (i = 0; i < 16; i++)
3191                 kvm_s390_fac_list_mask[i] |=
3192                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3193
3194         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3195 }
3196
3197 static void __exit kvm_s390_exit(void)
3198 {
3199         kvm_exit();
3200 }
3201
3202 module_init(kvm_s390_init);
3203 module_exit(kvm_s390_exit);
3204
3205 /*
3206  * Enable autoloading of the kvm module.
3207  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3208  * since x86 takes a different approach.
3209  */
3210 #include <linux/miscdevice.h>
3211 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3212 MODULE_ALIAS("devname:kvm");