arch/s390/kvm/kvm-s390.c

   1 /*
   2  * hosting zSeries kernel virtual machines
   3  *
   4  * Copyright IBM Corp. 2008, 2009
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License (version 2 only)
   8  * as published by the Free Software Foundation.
   9  *
  10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
  11  *               Christian Borntraeger <borntraeger@de.ibm.com>
  12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
  13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
  14  *               Jason J. Herne <jjherne@us.ibm.com>
  15  */
  16
  17 #include <linux/compiler.h>
  18 #include <linux/err.h>
  19 #include <linux/fs.h>
  20 #include <linux/hrtimer.h>
  21 #include <linux/init.h>
  22 #include <linux/kvm.h>
  23 #include <linux/kvm_host.h>
  24 #include <linux/module.h>
  25 #include <linux/random.h>
  26 #include <linux/slab.h>
  27 #include <linux/timer.h>
  28 #include <linux/vmalloc.h>
  29 #include <asm/asm-offsets.h>
  30 #include <asm/lowcore.h>
  31 #include <asm/etr.h>
  32 #include <asm/pgtable.h>
  33 #include <asm/gmap.h>
  34 #include <asm/nmi.h>
  35 #include <asm/switch_to.h>
  36 #include <asm/isc.h>
  37 #include <asm/sclp.h>
  38 #include "kvm-s390.h"
  39 #include "gaccess.h"
  40
  41 #define KMSG_COMPONENT "kvm-s390"
  42 #undef pr_fmt
  43 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
  44
  45 #define CREATE_TRACE_POINTS
  46 #include "trace.h"
  47 #include "trace-s390.h"
  48
  /* Size limits and helper macros used further down in this file. */
  49 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
  /* NOTE(review): presumably the number of per-VCPU (local) interrupt slots -- confirm. */
  50 #define LOCAL_IRQS 32
  /* Size in bytes of (KVM_MAX_VCPUS + LOCAL_IRQS) entries of struct kvm_s390_irq. */
  51 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
  52                            (KVM_MAX_VCPUS + LOCAL_IRQS))
  53
  /*
   * Expands to two comma-separated initializer values: the offset of stat.x
   * inside struct kvm_vcpu, followed by KVM_STAT_VCPU. Used to fill the
   * debugfs_entries[] table.
   */
  54 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
  55
  /*
   * Table of per-VCPU statistics counters. Each entry pairs a name string
   * with the VCPU_STAT() expansion for the matching counter in
   * struct kvm_vcpu's stat member. The table is terminated by a { NULL }
   * entry.
   */
  56 struct kvm_stats_debugfs_item debugfs_entries[] = {
  57         { "userspace_handled", VCPU_STAT(exit_userspace) },
  58         { "exit_null", VCPU_STAT(exit_null) },
  59         { "exit_validity", VCPU_STAT(exit_validity) },
  60         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
  61         { "exit_external_request", VCPU_STAT(exit_external_request) },
  62         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
  63         { "exit_instruction", VCPU_STAT(exit_instruction) },
  64         { "exit_pei", VCPU_STAT(exit_pei) },
  65         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
  66         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
  67         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
  68         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
  69         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
  70         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
  71         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
  72         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
  73         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
  74         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
  75         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
  76         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
  77         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
  78         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
  79         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
  80         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
  81         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
  82         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
  83         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
  84         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
  85         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
  86         { "instruction_spx", VCPU_STAT(instruction_spx) },
  87         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
  88         { "instruction_stap", VCPU_STAT(instruction_stap) },
  89         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
  90         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
  91         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
  92         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
  93         { "instruction_essa", VCPU_STAT(instruction_essa) },
  94         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
  95         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
  96         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
  97         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
  98         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
  99         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 100         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
 101         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
 102         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 103         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
 104         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
 105         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 106         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
 107         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 108         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 109         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
 110         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
 111         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
 112         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 113         { "diagnose_10", VCPU_STAT(diagnose_10) },
 114         { "diagnose_44", VCPU_STAT(diagnose_44) },
 115         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
 116         { "diagnose_258", VCPU_STAT(diagnose_258) },
 117         { "diagnose_308", VCPU_STAT(diagnose_308) },
 118         { "diagnose_500", VCPU_STAT(diagnose_500) },
 119         { NULL }
 120 };
 121
 122 /*
      * Upper facilities limit for KVM. Only the first two of the 16 array
      * elements are given explicit values; the remaining elements are
      * zero-initialized.
      */
 123 unsigned long kvm_s390_fac_list_mask[16] = {
 124         0xffe6000000000000UL,
 125         0x005e000000000000UL,
 126 };
 127
 128 unsigned long kvm_s390_fac_list_mask_size(void)
 129 {
 130         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
 131         return ARRAY_SIZE(kvm_s390_fac_list_mask);
 132 }
 133
 /* Notifier whose callback is set and registered in kvm_arch_hardware_setup(). */
 134 static struct gmap_notifier gmap_notifier;
 /* Debug feature handle; registered as "kvm-trace" in kvm_arch_init(). */
 135 debug_info_t *kvm_s390_dbf;
 136
 137 /* Section: not file related */
 /*
  * Enable hook for virtualization support.
  *
  * Every s390 machine is virtualization capable, so there is nothing to
  * switch on here.
  *
  * Return: always 0.
  */
 int kvm_arch_hardware_enable(void)
 {
         return 0;
 }
 143
 144 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
 145
 146 /*
 147  * This callback is executed during stop_machine(). All CPUs are therefore
 148  * temporarily stopped. In order not to change guest behavior, we have to
 149  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
 150  * so a CPU won't be stopped while calculating with the epoch.
 151  */
 152 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
 153                           void *v)
 154 {
 155         struct kvm *kvm;
 156         struct kvm_vcpu *vcpu;
 157         int i;
 158         unsigned long long *delta = v;
 159
 160         list_for_each_entry(kvm, &vm_list, vm_list) {
 161                 kvm->arch.epoch -= *delta;
 162                 kvm_for_each_vcpu(i, vcpu, kvm) {
 163                         vcpu->arch.sie_block->epoch -= *delta;
 164                         if (vcpu->arch.cputm_enabled)
 165                                 vcpu->arch.cputm_start += *delta;
 166                 }
 167         }
 168         return NOTIFY_OK;
 169 }
 170
 /*
  * Notifier block that invokes kvm_clock_sync(); it is added to and removed
  * from s390_epoch_delta_notifier in kvm_arch_hardware_setup() and
  * kvm_arch_hardware_unsetup().
  */
 171 static struct notifier_block kvm_clock_notifier = {
 172         .notifier_call = kvm_clock_sync,
 173 };
 174
 175 int kvm_arch_hardware_setup(void)
 176 {
 177         gmap_notifier.notifier_call = kvm_gmap_notifier;
 178         gmap_register_ipte_notifier(&gmap_notifier);
 179         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
 180                                        &kvm_clock_notifier);
 181         return 0;
 182 }
 183
 184 void kvm_arch_hardware_unsetup(void)
 185 {
 186         gmap_unregister_ipte_notifier(&gmap_notifier);
 187         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
 188                                          &kvm_clock_notifier);
 189 }
 190
 191 int kvm_arch_init(void *opaque)
 192 {
 193         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
 194         if (!kvm_s390_dbf)
 195                 return -ENOMEM;
 196
 197         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
 198                 debug_unregister(kvm_s390_dbf);
 199                 return -ENOMEM;
 200         }
 201
 202         /* Register floating interrupt controller interface. */
 203         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
 204 }
 205
 206 void kvm_arch_exit(void)
 207 {
 208         debug_unregister(kvm_s390_dbf);
 209 }
 210
 211 /* Section: device related */
 212 long kvm_arch_dev_ioctl(struct file *filp,
 213                         unsigned int ioctl, unsigned long arg)
 214 {
 215         if (ioctl == KVM_S390_ENABLE_SIE)
 216                 return s390_enable_sie();
 217         return -EINVAL;
 218 }
 219
 220 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 221 {
 222         int r;
 223
 224         switch (ext) {
 225         case KVM_CAP_S390_PSW:
 226         case KVM_CAP_S390_GMAP:
 227         case KVM_CAP_SYNC_MMU:
 228 #ifdef CONFIG_KVM_S390_UCONTROL
 229         case KVM_CAP_S390_UCONTROL:
 230 #endif
 231         case KVM_CAP_ASYNC_PF:
 232         case KVM_CAP_SYNC_REGS:
 233         case KVM_CAP_ONE_REG:
 234         case KVM_CAP_ENABLE_CAP:
 235         case KVM_CAP_S390_CSS_SUPPORT:
 236         case KVM_CAP_IOEVENTFD:
 237         case KVM_CAP_DEVICE_CTRL:
 238         case KVM_CAP_ENABLE_CAP_VM:
 239         case KVM_CAP_S390_IRQCHIP:
 240         case KVM_CAP_VM_ATTRIBUTES:
 241         case KVM_CAP_MP_STATE:
 242         case KVM_CAP_S390_INJECT_IRQ:
 243         case KVM_CAP_S390_USER_SIGP:
 244         case KVM_CAP_S390_USER_STSI:
 245         case KVM_CAP_S390_SKEYS:
 246         case KVM_CAP_S390_IRQ_STATE:
 247                 r = 1;
 248                 break;
 249         case KVM_CAP_S390_MEM_OP:
 250                 r = MEM_OP_MAX_SIZE;
 251                 break;
 252         case KVM_CAP_NR_VCPUS:
 253         case KVM_CAP_MAX_VCPUS:
 254                 r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
 255                                   : KVM_S390_BSCA_CPU_SLOTS;
 256                 break;
 257         case KVM_CAP_NR_MEMSLOTS:
 258                 r = KVM_USER_MEM_SLOTS;
 259                 break;
 260         case KVM_CAP_S390_COW:
 261                 r = MACHINE_HAS_ESOP;
 262                 break;
 263         case KVM_CAP_S390_VECTOR_REGISTERS:
 264                 r = MACHINE_HAS_VX;
 265                 break;
 266         case KVM_CAP_S390_RI:
 267                 r = test_facility(64);
 268                 break;
 269         default:
 270                 r = 0;
 271         }
 272         return r;
 273 }
 274
 275 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
 276                                         struct kvm_memory_slot *memslot)
 277 {
 278         gfn_t cur_gfn, last_gfn;
 279         unsigned long address;
 280         struct gmap *gmap = kvm->arch.gmap;
 281
 282         /* Loop over all guest pages */
 283         last_gfn = memslot->base_gfn + memslot->npages;
 284         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
 285                 address = gfn_to_hva_memslot(memslot, cur_gfn);
 286
 287                 if (test_and_clear_guest_dirty(gmap->mm, address))
 288                         mark_page_dirty(kvm, cur_gfn);
 289                 if (fatal_signal_pending(current))
 290                         return;
 291                 cond_resched();
 292         }
 293 }
 294
 295 /* Section: vm related */
 296 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
 297
 298 /*
 299  * Get (and clear) the dirty memory log for a memory slot.
 300  */
 301 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 302                                struct kvm_dirty_log *log)
 303 {
 304         int r;
 305         unsigned long n;
 306         struct kvm_memslots *slots;
 307         struct kvm_memory_slot *memslot;
 308         int is_dirty = 0;
 309
 310         mutex_lock(&kvm->slots_lock);
 311
 312         r = -EINVAL;
 313         if (log->slot >= KVM_USER_MEM_SLOTS)
 314                 goto out;
 315
 316         slots = kvm_memslots(kvm);
 317         memslot = id_to_memslot(slots, log->slot);
 318         r = -ENOENT;
 319         if (!memslot->dirty_bitmap)
 320                 goto out;
 321
 322         kvm_s390_sync_dirty_log(kvm, memslot);
 323         r = kvm_get_dirty_log(kvm, log, &is_dirty);
 324         if (r)
 325                 goto out;
 326
 327         /* Clear the dirty log */
 328         if (is_dirty) {
 329                 n = kvm_dirty_bitmap_bytes(memslot);
 330                 memset(memslot->dirty_bitmap, 0, n);
 331         }
 332         r = 0;
 333 out:
 334         mutex_unlock(&kvm->slots_lock);
 335         return r;
 336 }
 337
 338 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 339 {
 340         int r;
 341
 342         if (cap->flags)
 343                 return -EINVAL;
 344
 345         switch (cap->cap) {
 346         case KVM_CAP_S390_IRQCHIP:
 347                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
 348                 kvm->arch.use_irqchip = 1;
 349                 r = 0;
 350                 break;
 351         case KVM_CAP_S390_USER_SIGP:
 352                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
 353                 kvm->arch.user_sigp = 1;
 354                 r = 0;
 355                 break;
 356         case KVM_CAP_S390_VECTOR_REGISTERS:
 357                 mutex_lock(&kvm->lock);
 358                 if (atomic_read(&kvm->online_vcpus)) {
 359                         r = -EBUSY;
 360                 } else if (MACHINE_HAS_VX) {
 361                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
 362                         set_kvm_facility(kvm->arch.model.fac_list, 129);
 363                         r = 0;
 364                 } else
 365                         r = -EINVAL;
 366                 mutex_unlock(&kvm->lock);
 367                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 368                          r ? "(not available)" : "(success)");
 369                 break;
 370         case KVM_CAP_S390_RI:
 371                 r = -EINVAL;
 372                 mutex_lock(&kvm->lock);
 373                 if (atomic_read(&kvm->online_vcpus)) {
 374                         r = -EBUSY;
 375                 } else if (test_facility(64)) {
 376                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
 377                         set_kvm_facility(kvm->arch.model.fac_list, 64);
 378                         r = 0;
 379                 }
 380                 mutex_unlock(&kvm->lock);
 381                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
 382                          r ? "(not available)" : "(success)");
 383                 break;
 384         case KVM_CAP_S390_USER_STSI:
 385                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
 386                 kvm->arch.user_stsi = 1;
 387                 r = 0;
 388                 break;
 389         default:
 390                 r = -EINVAL;
 391                 break;
 392         }
 393         return r;
 394 }
 395
 396 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 397 {
 398         int ret;
 399
 400         switch (attr->attr) {
 401         case KVM_S390_VM_MEM_LIMIT_SIZE:
 402                 ret = 0;
 403                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
 404                          kvm->arch.mem_limit);
 405                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
 406                         ret = -EFAULT;
 407                 break;
 408         default:
 409                 ret = -ENXIO;
 410                 break;
 411         }
 412         return ret;
 413 }
 414
 415 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
 416 {
 417         int ret;
 418         unsigned int idx;
 419         switch (attr->attr) {
 420         case KVM_S390_VM_MEM_ENABLE_CMMA:
 421                 /* enable CMMA only for z10 and later (EDAT_1) */
 422                 ret = -EINVAL;
 423                 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
 424                         break;
 425
 426                 ret = -EBUSY;
 427                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
 428                 mutex_lock(&kvm->lock);
 429                 if (atomic_read(&kvm->online_vcpus) == 0) {
 430                         kvm->arch.use_cmma = 1;
 431                         ret = 0;
 432                 }
 433                 mutex_unlock(&kvm->lock);
 434                 break;
 435         case KVM_S390_VM_MEM_CLR_CMMA:
 436                 ret = -EINVAL;
 437                 if (!kvm->arch.use_cmma)
 438                         break;
 439
 440                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
 441                 mutex_lock(&kvm->lock);
 442                 idx = srcu_read_lock(&kvm->srcu);
 443                 s390_reset_cmma(kvm->arch.gmap->mm);
 444                 srcu_read_unlock(&kvm->srcu, idx);
 445                 mutex_unlock(&kvm->lock);
 446                 ret = 0;
 447                 break;
 448         case KVM_S390_VM_MEM_LIMIT_SIZE: {
 449                 unsigned long new_limit;
 450
 451                 if (kvm_is_ucontrol(kvm))
 452                         return -EINVAL;
 453
 454                 if (get_user(new_limit, (u64 __user *)attr->addr))
 455                         return -EFAULT;
 456
 457                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
 458                     new_limit > kvm->arch.mem_limit)
 459                         return -E2BIG;
 460
 461                 if (!new_limit)
 462                         return -EINVAL;
 463
 464                 /* gmap_alloc takes last usable address */
 465                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
 466                         new_limit -= 1;
 467
 468                 ret = -EBUSY;
 469                 mutex_lock(&kvm->lock);
 470                 if (atomic_read(&kvm->online_vcpus) == 0) {
 471                         /* gmap_alloc will round the limit up */
 472                         struct gmap *new = gmap_alloc(current->mm, new_limit);
 473
 474                         if (!new) {
 475                                 ret = -ENOMEM;
 476                         } else {
 477                                 gmap_free(kvm->arch.gmap);
 478                                 new->private = kvm;
 479                                 kvm->arch.gmap = new;
 480                                 ret = 0;
 481                         }
 482                 }
 483                 mutex_unlock(&kvm->lock);
 484                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
 485                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
 486                          (void *) kvm->arch.gmap->asce);
 487                 break;
 488         }
 489         default:
 490                 ret = -ENXIO;
 491                 break;
 492         }
 493         return ret;
 494 }
 495
 496 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
 497
 498 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
 499 {
 500         struct kvm_vcpu *vcpu;
 501         int i;
 502
 503         if (!test_kvm_facility(kvm, 76))
 504                 return -EINVAL;
 505
 506         mutex_lock(&kvm->lock);
 507         switch (attr->attr) {
 508         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 509                 get_random_bytes(
 510                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
 511                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 512                 kvm->arch.crypto.aes_kw = 1;
 513                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
 514                 break;
 515         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 516                 get_random_bytes(
 517                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
 518                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 519                 kvm->arch.crypto.dea_kw = 1;
 520                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
 521                 break;
 522         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 523                 kvm->arch.crypto.aes_kw = 0;
 524                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
 525                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
 526                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
 527                 break;
 528         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 529                 kvm->arch.crypto.dea_kw = 0;
 530                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
 531                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 532                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
 533                 break;
 534         default:
 535                 mutex_unlock(&kvm->lock);
 536                 return -ENXIO;
 537         }
 538
 539         kvm_for_each_vcpu(i, vcpu, kvm) {
 540                 kvm_s390_vcpu_crypto_setup(vcpu);
 541                 exit_sie(vcpu);
 542         }
 543         mutex_unlock(&kvm->lock);
 544         return 0;
 545 }
 546
 547 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 548 {
 549         u8 gtod_high;
 550
 551         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
 552                                            sizeof(gtod_high)))
 553                 return -EFAULT;
 554
 555         if (gtod_high != 0)
 556                 return -EINVAL;
 557         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
 558
 559         return 0;
 560 }
 561
 562 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 563 {
 564         u64 gtod;
 565
 566         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
 567                 return -EFAULT;
 568
 569         kvm_s390_set_tod_clock(kvm, gtod);
 570         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
 571         return 0;
 572 }
 573
 574 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 575 {
 576         int ret;
 577
 578         if (attr->flags)
 579                 return -EINVAL;
 580
 581         switch (attr->attr) {
 582         case KVM_S390_VM_TOD_HIGH:
 583                 ret = kvm_s390_set_tod_high(kvm, attr);
 584                 break;
 585         case KVM_S390_VM_TOD_LOW:
 586                 ret = kvm_s390_set_tod_low(kvm, attr);
 587                 break;
 588         default:
 589                 ret = -ENXIO;
 590                 break;
 591         }
 592         return ret;
 593 }
 594
 595 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
 596 {
 597         u8 gtod_high = 0;
 598
 599         if (copy_to_user((void __user *)attr->addr, &gtod_high,
 600                                          sizeof(gtod_high)))
 601                 return -EFAULT;
 602         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
 603
 604         return 0;
 605 }
 606
 607 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 608 {
 609         u64 gtod;
 610
 611         gtod = kvm_s390_get_tod_clock_fast(kvm);
 612         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
 613                 return -EFAULT;
 614         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
 615
 616         return 0;
 617 }
 618
 619 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 620 {
 621         int ret;
 622
 623         if (attr->flags)
 624                 return -EINVAL;
 625
 626         switch (attr->attr) {
 627         case KVM_S390_VM_TOD_HIGH:
 628                 ret = kvm_s390_get_tod_high(kvm, attr);
 629                 break;
 630         case KVM_S390_VM_TOD_LOW:
 631                 ret = kvm_s390_get_tod_low(kvm, attr);
 632                 break;
 633         default:
 634                 ret = -ENXIO;
 635                 break;
 636         }
 637         return ret;
 638 }
 639
 640 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 641 {
 642         struct kvm_s390_vm_cpu_processor *proc;
 643         u16 lowest_ibc, unblocked_ibc;
 644         int ret = 0;
 645
 646         mutex_lock(&kvm->lock);
 647         if (atomic_read(&kvm->online_vcpus)) {
 648                 ret = -EBUSY;
 649                 goto out;
 650         }
 651         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 652         if (!proc) {
 653                 ret = -ENOMEM;
 654                 goto out;
 655         }
 656         if (!copy_from_user(proc, (void __user *)attr->addr,
 657                             sizeof(*proc))) {
 658                 kvm->arch.model.cpuid = proc->cpuid;
 659                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
 660                 unblocked_ibc = sclp.ibc & 0xfff;
 661                 if (lowest_ibc && proc->ibc) {
 662                         if (proc->ibc > unblocked_ibc)
 663                                 kvm->arch.model.ibc = unblocked_ibc;
 664                         else if (proc->ibc < lowest_ibc)
 665                                 kvm->arch.model.ibc = lowest_ibc;
 666                         else
 667                                 kvm->arch.model.ibc = proc->ibc;
 668                 }
 669                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
 670                        S390_ARCH_FAC_LIST_SIZE_BYTE);
 671         } else
 672                 ret = -EFAULT;
 673         kfree(proc);
 674 out:
 675         mutex_unlock(&kvm->lock);
 676         return ret;
 677 }
 678
 679 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 680 {
 681         int ret = -ENXIO;
 682
 683         switch (attr->attr) {
 684         case KVM_S390_VM_CPU_PROCESSOR:
 685                 ret = kvm_s390_set_processor(kvm, attr);
 686                 break;
 687         }
 688         return ret;
 689 }
 690
 691 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
 692 {
 693         struct kvm_s390_vm_cpu_processor *proc;
 694         int ret = 0;
 695
 696         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
 697         if (!proc) {
 698                 ret = -ENOMEM;
 699                 goto out;
 700         }
 701         proc->cpuid = kvm->arch.model.cpuid;
 702         proc->ibc = kvm->arch.model.ibc;
 703         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
 704                S390_ARCH_FAC_LIST_SIZE_BYTE);
 705         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
 706                 ret = -EFAULT;
 707         kfree(proc);
 708 out:
 709         return ret;
 710 }
 711
 712 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
 713 {
 714         struct kvm_s390_vm_cpu_machine *mach;
 715         int ret = 0;
 716
 717         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
 718         if (!mach) {
 719                 ret = -ENOMEM;
 720                 goto out;
 721         }
 722         get_cpu_id((struct cpuid *) &mach->cpuid);
 723         mach->ibc = sclp.ibc;
 724         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
 725                S390_ARCH_FAC_LIST_SIZE_BYTE);
 726         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
 727                S390_ARCH_FAC_LIST_SIZE_BYTE);
 728         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
 729                 ret = -EFAULT;
 730         kfree(mach);
 731 out:
 732         return ret;
 733 }
 734
 735 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
 736 {
 737         int ret = -ENXIO;
 738
 739         switch (attr->attr) {
 740         case KVM_S390_VM_CPU_PROCESSOR:
 741                 ret = kvm_s390_get_processor(kvm, attr);
 742                 break;
 743         case KVM_S390_VM_CPU_MACHINE:
 744                 ret = kvm_s390_get_machine(kvm, attr);
 745                 break;
 746         }
 747         return ret;
 748 }
 749
 750 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 751 {
 752         int ret;
 753
 754         switch (attr->group) {
 755         case KVM_S390_VM_MEM_CTRL:
 756                 ret = kvm_s390_set_mem_control(kvm, attr);
 757                 break;
 758         case KVM_S390_VM_TOD:
 759                 ret = kvm_s390_set_tod(kvm, attr);
 760                 break;
 761         case KVM_S390_VM_CPU_MODEL:
 762                 ret = kvm_s390_set_cpu_model(kvm, attr);
 763                 break;
 764         case KVM_S390_VM_CRYPTO:
 765                 ret = kvm_s390_vm_set_crypto(kvm, attr);
 766                 break;
 767         default:
 768                 ret = -ENXIO;
 769                 break;
 770         }
 771
 772         return ret;
 773 }
 774
 775 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 776 {
 777         int ret;
 778
 779         switch (attr->group) {
 780         case KVM_S390_VM_MEM_CTRL:
 781                 ret = kvm_s390_get_mem_control(kvm, attr);
 782                 break;
 783         case KVM_S390_VM_TOD:
 784                 ret = kvm_s390_get_tod(kvm, attr);
 785                 break;
 786         case KVM_S390_VM_CPU_MODEL:
 787                 ret = kvm_s390_get_cpu_model(kvm, attr);
 788                 break;
 789         default:
 790                 ret = -ENXIO;
 791                 break;
 792         }
 793
 794         return ret;
 795 }
 796
 797 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 798 {
 799         int ret;
 800
 801         switch (attr->group) {
 802         case KVM_S390_VM_MEM_CTRL:
 803                 switch (attr->attr) {
 804                 case KVM_S390_VM_MEM_ENABLE_CMMA:
 805                 case KVM_S390_VM_MEM_CLR_CMMA:
 806                 case KVM_S390_VM_MEM_LIMIT_SIZE:
 807                         ret = 0;
 808                         break;
 809                 default:
 810                         ret = -ENXIO;
 811                         break;
 812                 }
 813                 break;
 814         case KVM_S390_VM_TOD:
 815                 switch (attr->attr) {
 816                 case KVM_S390_VM_TOD_LOW:
 817                 case KVM_S390_VM_TOD_HIGH:
 818                         ret = 0;
 819                         break;
 820                 default:
 821                         ret = -ENXIO;
 822                         break;
 823                 }
 824                 break;
 825         case KVM_S390_VM_CPU_MODEL:
 826                 switch (attr->attr) {
 827                 case KVM_S390_VM_CPU_PROCESSOR:
 828                 case KVM_S390_VM_CPU_MACHINE:
 829                         ret = 0;
 830                         break;
 831                 default:
 832                         ret = -ENXIO;
 833                         break;
 834                 }
 835                 break;
 836         case KVM_S390_VM_CRYPTO:
 837                 switch (attr->attr) {
 838                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
 839                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
 840                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
 841                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
 842                         ret = 0;
 843                         break;
 844                 default:
 845                         ret = -ENXIO;
 846                         break;
 847                 }
 848                 break;
 849         default:
 850                 ret = -ENXIO;
 851                 break;
 852         }
 853
 854         return ret;
 855 }
 856
 857 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 858 {
 859         uint8_t *keys;
 860         uint64_t hva;
 861         unsigned long curkey;
 862         int i, r = 0;
 863
 864         if (args->flags != 0)
 865                 return -EINVAL;
 866
 867         /* Is this guest using storage keys? */
 868         if (!mm_use_skey(current->mm))
 869                 return KVM_S390_GET_SKEYS_NONE;
 870
 871         /* Enforce sane limit on memory allocation */
 872         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 873                 return -EINVAL;
 874
 875         keys = kmalloc_array(args->count, sizeof(uint8_t),
 876                              GFP_KERNEL | __GFP_NOWARN);
 877         if (!keys)
 878                 keys = vmalloc(sizeof(uint8_t) * args->count);
 879         if (!keys)
 880                 return -ENOMEM;
 881
 882         for (i = 0; i < args->count; i++) {
 883                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 884                 if (kvm_is_error_hva(hva)) {
 885                         r = -EFAULT;
 886                         goto out;
 887                 }
 888
 889                 curkey = get_guest_storage_key(current->mm, hva);
 890                 if (IS_ERR_VALUE(curkey)) {
 891                         r = curkey;
 892                         goto out;
 893                 }
 894                 keys[i] = curkey;
 895         }
 896
 897         r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
 898                          sizeof(uint8_t) * args->count);
 899         if (r)
 900                 r = -EFAULT;
 901 out:
 902         kvfree(keys);
 903         return r;
 904 }
 905
 906 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
 907 {
 908         uint8_t *keys;
 909         uint64_t hva;
 910         int i, r = 0;
 911
 912         if (args->flags != 0)
 913                 return -EINVAL;
 914
 915         /* Enforce sane limit on memory allocation */
 916         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
 917                 return -EINVAL;
 918
 919         keys = kmalloc_array(args->count, sizeof(uint8_t),
 920                              GFP_KERNEL | __GFP_NOWARN);
 921         if (!keys)
 922                 keys = vmalloc(sizeof(uint8_t) * args->count);
 923         if (!keys)
 924                 return -ENOMEM;
 925
 926         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
 927                            sizeof(uint8_t) * args->count);
 928         if (r) {
 929                 r = -EFAULT;
 930                 goto out;
 931         }
 932
 933         /* Enable storage key handling for the guest */
 934         r = s390_enable_skey();
 935         if (r)
 936                 goto out;
 937
 938         for (i = 0; i < args->count; i++) {
 939                 hva = gfn_to_hva(kvm, args->start_gfn + i);
 940                 if (kvm_is_error_hva(hva)) {
 941                         r = -EFAULT;
 942                         goto out;
 943                 }
 944
 945                 /* Lowest order bit is reserved */
 946                 if (keys[i] & 0x01) {
 947                         r = -EINVAL;
 948                         goto out;
 949                 }
 950
 951                 r = set_guest_storage_key(current->mm, hva,
 952                                           (unsigned long)keys[i], 0);
 953                 if (r)
 954                         goto out;
 955         }
 956 out:
 957         kvfree(keys);
 958         return r;
 959 }
 960
 961 long kvm_arch_vm_ioctl(struct file *filp,
 962                        unsigned int ioctl, unsigned long arg)
 963 {
 964         struct kvm *kvm = filp->private_data;
 965         void __user *argp = (void __user *)arg;
 966         struct kvm_device_attr attr;
 967         int r;
 968
 969         switch (ioctl) {
 970         case KVM_S390_INTERRUPT: {
 971                 struct kvm_s390_interrupt s390int;
 972
 973                 r = -EFAULT;
 974                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
 975                         break;
 976                 r = kvm_s390_inject_vm(kvm, &s390int);
 977                 break;
 978         }
 979         case KVM_ENABLE_CAP: {
 980                 struct kvm_enable_cap cap;
 981                 r = -EFAULT;
 982                 if (copy_from_user(&cap, argp, sizeof(cap)))
 983                         break;
 984                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
 985                 break;
 986         }
 987         case KVM_CREATE_IRQCHIP: {
 988                 struct kvm_irq_routing_entry routing;
 989
 990                 r = -EINVAL;
 991                 if (kvm->arch.use_irqchip) {
 992                         /* Set up dummy routing. */
 993                         memset(&routing, 0, sizeof(routing));
 994                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
 995                 }
 996                 break;
 997         }
 998         case KVM_SET_DEVICE_ATTR: {
 999                 r = -EFAULT;
1000                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1001                         break;
1002                 r = kvm_s390_vm_set_attr(kvm, &attr);
1003                 break;
1004         }
1005         case KVM_GET_DEVICE_ATTR: {
1006                 r = -EFAULT;
1007                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1008                         break;
1009                 r = kvm_s390_vm_get_attr(kvm, &attr);
1010                 break;
1011         }
1012         case KVM_HAS_DEVICE_ATTR: {
1013                 r = -EFAULT;
1014                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1015                         break;
1016                 r = kvm_s390_vm_has_attr(kvm, &attr);
1017                 break;
1018         }
1019         case KVM_S390_GET_SKEYS: {
1020                 struct kvm_s390_skeys args;
1021
1022                 r = -EFAULT;
1023                 if (copy_from_user(&args, argp,
1024                                    sizeof(struct kvm_s390_skeys)))
1025                         break;
1026                 r = kvm_s390_get_skeys(kvm, &args);
1027                 break;
1028         }
1029         case KVM_S390_SET_SKEYS: {
1030                 struct kvm_s390_skeys args;
1031
1032                 r = -EFAULT;
1033                 if (copy_from_user(&args, argp,
1034                                    sizeof(struct kvm_s390_skeys)))
1035                         break;
1036                 r = kvm_s390_set_skeys(kvm, &args);
1037                 break;
1038         }
1039         default:
1040                 r = -ENOTTY;
1041         }
1042
1043         return r;
1044 }
1045
1046 static int kvm_s390_query_ap_config(u8 *config)
1047 {
1048         u32 fcn_code = 0x04000000UL;
1049         u32 cc = 0;
1050
1051         memset(config, 0, 128);
1052         asm volatile(
1053                 "lgr 0,%1\n"
1054                 "lgr 2,%2\n"
1055                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1056                 "0: ipm %0\n"
1057                 "srl %0,28\n"
1058                 "1:\n"
1059                 EX_TABLE(0b, 1b)
1060                 : "+r" (cc)
1061                 : "r" (fcn_code), "r" (config)
1062                 : "cc", "0", "2", "memory"
1063         );
1064
1065         return cc;
1066 }
1067
1068 static int kvm_s390_apxa_installed(void)
1069 {
1070         u8 config[128];
1071         int cc;
1072
1073         if (test_facility(12)) {
1074                 cc = kvm_s390_query_ap_config(config);
1075
1076                 if (cc)
1077                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1078                 else
1079                         return config[0] & 0x40;
1080         }
1081
1082         return 0;
1083 }
1084
1085 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1086 {
1087         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1088
1089         if (kvm_s390_apxa_installed())
1090                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1091         else
1092                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1093 }
1094
1095 static u64 kvm_s390_get_initial_cpuid(void)
1096 {
1097         struct cpuid cpuid;
1098
1099         get_cpu_id(&cpuid);
1100         cpuid.version = 0xff;
1101         return *((u64 *) &cpuid);
1102 }
1103
1104 static void kvm_s390_crypto_init(struct kvm *kvm)
1105 {
1106         if (!test_kvm_facility(kvm, 76))
1107                 return;
1108
1109         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1110         kvm_s390_set_crycb_format(kvm);
1111
1112         /* Enable AES/DEA protected key functions by default */
1113         kvm->arch.crypto.aes_kw = 1;
1114         kvm->arch.crypto.dea_kw = 1;
1115         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1116                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1117         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1118                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1119 }
1120
1121 static void sca_dispose(struct kvm *kvm)
1122 {
1123         if (kvm->arch.use_esca)
1124                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1125         else
1126                 free_page((unsigned long)(kvm->arch.sca));
1127         kvm->arch.sca = NULL;
1128 }
1129
1130 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1131 {
1132         int i, rc;
1133         char debug_name[16];
1134         static unsigned long sca_offset;
1135
1136         rc = -EINVAL;
1137 #ifdef CONFIG_KVM_S390_UCONTROL
1138         if (type & ~KVM_VM_S390_UCONTROL)
1139                 goto out_err;
1140         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1141                 goto out_err;
1142 #else
1143         if (type)
1144                 goto out_err;
1145 #endif
1146
1147         rc = s390_enable_sie();
1148         if (rc)
1149                 goto out_err;
1150
1151         rc = -ENOMEM;
1152
1153         kvm->arch.use_esca = 0; /* start with basic SCA */
1154         rwlock_init(&kvm->arch.sca_lock);
1155         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
1156         if (!kvm->arch.sca)
1157                 goto out_err;
1158         spin_lock(&kvm_lock);
1159         sca_offset += 16;
1160         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1161                 sca_offset = 0;
1162         kvm->arch.sca = (struct bsca_block *)
1163                         ((char *) kvm->arch.sca + sca_offset);
1164         spin_unlock(&kvm_lock);
1165
1166         sprintf(debug_name, "kvm-%u", current->pid);
1167
1168         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1169         if (!kvm->arch.dbf)
1170                 goto out_err;
1171
1172         kvm->arch.sie_page2 =
1173              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1174         if (!kvm->arch.sie_page2)
1175                 goto out_err;
1176
1177         /* Populate the facility mask initially. */
1178         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1179                S390_ARCH_FAC_LIST_SIZE_BYTE);
1180         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1181                 if (i < kvm_s390_fac_list_mask_size())
1182                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1183                 else
1184                         kvm->arch.model.fac_mask[i] = 0UL;
1185         }
1186
1187         /* Populate the facility list initially. */
1188         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1189         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1190                S390_ARCH_FAC_LIST_SIZE_BYTE);
1191
1192         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1193         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1194
1195         kvm_s390_crypto_init(kvm);
1196
1197         spin_lock_init(&kvm->arch.float_int.lock);
1198         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1199                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1200         init_waitqueue_head(&kvm->arch.ipte_wq);
1201         mutex_init(&kvm->arch.ipte_mutex);
1202
1203         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1204         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1205
1206         if (type & KVM_VM_S390_UCONTROL) {
1207                 kvm->arch.gmap = NULL;
1208                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1209         } else {
1210                 if (sclp.hamax == U64_MAX)
1211                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1212                 else
1213                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1214                                                     sclp.hamax + 1);
1215                 kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1);
1216                 if (!kvm->arch.gmap)
1217                         goto out_err;
1218                 kvm->arch.gmap->private = kvm;
1219                 kvm->arch.gmap->pfault_enabled = 0;
1220         }
1221
1222         kvm->arch.css_support = 0;
1223         kvm->arch.use_irqchip = 0;
1224         kvm->arch.epoch = 0;
1225
1226         spin_lock_init(&kvm->arch.start_stop_lock);
1227         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1228
1229         return 0;
1230 out_err:
1231         free_page((unsigned long)kvm->arch.sie_page2);
1232         debug_unregister(kvm->arch.dbf);
1233         sca_dispose(kvm);
1234         KVM_EVENT(3, "creation of vm failed: %d", rc);
1235         return rc;
1236 }
1237
1238 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1239 {
1240         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1241         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1242         kvm_s390_clear_local_irqs(vcpu);
1243         kvm_clear_async_pf_completion_queue(vcpu);
1244         if (!kvm_is_ucontrol(vcpu->kvm))
1245                 sca_del_vcpu(vcpu);
1246
1247         if (kvm_is_ucontrol(vcpu->kvm))
1248                 gmap_free(vcpu->arch.gmap);
1249
1250         if (vcpu->kvm->arch.use_cmma)
1251                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1252         free_page((unsigned long)(vcpu->arch.sie_block));
1253
1254         kvm_vcpu_uninit(vcpu);
1255         kmem_cache_free(kvm_vcpu_cache, vcpu);
1256 }
1257
1258 static void kvm_free_vcpus(struct kvm *kvm)
1259 {
1260         unsigned int i;
1261         struct kvm_vcpu *vcpu;
1262
1263         kvm_for_each_vcpu(i, vcpu, kvm)
1264                 kvm_arch_vcpu_destroy(vcpu);
1265
1266         mutex_lock(&kvm->lock);
1267         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1268                 kvm->vcpus[i] = NULL;
1269
1270         atomic_set(&kvm->online_vcpus, 0);
1271         mutex_unlock(&kvm->lock);
1272 }
1273
1274 void kvm_arch_destroy_vm(struct kvm *kvm)
1275 {
1276         kvm_free_vcpus(kvm);
1277         sca_dispose(kvm);
1278         debug_unregister(kvm->arch.dbf);
1279         free_page((unsigned long)kvm->arch.sie_page2);
1280         if (!kvm_is_ucontrol(kvm))
1281                 gmap_free(kvm->arch.gmap);
1282         kvm_s390_destroy_adapters(kvm);
1283         kvm_s390_clear_float_irqs(kvm);
1284         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1285 }
1286
1287 /* Section: vcpu related */
1288 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1289 {
1290         vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1291         if (!vcpu->arch.gmap)
1292                 return -ENOMEM;
1293         vcpu->arch.gmap->private = vcpu->kvm;
1294
1295         return 0;
1296 }
1297
1298 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1299 {
1300         read_lock(&vcpu->kvm->arch.sca_lock);
1301         if (vcpu->kvm->arch.use_esca) {
1302                 struct esca_block *sca = vcpu->kvm->arch.sca;
1303
1304                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1305                 sca->cpu[vcpu->vcpu_id].sda = 0;
1306         } else {
1307                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1308
1309                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1310                 sca->cpu[vcpu->vcpu_id].sda = 0;
1311         }
1312         read_unlock(&vcpu->kvm->arch.sca_lock);
1313 }
1314
1315 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1316 {
1317         read_lock(&vcpu->kvm->arch.sca_lock);
1318         if (vcpu->kvm->arch.use_esca) {
1319                 struct esca_block *sca = vcpu->kvm->arch.sca;
1320
1321                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1322                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1323                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1324                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1325                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1326         } else {
1327                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1328
1329                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1330                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1331                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1332                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1333         }
1334         read_unlock(&vcpu->kvm->arch.sca_lock);
1335 }
1336
1337 /* Basic SCA to Extended SCA data copy routines */
1338 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1339 {
1340         d->sda = s->sda;
1341         d->sigp_ctrl.c = s->sigp_ctrl.c;
1342         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1343 }
1344
1345 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1346 {
1347         int i;
1348
1349         d->ipte_control = s->ipte_control;
1350         d->mcn[0] = s->mcn;
1351         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1352                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1353 }
1354
1355 static int sca_switch_to_extended(struct kvm *kvm)
1356 {
1357         struct bsca_block *old_sca = kvm->arch.sca;
1358         struct esca_block *new_sca;
1359         struct kvm_vcpu *vcpu;
1360         unsigned int vcpu_idx;
1361         u32 scaol, scaoh;
1362
1363         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1364         if (!new_sca)
1365                 return -ENOMEM;
1366
1367         scaoh = (u32)((u64)(new_sca) >> 32);
1368         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1369
1370         kvm_s390_vcpu_block_all(kvm);
1371         write_lock(&kvm->arch.sca_lock);
1372
1373         sca_copy_b_to_e(new_sca, old_sca);
1374
1375         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1376                 vcpu->arch.sie_block->scaoh = scaoh;
1377                 vcpu->arch.sie_block->scaol = scaol;
1378                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1379         }
1380         kvm->arch.sca = new_sca;
1381         kvm->arch.use_esca = 1;
1382
1383         write_unlock(&kvm->arch.sca_lock);
1384         kvm_s390_vcpu_unblock_all(kvm);
1385
1386         free_page((unsigned long)old_sca);
1387
1388         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1389                  old_sca, kvm->arch.sca);
1390         return 0;
1391 }
1392
1393 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1394 {
1395         int rc;
1396
1397         if (id < KVM_S390_BSCA_CPU_SLOTS)
1398                 return true;
1399         if (!sclp.has_esca)
1400                 return false;
1401
1402         mutex_lock(&kvm->lock);
1403         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1404         mutex_unlock(&kvm->lock);
1405
1406         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1407 }
1408
1409 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1410 {
1411         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1412         kvm_clear_async_pf_completion_queue(vcpu);
1413         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1414                                     KVM_SYNC_GPRS |
1415                                     KVM_SYNC_ACRS |
1416                                     KVM_SYNC_CRS |
1417                                     KVM_SYNC_ARCH0 |
1418                                     KVM_SYNC_PFAULT;
1419         if (test_kvm_facility(vcpu->kvm, 64))
1420                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1421         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1422          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1423          */
1424         if (MACHINE_HAS_VX)
1425                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1426         else
1427                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1428
1429         if (kvm_is_ucontrol(vcpu->kvm))
1430                 return __kvm_ucontrol_vcpu_init(vcpu);
1431
1432         return 0;
1433 }
1434
1435 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1436 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1437 {
1438         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1439         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1440         vcpu->arch.cputm_start = get_tod_clock_fast();
1441         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1442 }
1443
1444 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1445 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1446 {
1447         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1448         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1449         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1450         vcpu->arch.cputm_start = 0;
1451         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1452 }
1453
1454 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1455 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1456 {
1457         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1458         vcpu->arch.cputm_enabled = true;
1459         __start_cpu_timer_accounting(vcpu);
1460 }
1461
1462 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1463 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1464 {
1465         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1466         __stop_cpu_timer_accounting(vcpu);
1467         vcpu->arch.cputm_enabled = false;
1468 }
1469
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __enable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1476
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
        /* protect from TOD sync and vcpu_load/put */
        preempt_disable();
        __disable_cpu_timer_accounting(vcpu);
        preempt_enable();
}
1483
1484 /* set the cpu timer - may only be called from the VCPU thread itself */
1485 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1486 {
1487         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1488         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1489         if (vcpu->arch.cputm_enabled)
1490                 vcpu->arch.cputm_start = get_tod_clock_fast();
1491         vcpu->arch.sie_block->cputm = cputm;
1492         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1493         preempt_enable();
1494 }
1495
1496 /* update and get the cpu timer - can also be called from other VCPU threads */
1497 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1498 {
1499         unsigned int seq;
1500         __u64 value;
1501
1502         if (unlikely(!vcpu->arch.cputm_enabled))
1503                 return vcpu->arch.sie_block->cputm;
1504
1505         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1506         do {
1507                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1508                 /*
1509                  * If the writer would ever execute a read in the critical
1510                  * section, e.g. in irq context, we have a deadlock.
1511                  */
1512                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1513                 value = vcpu->arch.sie_block->cputm;
1514                 /* if cputm_start is 0, accounting is being started/stopped */
1515                 if (likely(vcpu->arch.cputm_start))
1516                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1517         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1518         preempt_enable();
1519         return value;
1520 }
1521
1522 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1523 {
1524         /* Save host register state */
1525         save_fpu_regs();
1526         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1527         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1528
1529         if (MACHINE_HAS_VX)
1530                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1531         else
1532                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1533         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1534         if (test_fp_ctl(current->thread.fpu.fpc))
1535                 /* User space provided an invalid FPC, let's clear it */
1536                 current->thread.fpu.fpc = 0;
1537
1538         save_access_regs(vcpu->arch.host_acrs);
1539         restore_access_regs(vcpu->run->s.regs.acrs);
1540         gmap_enable(vcpu->arch.gmap);
1541         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1542         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1543                 __start_cpu_timer_accounting(vcpu);
1544         vcpu->cpu = cpu;
1545 }
1546
1547 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1548 {
1549         vcpu->cpu = -1;
1550         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1551                 __stop_cpu_timer_accounting(vcpu);
1552         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1553         gmap_disable(vcpu->arch.gmap);
1554
1555         /* Save guest register state */
1556         save_fpu_regs();
1557         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1558
1559         /* Restore host register state */
1560         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1561         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1562
1563         save_access_regs(vcpu->run->s.regs.acrs);
1564         restore_access_regs(vcpu->arch.host_acrs);
1565 }
1566
1567 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1568 {
1569         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1570         vcpu->arch.sie_block->gpsw.mask = 0UL;
1571         vcpu->arch.sie_block->gpsw.addr = 0UL;
1572         kvm_s390_set_prefix(vcpu, 0);
1573         kvm_s390_set_cpu_timer(vcpu, 0);
1574         vcpu->arch.sie_block->ckc       = 0UL;
1575         vcpu->arch.sie_block->todpr     = 0;
1576         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1577         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1578         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1579         /* make sure the new fpc will be lazily loaded */
1580         save_fpu_regs();
1581         current->thread.fpu.fpc = 0;
1582         vcpu->arch.sie_block->gbea = 1;
1583         vcpu->arch.sie_block->pp = 0;
1584         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1585         kvm_clear_async_pf_completion_queue(vcpu);
1586         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1587                 kvm_s390_vcpu_stop(vcpu);
1588         kvm_s390_clear_local_irqs(vcpu);
1589 }
1590
1591 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1592 {
1593         mutex_lock(&vcpu->kvm->lock);
1594         preempt_disable();
1595         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1596         preempt_enable();
1597         mutex_unlock(&vcpu->kvm->lock);
1598         if (!kvm_is_ucontrol(vcpu->kvm)) {
1599                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1600                 sca_add_vcpu(vcpu);
1601         }
1602
1603 }
1604
1605 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1606 {
1607         if (!test_kvm_facility(vcpu->kvm, 76))
1608                 return;
1609
1610         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1611
1612         if (vcpu->kvm->arch.crypto.aes_kw)
1613                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1614         if (vcpu->kvm->arch.crypto.dea_kw)
1615                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1616
1617         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1618 }
1619
1620 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1621 {
1622         free_page(vcpu->arch.sie_block->cbrlo);
1623         vcpu->arch.sie_block->cbrlo = 0;
1624 }
1625
1626 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1627 {
1628         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1629         if (!vcpu->arch.sie_block->cbrlo)
1630                 return -ENOMEM;
1631
1632         vcpu->arch.sie_block->ecb2 |= 0x80;
1633         vcpu->arch.sie_block->ecb2 &= ~0x08;
1634         return 0;
1635 }
1636
1637 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1638 {
1639         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1640
1641         vcpu->arch.sie_block->ibc = model->ibc;
1642         if (test_kvm_facility(vcpu->kvm, 7))
1643                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1644 }
1645
1646 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1647 {
1648         int rc = 0;
1649
1650         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1651                                                     CPUSTAT_SM |
1652                                                     CPUSTAT_STOPPED);
1653
1654         if (test_kvm_facility(vcpu->kvm, 78))
1655                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1656         else if (test_kvm_facility(vcpu->kvm, 8))
1657                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1658
1659         kvm_s390_vcpu_setup_model(vcpu);
1660
1661         vcpu->arch.sie_block->ecb = 0x02;
1662         if (test_kvm_facility(vcpu->kvm, 9))
1663                 vcpu->arch.sie_block->ecb |= 0x04;
1664         if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1665                 vcpu->arch.sie_block->ecb |= 0x10;
1666
1667         if (test_kvm_facility(vcpu->kvm, 8))
1668                 vcpu->arch.sie_block->ecb2 |= 0x08;
1669         vcpu->arch.sie_block->eca   = 0xC1002000U;
1670         if (sclp.has_siif)
1671                 vcpu->arch.sie_block->eca |= 1;
1672         if (sclp.has_sigpif)
1673                 vcpu->arch.sie_block->eca |= 0x10000000U;
1674         if (test_kvm_facility(vcpu->kvm, 64))
1675                 vcpu->arch.sie_block->ecb3 |= 0x01;
1676         if (test_kvm_facility(vcpu->kvm, 129)) {
1677                 vcpu->arch.sie_block->eca |= 0x00020000;
1678                 vcpu->arch.sie_block->ecd |= 0x20000000;
1679         }
1680         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1681         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1682
1683         if (vcpu->kvm->arch.use_cmma) {
1684                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1685                 if (rc)
1686                         return rc;
1687         }
1688         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1689         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1690
1691         kvm_s390_vcpu_crypto_setup(vcpu);
1692
1693         return rc;
1694 }
1695
1696 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1697                                       unsigned int id)
1698 {
1699         struct kvm_vcpu *vcpu;
1700         struct sie_page *sie_page;
1701         int rc = -EINVAL;
1702
1703         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1704                 goto out;
1705
1706         rc = -ENOMEM;
1707
1708         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1709         if (!vcpu)
1710                 goto out;
1711
1712         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1713         if (!sie_page)
1714                 goto out_free_cpu;
1715
1716         vcpu->arch.sie_block = &sie_page->sie_block;
1717         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1718
1719         vcpu->arch.sie_block->icpua = id;
1720         spin_lock_init(&vcpu->arch.local_int.lock);
1721         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1722         vcpu->arch.local_int.wq = &vcpu->wq;
1723         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1724         seqcount_init(&vcpu->arch.cputm_seqcount);
1725
1726         rc = kvm_vcpu_init(vcpu, kvm, id);
1727         if (rc)
1728                 goto out_free_sie_block;
1729         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
1730                  vcpu->arch.sie_block);
1731         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1732
1733         return vcpu;
1734 out_free_sie_block:
1735         free_page((unsigned long)(vcpu->arch.sie_block));
1736 out_free_cpu:
1737         kmem_cache_free(kvm_vcpu_cache, vcpu);
1738 out:
1739         return ERR_PTR(rc);
1740 }
1741
/* Report the result of kvm_s390_vcpu_has_irq(vcpu, 0) for this vcpu. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

        return has_irq;
}
1746
1747 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1748 {
1749         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1750         exit_sie(vcpu);
1751 }
1752
1753 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1754 {
1755         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1756 }
1757
1758 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1759 {
1760         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1761         exit_sie(vcpu);
1762 }
1763
1764 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1765 {
1766         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1767 }
1768
1769 /*
1770  * Kick a guest cpu out of SIE and wait until SIE is not running.
1771  * If the CPU is not running (e.g. waiting as idle) the function will
1772  * return immediately. */
1773 void exit_sie(struct kvm_vcpu *vcpu)
1774 {
1775         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1776         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1777                 cpu_relax();
1778 }
1779
/*
 * Kick a guest cpu out of SIE to process a request synchronously: the
 * request is queued first, afterwards the vcpu is forced out of SIE.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
        /* queue the request ... */
        kvm_make_request(req, vcpu);

        /* ... and make sure the vcpu leaves SIE to notice it */
        kvm_s390_vcpu_request(vcpu);
}
1786
1787 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1788 {
1789         int i;
1790         struct kvm *kvm = gmap->private;
1791         struct kvm_vcpu *vcpu;
1792
1793         kvm_for_each_vcpu(i, vcpu, kvm) {
1794                 /* match against both prefix pages */
1795                 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1796                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1797                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1798                 }
1799         }
1800 }
1801
/*
 * Only exists because kvm common code refers to it; it is never supposed
 * to be called on s390, therefore calling it is treated as a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        BUG();

        /* keeps the compiler happy */
        return 0;
}
1808
1809 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1810                                            struct kvm_one_reg *reg)
1811 {
1812         int r = -EINVAL;
1813
1814         switch (reg->id) {
1815         case KVM_REG_S390_TODPR:
1816                 r = put_user(vcpu->arch.sie_block->todpr,
1817                              (u32 __user *)reg->addr);
1818                 break;
1819         case KVM_REG_S390_EPOCHDIFF:
1820                 r = put_user(vcpu->arch.sie_block->epoch,
1821                              (u64 __user *)reg->addr);
1822                 break;
1823         case KVM_REG_S390_CPU_TIMER:
1824                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
1825                              (u64 __user *)reg->addr);
1826                 break;
1827         case KVM_REG_S390_CLOCK_COMP:
1828                 r = put_user(vcpu->arch.sie_block->ckc,
1829                              (u64 __user *)reg->addr);
1830                 break;
1831         case KVM_REG_S390_PFTOKEN:
1832                 r = put_user(vcpu->arch.pfault_token,
1833                              (u64 __user *)reg->addr);
1834                 break;
1835         case KVM_REG_S390_PFCOMPARE:
1836                 r = put_user(vcpu->arch.pfault_compare,
1837                              (u64 __user *)reg->addr);
1838                 break;
1839         case KVM_REG_S390_PFSELECT:
1840                 r = put_user(vcpu->arch.pfault_select,
1841                              (u64 __user *)reg->addr);
1842                 break;
1843         case KVM_REG_S390_PP:
1844                 r = put_user(vcpu->arch.sie_block->pp,
1845                              (u64 __user *)reg->addr);
1846                 break;
1847         case KVM_REG_S390_GBEA:
1848                 r = put_user(vcpu->arch.sie_block->gbea,
1849                              (u64 __user *)reg->addr);
1850                 break;
1851         default:
1852                 break;
1853         }
1854
1855         return r;
1856 }
1857
1858 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1859                                            struct kvm_one_reg *reg)
1860 {
1861         int r = -EINVAL;
1862         __u64 val;
1863
1864         switch (reg->id) {
1865         case KVM_REG_S390_TODPR:
1866                 r = get_user(vcpu->arch.sie_block->todpr,
1867                              (u32 __user *)reg->addr);
1868                 break;
1869         case KVM_REG_S390_EPOCHDIFF:
1870                 r = get_user(vcpu->arch.sie_block->epoch,
1871                              (u64 __user *)reg->addr);
1872                 break;
1873         case KVM_REG_S390_CPU_TIMER:
1874                 r = get_user(val, (u64 __user *)reg->addr);
1875                 if (!r)
1876                         kvm_s390_set_cpu_timer(vcpu, val);
1877                 break;
1878         case KVM_REG_S390_CLOCK_COMP:
1879                 r = get_user(vcpu->arch.sie_block->ckc,
1880                              (u64 __user *)reg->addr);
1881                 break;
1882         case KVM_REG_S390_PFTOKEN:
1883                 r = get_user(vcpu->arch.pfault_token,
1884                              (u64 __user *)reg->addr);
1885                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1886                         kvm_clear_async_pf_completion_queue(vcpu);
1887                 break;
1888         case KVM_REG_S390_PFCOMPARE:
1889                 r = get_user(vcpu->arch.pfault_compare,
1890                              (u64 __user *)reg->addr);
1891                 break;
1892         case KVM_REG_S390_PFSELECT:
1893                 r = get_user(vcpu->arch.pfault_select,
1894                              (u64 __user *)reg->addr);
1895                 break;
1896         case KVM_REG_S390_PP:
1897                 r = get_user(vcpu->arch.sie_block->pp,
1898                              (u64 __user *)reg->addr);
1899                 break;
1900         case KVM_REG_S390_GBEA:
1901                 r = get_user(vcpu->arch.sie_block->gbea,
1902                              (u64 __user *)reg->addr);
1903                 break;
1904         default:
1905                 break;
1906         }
1907
1908         return r;
1909 }
1910
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);

        return 0;
}
1916
1917 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1918 {
1919         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1920         return 0;
1921 }
1922
1923 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1924 {
1925         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1926         return 0;
1927 }
1928
1929 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1930                                   struct kvm_sregs *sregs)
1931 {
1932         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1933         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1934         restore_access_regs(vcpu->run->s.regs.acrs);
1935         return 0;
1936 }
1937
1938 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1939                                   struct kvm_sregs *sregs)
1940 {
1941         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1942         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1943         return 0;
1944 }
1945
1946 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1947 {
1948         /* make sure the new values will be lazily loaded */
1949         save_fpu_regs();
1950         if (test_fp_ctl(fpu->fpc))
1951                 return -EINVAL;
1952         current->thread.fpu.fpc = fpu->fpc;
1953         if (MACHINE_HAS_VX)
1954                 convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
1955         else
1956                 memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
1957         return 0;
1958 }
1959
1960 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1961 {
1962         /* make sure we have the latest values */
1963         save_fpu_regs();
1964         if (MACHINE_HAS_VX)
1965                 convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
1966         else
1967                 memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
1968         fpu->fpc = current->thread.fpu.fpc;
1969         return 0;
1970 }
1971
1972 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1973 {
1974         int rc = 0;
1975
1976         if (!is_vcpu_stopped(vcpu))
1977                 rc = -EBUSY;
1978         else {
1979                 vcpu->run->psw_mask = psw.mask;
1980                 vcpu->run->psw_addr = psw.addr;
1981         }
1982         return rc;
1983 }
1984
1985 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1986                                   struct kvm_translation *tr)
1987 {
1988         return -EINVAL; /* not implemented yet */
1989 }
1990
1991 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1992                               KVM_GUESTDBG_USE_HW_BP | \
1993                               KVM_GUESTDBG_ENABLE)
1994
1995 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1996                                         struct kvm_guest_debug *dbg)
1997 {
1998         int rc = 0;
1999
2000         vcpu->guest_debug = 0;
2001         kvm_s390_clear_bp_data(vcpu);
2002
2003         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2004                 return -EINVAL;
2005
2006         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2007                 vcpu->guest_debug = dbg->control;
2008                 /* enforce guest PER */
2009                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2010
2011                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2012                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2013         } else {
2014                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2015                 vcpu->arch.guestdbg.last_bp = 0;
2016         }
2017
2018         if (rc) {
2019                 vcpu->guest_debug = 0;
2020                 kvm_s390_clear_bp_data(vcpu);
2021                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2022         }
2023
2024         return rc;
2025 }
2026
2027 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2028                                     struct kvm_mp_state *mp_state)
2029 {
2030         /* CHECK_STOP and LOAD are not supported yet */
2031         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2032                                        KVM_MP_STATE_OPERATING;
2033 }
2034
2035 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2036                                     struct kvm_mp_state *mp_state)
2037 {
2038         int rc = 0;
2039
2040         /* user space knows about this interface - let it control the state */
2041         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2042
2043         switch (mp_state->mp_state) {
2044         case KVM_MP_STATE_STOPPED:
2045                 kvm_s390_vcpu_stop(vcpu);
2046                 break;
2047         case KVM_MP_STATE_OPERATING:
2048                 kvm_s390_vcpu_start(vcpu);
2049                 break;
2050         case KVM_MP_STATE_LOAD:
2051         case KVM_MP_STATE_CHECK_STOP:
2052                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2053         default:
2054                 rc = -ENXIO;
2055         }
2056
2057         return rc;
2058 }
2059
2060 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2061 {
2062         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2063 }
2064
2065 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2066 {
2067 retry:
2068         kvm_s390_vcpu_request_handled(vcpu);
2069         if (!vcpu->requests)
2070                 return 0;
2071         /*
2072          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2073          * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
2074          * This ensures that the ipte instruction for this request has
2075          * already finished. We might race against a second unmapper that
2076          * wants to set the blocking bit. Lets just retry the request loop.
2077          */
2078         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2079                 int rc;
2080                 rc = gmap_ipte_notify(vcpu->arch.gmap,
2081                                       kvm_s390_get_prefix(vcpu),
2082                                       PAGE_SIZE * 2);
2083                 if (rc)
2084                         return rc;
2085                 goto retry;
2086         }
2087
2088         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2089                 vcpu->arch.sie_block->ihcpu = 0xffff;
2090                 goto retry;
2091         }
2092
2093         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2094                 if (!ibs_enabled(vcpu)) {
2095                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2096                         atomic_or(CPUSTAT_IBS,
2097                                         &vcpu->arch.sie_block->cpuflags);
2098                 }
2099                 goto retry;
2100         }
2101
2102         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2103                 if (ibs_enabled(vcpu)) {
2104                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2105                         atomic_andnot(CPUSTAT_IBS,
2106                                           &vcpu->arch.sie_block->cpuflags);
2107                 }
2108                 goto retry;
2109         }
2110
2111         /* nothing to do, just clear the request */
2112         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2113
2114         return 0;
2115 }
2116
2117 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2118 {
2119         struct kvm_vcpu *vcpu;
2120         int i;
2121
2122         mutex_lock(&kvm->lock);
2123         preempt_disable();
2124         kvm->arch.epoch = tod - get_tod_clock();
2125         kvm_s390_vcpu_block_all(kvm);
2126         kvm_for_each_vcpu(i, vcpu, kvm)
2127                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2128         kvm_s390_vcpu_unblock_all(kvm);
2129         preempt_enable();
2130         mutex_unlock(&kvm->lock);
2131 }
2132
2133 /**
2134  * kvm_arch_fault_in_page - fault-in guest page if necessary
2135  * @vcpu: The corresponding virtual cpu
2136  * @gpa: Guest physical address
2137  * @writable: Whether the page should be writable or not
2138  *
2139  * Make sure that a guest page has been faulted-in on the host.
2140  *
2141  * Return: Zero on success, negative error code otherwise.
2142  */
2143 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2144 {
2145         return gmap_fault(vcpu->arch.gmap, gpa,
2146                           writable ? FAULT_FLAG_WRITE : 0);
2147 }
2148
2149 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2150                                       unsigned long token)
2151 {
2152         struct kvm_s390_interrupt inti;
2153         struct kvm_s390_irq irq;
2154
2155         if (start_token) {
2156                 irq.u.ext.ext_params2 = token;
2157                 irq.type = KVM_S390_INT_PFAULT_INIT;
2158                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2159         } else {
2160                 inti.type = KVM_S390_INT_PFAULT_DONE;
2161                 inti.parm64 = token;
2162                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2163         }
2164 }
2165
2166 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2167                                      struct kvm_async_pf *work)
2168 {
2169         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2170         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2171 }
2172
2173 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2174                                  struct kvm_async_pf *work)
2175 {
2176         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2177         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2178 }
2179
/*
 * Intentionally empty: s390 will always inject the page directly, so
 * there is nothing left to do once a page is ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
2185
2186 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2187 {
2188         /*
2189          * s390 will always inject the page directly,
2190          * but we still want check_async_completion to cleanup
2191          */
2192         return true;
2193 }
2194
2195 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2196 {
2197         hva_t hva;
2198         struct kvm_arch_async_pf arch;
2199         int rc;
2200
2201         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2202                 return 0;
2203         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2204             vcpu->arch.pfault_compare)
2205                 return 0;
2206         if (psw_extint_disabled(vcpu))
2207                 return 0;
2208         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2209                 return 0;
2210         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2211                 return 0;
2212         if (!vcpu->arch.gmap->pfault_enabled)
2213                 return 0;
2214
2215         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2216         hva += current->thread.gmap_addr & ~PAGE_MASK;
2217         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2218                 return 0;
2219
2220         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2221         return rc;
2222 }
2223
2224 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2225 {
2226         int rc, cpuflags;
2227
2228         /*
2229          * On s390 notifications for arriving pages will be delivered directly
2230          * to the guest but the house keeping for completed pfaults is
2231          * handled outside the worker.
2232          */
2233         kvm_check_async_pf_completion(vcpu);
2234
2235         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2236         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2237
2238         if (need_resched())
2239                 schedule();
2240
2241         if (test_cpu_flag(CIF_MCCK_PENDING))
2242                 s390_handle_mcck();
2243
2244         if (!kvm_is_ucontrol(vcpu->kvm)) {
2245                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2246                 if (rc)
2247                         return rc;
2248         }
2249
2250         rc = kvm_s390_handle_requests(vcpu);
2251         if (rc)
2252                 return rc;
2253
2254         if (guestdbg_enabled(vcpu)) {
2255                 kvm_s390_backup_guest_per_regs(vcpu);
2256                 kvm_s390_patch_guest_per_regs(vcpu);
2257         }
2258
2259         vcpu->arch.sie_block->icptcode = 0;
2260         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2261         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2262         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2263
2264         return 0;
2265 }
2266
2267 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2268 {
2269         struct kvm_s390_pgm_info pgm_info = {
2270                 .code = PGM_ADDRESSING,
2271         };
2272         u8 opcode, ilen;
2273         int rc;
2274
2275         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2276         trace_kvm_s390_sie_fault(vcpu);
2277
2278         /*
2279          * We want to inject an addressing exception, which is defined as a
2280          * suppressing or terminating exception. However, since we came here
2281          * by a DAT access exception, the PSW still points to the faulting
2282          * instruction since DAT exceptions are nullifying. So we've got
2283          * to look up the current opcode to get the length of the instruction
2284          * to be able to forward the PSW.
2285          */
2286         rc = read_guest_instr(vcpu, &opcode, 1);
2287         ilen = insn_length(opcode);
2288         if (rc < 0) {
2289                 return rc;
2290         } else if (rc) {
2291                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2292                  * Forward by arbitrary ilc, injection will take care of
2293                  * nullification if necessary.
2294                  */
2295                 pgm_info = vcpu->arch.pgm;
2296                 ilen = 4;
2297         }
2298         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2299         kvm_s390_forward_psw(vcpu, ilen);
2300         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2301 }
2302
2303 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2304 {
2305         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2306                    vcpu->arch.sie_block->icptcode);
2307         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2308
2309         if (guestdbg_enabled(vcpu))
2310                 kvm_s390_restore_guest_per_regs(vcpu);
2311
2312         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2313         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2314
2315         if (vcpu->arch.sie_block->icptcode > 0) {
2316                 int rc = kvm_handle_sie_intercept(vcpu);
2317
2318                 if (rc != -EOPNOTSUPP)
2319                         return rc;
2320                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2321                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2322                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2323                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2324                 return -EREMOTE;
2325         } else if (exit_reason != -EFAULT) {
2326                 vcpu->stat.exit_null++;
2327                 return 0;
2328         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2329                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2330                 vcpu->run->s390_ucontrol.trans_exc_code =
2331                                                 current->thread.gmap_addr;
2332                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2333                 return -EREMOTE;
2334         } else if (current->thread.gmap_pfault) {
2335                 trace_kvm_s390_major_guest_pfault(vcpu);
2336                 current->thread.gmap_pfault = 0;
2337                 if (kvm_arch_setup_async_pf(vcpu))
2338                         return 0;
2339                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2340         }
2341         return vcpu_post_run_fault_in_sie(vcpu);
2342 }
2343
2344 static int __vcpu_run(struct kvm_vcpu *vcpu)
2345 {
2346         int rc, exit_reason;
2347
2348         /*
2349          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2350          * ning the guest), so that memslots (and other stuff) are protected
2351          */
2352         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2353
2354         do {
2355                 rc = vcpu_pre_run(vcpu);
2356                 if (rc)
2357                         break;
2358
2359                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2360                 /*
2361                  * As PF_VCPU will be used in fault handler, between
2362                  * guest_enter and guest_exit should be no uaccess.
2363                  */
2364                 local_irq_disable();
2365                 __kvm_guest_enter();
2366                 __disable_cpu_timer_accounting(vcpu);
2367                 local_irq_enable();
2368                 exit_reason = sie64a(vcpu->arch.sie_block,
2369                                      vcpu->run->s.regs.gprs);
2370                 local_irq_disable();
2371                 __enable_cpu_timer_accounting(vcpu);
2372                 __kvm_guest_exit();
2373                 local_irq_enable();
2374                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2375
2376                 rc = vcpu_post_run(vcpu, exit_reason);
2377         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2378
2379         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2380         return rc;
2381 }
2382
2383 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2384 {
2385         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2386         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2387         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2388                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2389         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2390                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2391                 /* some control register changes require a tlb flush */
2392                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2393         }
2394         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2395                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2396                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2397                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2398                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2399                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2400         }
2401         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2402                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2403                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2404                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2405                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2406                         kvm_clear_async_pf_completion_queue(vcpu);
2407         }
2408         kvm_run->kvm_dirty_regs = 0;
2409 }
2410
2411 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2412 {
2413         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2414         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2415         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2416         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2417         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2418         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2419         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2420         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2421         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2422         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2423         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2424         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2425 }
2426
2427 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2428 {
2429         int rc;
2430         sigset_t sigsaved;
2431
2432         if (guestdbg_exit_pending(vcpu)) {
2433                 kvm_s390_prepare_debug_exit(vcpu);
2434                 return 0;
2435         }
2436
2437         if (vcpu->sigset_active)
2438                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2439
2440         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2441                 kvm_s390_vcpu_start(vcpu);
2442         } else if (is_vcpu_stopped(vcpu)) {
2443                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2444                                    vcpu->vcpu_id);
2445                 return -EINVAL;
2446         }
2447
2448         sync_regs(vcpu, kvm_run);
2449         enable_cpu_timer_accounting(vcpu);
2450
2451         might_fault();
2452         rc = __vcpu_run(vcpu);
2453
2454         if (signal_pending(current) && !rc) {
2455                 kvm_run->exit_reason = KVM_EXIT_INTR;
2456                 rc = -EINTR;
2457         }
2458
2459         if (guestdbg_exit_pending(vcpu) && !rc)  {
2460                 kvm_s390_prepare_debug_exit(vcpu);
2461                 rc = 0;
2462         }
2463
2464         if (rc == -EREMOTE) {
2465                 /* userspace support is needed, kvm_run has been prepared */
2466                 rc = 0;
2467         }
2468
2469         disable_cpu_timer_accounting(vcpu);
2470         store_regs(vcpu, kvm_run);
2471
2472         if (vcpu->sigset_active)
2473                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2474
2475         vcpu->stat.exit_userspace++;
2476         return rc;
2477 }
2478
2479 /*
2480  * store status at address
2481  * we use have two special cases:
2482  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2483  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2484  */
2485 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2486 {
2487         unsigned char archmode = 1;
2488         freg_t fprs[NUM_FPRS];
2489         unsigned int px;
2490         u64 clkcomp, cputm;
2491         int rc;
2492
2493         px = kvm_s390_get_prefix(vcpu);
2494         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2495                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2496                         return -EFAULT;
2497                 gpa = 0;
2498         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2499                 if (write_guest_real(vcpu, 163, &archmode, 1))
2500                         return -EFAULT;
2501                 gpa = px;
2502         } else
2503                 gpa -= __LC_FPREGS_SAVE_AREA;
2504
2505         /* manually convert vector registers if necessary */
2506         if (MACHINE_HAS_VX) {
2507                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2508                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2509                                      fprs, 128);
2510         } else {
2511                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2512                                      vcpu->run->s.regs.fprs, 128);
2513         }
2514         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2515                               vcpu->run->s.regs.gprs, 128);
2516         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2517                               &vcpu->arch.sie_block->gpsw, 16);
2518         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2519                               &px, 4);
2520         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2521                               &vcpu->run->s.regs.fpc, 4);
2522         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2523                               &vcpu->arch.sie_block->todpr, 4);
2524         cputm = kvm_s390_get_cpu_timer(vcpu);
2525         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2526                               &cputm, 8);
2527         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2528         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2529                               &clkcomp, 8);
2530         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2531                               &vcpu->run->s.regs.acrs, 64);
2532         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2533                               &vcpu->arch.sie_block->gcr, 128);
2534         return rc ? -EFAULT : 0;
2535 }
2536
2537 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2538 {
2539         /*
2540          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2541          * copying in vcpu load/put. Lets update our copies before we save
2542          * it into the save area
2543          */
2544         save_fpu_regs();
2545         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2546         save_access_regs(vcpu->run->s.regs.acrs);
2547
2548         return kvm_s390_store_status_unloaded(vcpu, addr);
2549 }
2550
2551 /*
2552  * store additional status at address
2553  */
2554 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2555                                         unsigned long gpa)
2556 {
2557         /* Only bits 0-53 are used for address formation */
2558         if (!(gpa & ~0x3ff))
2559                 return 0;
2560
2561         return write_guest_abs(vcpu, gpa & ~0x3ff,
2562                                (void *)&vcpu->run->s.regs.vrs, 512);
2563 }
2564
2565 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2566 {
2567         if (!test_kvm_facility(vcpu->kvm, 129))
2568                 return 0;
2569
2570         /*
2571          * The guest VXRS are in the host VXRs due to the lazy
2572          * copying in vcpu load/put. We can simply call save_fpu_regs()
2573          * to save the current register state because we are in the
2574          * middle of a load/put cycle.
2575          *
2576          * Let's update our copies before we save it into the save area.
2577          */
2578         save_fpu_regs();
2579
2580         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2581 }
2582
2583 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2584 {
2585         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2586         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2587 }
2588
2589 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2590 {
2591         unsigned int i;
2592         struct kvm_vcpu *vcpu;
2593
2594         kvm_for_each_vcpu(i, vcpu, kvm) {
2595                 __disable_ibs_on_vcpu(vcpu);
2596         }
2597 }
2598
2599 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2600 {
2601         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2602         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2603 }
2604
2605 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2606 {
2607         int i, online_vcpus, started_vcpus = 0;
2608
2609         if (!is_vcpu_stopped(vcpu))
2610                 return;
2611
2612         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2613         /* Only one cpu at a time may enter/leave the STOPPED state. */
2614         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2615         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2616
2617         for (i = 0; i < online_vcpus; i++) {
2618                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2619                         started_vcpus++;
2620         }
2621
2622         if (started_vcpus == 0) {
2623                 /* we're the only active VCPU -> speed it up */
2624                 __enable_ibs_on_vcpu(vcpu);
2625         } else if (started_vcpus == 1) {
2626                 /*
2627                  * As we are starting a second VCPU, we have to disable
2628                  * the IBS facility on all VCPUs to remove potentially
2629                  * oustanding ENABLE requests.
2630                  */
2631                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2632         }
2633
2634         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2635         /*
2636          * Another VCPU might have used IBS while we were offline.
2637          * Let's play safe and flush the VCPU at startup.
2638          */
2639         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2640         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2641         return;
2642 }
2643
2644 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2645 {
2646         int i, online_vcpus, started_vcpus = 0;
2647         struct kvm_vcpu *started_vcpu = NULL;
2648
2649         if (is_vcpu_stopped(vcpu))
2650                 return;
2651
2652         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2653         /* Only one cpu at a time may enter/leave the STOPPED state. */
2654         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2655         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2656
2657         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2658         kvm_s390_clear_stop_irq(vcpu);
2659
2660         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2661         __disable_ibs_on_vcpu(vcpu);
2662
2663         for (i = 0; i < online_vcpus; i++) {
2664                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2665                         started_vcpus++;
2666                         started_vcpu = vcpu->kvm->vcpus[i];
2667                 }
2668         }
2669
2670         if (started_vcpus == 1) {
2671                 /*
2672                  * As we only have one VCPU left, we want to enable the
2673                  * IBS facility for that VCPU to speed it up.
2674                  */
2675                 __enable_ibs_on_vcpu(started_vcpu);
2676         }
2677
2678         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2679         return;
2680 }
2681
2682 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2683                                      struct kvm_enable_cap *cap)
2684 {
2685         int r;
2686
2687         if (cap->flags)
2688                 return -EINVAL;
2689
2690         switch (cap->cap) {
2691         case KVM_CAP_S390_CSS_SUPPORT:
2692                 if (!vcpu->kvm->arch.css_support) {
2693                         vcpu->kvm->arch.css_support = 1;
2694                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2695                         trace_kvm_s390_enable_css(vcpu->kvm);
2696                 }
2697                 r = 0;
2698                 break;
2699         default:
2700                 r = -EINVAL;
2701                 break;
2702         }
2703         return r;
2704 }
2705
2706 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2707                                   struct kvm_s390_mem_op *mop)
2708 {
2709         void __user *uaddr = (void __user *)mop->buf;
2710         void *tmpbuf = NULL;
2711         int r, srcu_idx;
2712         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2713                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
2714
2715         if (mop->flags & ~supported_flags)
2716                 return -EINVAL;
2717
2718         if (mop->size > MEM_OP_MAX_SIZE)
2719                 return -E2BIG;
2720
2721         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2722                 tmpbuf = vmalloc(mop->size);
2723                 if (!tmpbuf)
2724                         return -ENOMEM;
2725         }
2726
2727         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2728
2729         switch (mop->op) {
2730         case KVM_S390_MEMOP_LOGICAL_READ:
2731                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2732                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2733                                             mop->size, GACC_FETCH);
2734                         break;
2735                 }
2736                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2737                 if (r == 0) {
2738                         if (copy_to_user(uaddr, tmpbuf, mop->size))
2739                                 r = -EFAULT;
2740                 }
2741                 break;
2742         case KVM_S390_MEMOP_LOGICAL_WRITE:
2743                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2744                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
2745                                             mop->size, GACC_STORE);
2746                         break;
2747                 }
2748                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2749                         r = -EFAULT;
2750                         break;
2751                 }
2752                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2753                 break;
2754         default:
2755                 r = -EINVAL;
2756         }
2757
2758         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2759
2760         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2761                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2762
2763         vfree(tmpbuf);
2764         return r;
2765 }
2766
2767 long kvm_arch_vcpu_ioctl(struct file *filp,
2768                          unsigned int ioctl, unsigned long arg)
2769 {
2770         struct kvm_vcpu *vcpu = filp->private_data;
2771         void __user *argp = (void __user *)arg;
2772         int idx;
2773         long r;
2774
2775         switch (ioctl) {
2776         case KVM_S390_IRQ: {
2777                 struct kvm_s390_irq s390irq;
2778
2779                 r = -EFAULT;
2780                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2781                         break;
2782                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2783                 break;
2784         }
2785         case KVM_S390_INTERRUPT: {
2786                 struct kvm_s390_interrupt s390int;
2787                 struct kvm_s390_irq s390irq;
2788
2789                 r = -EFAULT;
2790                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2791                         break;
2792                 if (s390int_to_s390irq(&s390int, &s390irq))
2793                         return -EINVAL;
2794                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2795                 break;
2796         }
2797         case KVM_S390_STORE_STATUS:
2798                 idx = srcu_read_lock(&vcpu->kvm->srcu);
2799                 r = kvm_s390_vcpu_store_status(vcpu, arg);
2800                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2801                 break;
2802         case KVM_S390_SET_INITIAL_PSW: {
2803                 psw_t psw;
2804
2805                 r = -EFAULT;
2806                 if (copy_from_user(&psw, argp, sizeof(psw)))
2807                         break;
2808                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2809                 break;
2810         }
2811         case KVM_S390_INITIAL_RESET:
2812                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2813                 break;
2814         case KVM_SET_ONE_REG:
2815         case KVM_GET_ONE_REG: {
2816                 struct kvm_one_reg reg;
2817                 r = -EFAULT;
2818                 if (copy_from_user(&reg, argp, sizeof(reg)))
2819                         break;
2820                 if (ioctl == KVM_SET_ONE_REG)
2821                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2822                 else
2823                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2824                 break;
2825         }
2826 #ifdef CONFIG_KVM_S390_UCONTROL
2827         case KVM_S390_UCAS_MAP: {
2828                 struct kvm_s390_ucas_mapping ucasmap;
2829
2830                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2831                         r = -EFAULT;
2832                         break;
2833                 }
2834
2835                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2836                         r = -EINVAL;
2837                         break;
2838                 }
2839
2840                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2841                                      ucasmap.vcpu_addr, ucasmap.length);
2842                 break;
2843         }
2844         case KVM_S390_UCAS_UNMAP: {
2845                 struct kvm_s390_ucas_mapping ucasmap;
2846
2847                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2848                         r = -EFAULT;
2849                         break;
2850                 }
2851
2852                 if (!kvm_is_ucontrol(vcpu->kvm)) {
2853                         r = -EINVAL;
2854                         break;
2855                 }
2856
2857                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2858                         ucasmap.length);
2859                 break;
2860         }
2861 #endif
2862         case KVM_S390_VCPU_FAULT: {
2863                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2864                 break;
2865         }
2866         case KVM_ENABLE_CAP:
2867         {
2868                 struct kvm_enable_cap cap;
2869                 r = -EFAULT;
2870                 if (copy_from_user(&cap, argp, sizeof(cap)))
2871                         break;
2872                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2873                 break;
2874         }
2875         case KVM_S390_MEM_OP: {
2876                 struct kvm_s390_mem_op mem_op;
2877
2878                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2879                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2880                 else
2881                         r = -EFAULT;
2882                 break;
2883         }
2884         case KVM_S390_SET_IRQ_STATE: {
2885                 struct kvm_s390_irq_state irq_state;
2886
2887                 r = -EFAULT;
2888                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2889                         break;
2890                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2891                     irq_state.len == 0 ||
2892                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2893                         r = -EINVAL;
2894                         break;
2895                 }
2896                 r = kvm_s390_set_irq_state(vcpu,
2897                                            (void __user *) irq_state.buf,
2898                                            irq_state.len);
2899                 break;
2900         }
2901         case KVM_S390_GET_IRQ_STATE: {
2902                 struct kvm_s390_irq_state irq_state;
2903
2904                 r = -EFAULT;
2905                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2906                         break;
2907                 if (irq_state.len == 0) {
2908                         r = -EINVAL;
2909                         break;
2910                 }
2911                 r = kvm_s390_get_irq_state(vcpu,
2912                                            (__u8 __user *)  irq_state.buf,
2913                                            irq_state.len);
2914                 break;
2915         }
2916         default:
2917                 r = -ENOTTY;
2918         }
2919         return r;
2920 }
2921
2922 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2923 {
2924 #ifdef CONFIG_KVM_S390_UCONTROL
2925         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2926                  && (kvm_is_ucontrol(vcpu->kvm))) {
2927                 vmf->page = virt_to_page(vcpu->arch.sie_block);
2928                 get_page(vmf->page);
2929                 return 0;
2930         }
2931 #endif
2932         return VM_FAULT_SIGBUS;
2933 }
2934
/*
 * Memslot creation hook. Nothing is done here; the arguments are not
 * looked at and 0 is always returned.
 */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        /* no per-memslot setup is performed */
        return 0;
}
2940
2941 /* Section: memory related */
2942 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2943                                    struct kvm_memory_slot *memslot,
2944                                    const struct kvm_userspace_memory_region *mem,
2945                                    enum kvm_mr_change change)
2946 {
2947         /* A few sanity checks. We can have memory slots which have to be
2948            located/ended at a segment boundary (1MB). The memory in userland is
2949            ok to be fragmented into various different vmas. It is okay to mmap()
2950            and munmap() stuff in this slot after doing this call at any time */
2951
2952         if (mem->userspace_addr & 0xffffful)
2953                 return -EINVAL;
2954
2955         if (mem->memory_size & 0xffffful)
2956                 return -EINVAL;
2957
2958         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
2959                 return -EINVAL;
2960
2961         return 0;
2962 }
2963
2964 void kvm_arch_commit_memory_region(struct kvm *kvm,
2965                                 const struct kvm_userspace_memory_region *mem,
2966                                 const struct kvm_memory_slot *old,
2967                                 const struct kvm_memory_slot *new,
2968                                 enum kvm_mr_change change)
2969 {
2970         int rc;
2971
2972         /* If the basics of the memslot do not change, we do not want
2973          * to update the gmap. Every update causes several unnecessary
2974          * segment translation exceptions. This is usually handled just
2975          * fine by the normal fault handler + gmap, but it will also
2976          * cause faults on the prefix page of running guest CPUs.
2977          */
2978         if (old->userspace_addr == mem->userspace_addr &&
2979             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2980             old->npages * PAGE_SIZE == mem->memory_size)
2981                 return;
2982
2983         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2984                 mem->guest_phys_addr, mem->memory_size);
2985         if (rc)
2986                 pr_warn("failed to commit memory region\n");
2987         return;
2988 }
2989
2990 static inline unsigned long nonhyp_mask(int i)
2991 {
2992         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
2993
2994         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
2995 }
2996
2997 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
2998 {
2999         vcpu->valid_wakeup = false;
3000 }
3001
3002 static int __init kvm_s390_init(void)
3003 {
3004         int i;
3005
3006         if (!sclp.has_sief2) {
3007                 pr_info("SIE not available\n");
3008                 return -ENODEV;
3009         }
3010
3011         for (i = 0; i < 16; i++)
3012                 kvm_s390_fac_list_mask[i] |=
3013                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3014
3015         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3016 }
3017
3018 static void __exit kvm_s390_exit(void)
3019 {
3020         kvm_exit();
3021 }
3022
3023 module_init(kvm_s390_init);
3024 module_exit(kvm_s390_exit);
3025
3026 /*
3027  * Enable autoloading of the kvm module.
3028  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3029  * since x86 takes a different approach.
3030  */
3031 #include <linux/miscdevice.h>
3032 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3033 MODULE_ALIAS("devname:kvm");