KVM: s390: factor out reading of the guest TOD clock
arch/s390/kvm/kvm-s390.c (deliverable/linux.git)
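The helper this change factors out, kvm_s390_get_tod_clock_fast(), is called from kvm_s390_get_tod_low() below but is not defined in this file. Judging from that call site and from kvm_s390_set_tod_clock() (which stores kvm->arch.epoch = tod - get_tod_clock()), the factored-out reader presumably lives in kvm-s390.h and looks roughly like this sketch:

static inline u64 kvm_s390_get_tod_clock_fast(struct kvm *kvm)
{
	u64 rc;

	/* keep the TOD read and the epoch consistent, see kvm_clock_sync() */
	preempt_disable();
	rc = get_tod_clock() + kvm->arch.epoch;
	preempt_enable();
	return rc;
}

The guest TOD is simply the host TOD plus the per-VM epoch, so reading it is the inverse of the calculation done when setting the clock.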
1 /*
2 * hosting zSeries kernel virtual machines
3 *
4 * Copyright IBM Corp. 2008, 2009
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Author(s): Carsten Otte <cotte@de.ibm.com>
11 * Christian Borntraeger <borntraeger@de.ibm.com>
12 * Heiko Carstens <heiko.carstens@de.ibm.com>
13 * Christian Ehrhardt <ehrhardt@de.ibm.com>
14 * Jason J. Herne <jjherne@us.ibm.com>
15 */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/module.h>
25 #include <linux/random.h>
26 #include <linux/slab.h>
27 #include <linux/timer.h>
28 #include <linux/vmalloc.h>
29 #include <asm/asm-offsets.h>
30 #include <asm/lowcore.h>
31 #include <asm/etr.h>
32 #include <asm/pgtable.h>
33 #include <asm/nmi.h>
34 #include <asm/switch_to.h>
35 #include <asm/isc.h>
36 #include <asm/sclp.h>
37 #include "kvm-s390.h"
38 #include "gaccess.h"
39
40 #define KMSG_COMPONENT "kvm-s390"
41 #undef pr_fmt
42 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
43
44 #define CREATE_TRACE_POINTS
45 #include "trace.h"
46 #include "trace-s390.h"
47
48 #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
49 #define LOCAL_IRQS 32
50 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
51 (KVM_MAX_VCPUS + LOCAL_IRQS))
52
53 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
54
55 struct kvm_stats_debugfs_item debugfs_entries[] = {
56 { "userspace_handled", VCPU_STAT(exit_userspace) },
57 { "exit_null", VCPU_STAT(exit_null) },
58 { "exit_validity", VCPU_STAT(exit_validity) },
59 { "exit_stop_request", VCPU_STAT(exit_stop_request) },
60 { "exit_external_request", VCPU_STAT(exit_external_request) },
61 { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
62 { "exit_instruction", VCPU_STAT(exit_instruction) },
63 { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
64 { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
65 { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
66 { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
67 { "halt_wakeup", VCPU_STAT(halt_wakeup) },
68 { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
69 { "instruction_lctl", VCPU_STAT(instruction_lctl) },
70 { "instruction_stctl", VCPU_STAT(instruction_stctl) },
71 { "instruction_stctg", VCPU_STAT(instruction_stctg) },
72 { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
73 { "deliver_external_call", VCPU_STAT(deliver_external_call) },
74 { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
75 { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
76 { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
77 { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
78 { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
79 { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
80 { "exit_wait_state", VCPU_STAT(exit_wait_state) },
81 { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
82 { "instruction_stidp", VCPU_STAT(instruction_stidp) },
83 { "instruction_spx", VCPU_STAT(instruction_spx) },
84 { "instruction_stpx", VCPU_STAT(instruction_stpx) },
85 { "instruction_stap", VCPU_STAT(instruction_stap) },
86 { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
87 { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
88 { "instruction_stsch", VCPU_STAT(instruction_stsch) },
89 { "instruction_chsc", VCPU_STAT(instruction_chsc) },
90 { "instruction_essa", VCPU_STAT(instruction_essa) },
91 { "instruction_stsi", VCPU_STAT(instruction_stsi) },
92 { "instruction_stfl", VCPU_STAT(instruction_stfl) },
93 { "instruction_tprot", VCPU_STAT(instruction_tprot) },
94 { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
95 { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
96 { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
97 { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
98 { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
99 { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
100 { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
101 { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
102 { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
103 { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
104 { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
105 { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
106 { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
107 { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
108 { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
109 { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
110 { "diagnose_10", VCPU_STAT(diagnose_10) },
111 { "diagnose_44", VCPU_STAT(diagnose_44) },
112 { "diagnose_9c", VCPU_STAT(diagnose_9c) },
113 { "diagnose_258", VCPU_STAT(diagnose_258) },
114 { "diagnose_308", VCPU_STAT(diagnose_308) },
115 { "diagnose_500", VCPU_STAT(diagnose_500) },
116 { NULL }
117 };
118
119 /* upper facilities limit for kvm */
120 unsigned long kvm_s390_fac_list_mask[] = {
121 0xffe6fffbfcfdfc40UL,
122 0x005e800000000000UL,
123 };
124
125 unsigned long kvm_s390_fac_list_mask_size(void)
126 {
127 BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
128 return ARRAY_SIZE(kvm_s390_fac_list_mask);
129 }
130
131 static struct gmap_notifier gmap_notifier;
132 debug_info_t *kvm_s390_dbf;
133
134 /* Section: not file related */
135 int kvm_arch_hardware_enable(void)
136 {
137 /* every s390 is virtualization enabled ;-) */
138 return 0;
139 }
140
141 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
142
143 /*
144 * This callback is executed during stop_machine(). All CPUs are therefore
145 * temporarily stopped. In order not to change guest behavior, we have to
146 * disable preemption whenever we touch the epoch of kvm and the VCPUs,
147 * so a CPU won't be stopped while calculating with the epoch.
148 */
149 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
150 void *v)
151 {
152 struct kvm *kvm;
153 struct kvm_vcpu *vcpu;
154 int i;
155 unsigned long long *delta = v;
156
157 list_for_each_entry(kvm, &vm_list, vm_list) {
158 kvm->arch.epoch -= *delta;
159 kvm_for_each_vcpu(i, vcpu, kvm) {
160 vcpu->arch.sie_block->epoch -= *delta;
161 }
162 }
163 return NOTIFY_OK;
164 }
165
166 static struct notifier_block kvm_clock_notifier = {
167 .notifier_call = kvm_clock_sync,
168 };
169
170 int kvm_arch_hardware_setup(void)
171 {
172 gmap_notifier.notifier_call = kvm_gmap_notifier;
173 gmap_register_ipte_notifier(&gmap_notifier);
174 atomic_notifier_chain_register(&s390_epoch_delta_notifier,
175 &kvm_clock_notifier);
176 return 0;
177 }
178
179 void kvm_arch_hardware_unsetup(void)
180 {
181 gmap_unregister_ipte_notifier(&gmap_notifier);
182 atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
183 &kvm_clock_notifier);
184 }
185
186 int kvm_arch_init(void *opaque)
187 {
188 kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
189 if (!kvm_s390_dbf)
190 return -ENOMEM;
191
192 if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
193 debug_unregister(kvm_s390_dbf);
194 return -ENOMEM;
195 }
196
197 /* Register floating interrupt controller interface. */
198 return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
199 }
200
201 void kvm_arch_exit(void)
202 {
203 debug_unregister(kvm_s390_dbf);
204 }
205
206 /* Section: device related */
207 long kvm_arch_dev_ioctl(struct file *filp,
208 unsigned int ioctl, unsigned long arg)
209 {
210 if (ioctl == KVM_S390_ENABLE_SIE)
211 return s390_enable_sie();
212 return -EINVAL;
213 }
214
215 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
216 {
217 int r;
218
219 switch (ext) {
220 case KVM_CAP_S390_PSW:
221 case KVM_CAP_S390_GMAP:
222 case KVM_CAP_SYNC_MMU:
223 #ifdef CONFIG_KVM_S390_UCONTROL
224 case KVM_CAP_S390_UCONTROL:
225 #endif
226 case KVM_CAP_ASYNC_PF:
227 case KVM_CAP_SYNC_REGS:
228 case KVM_CAP_ONE_REG:
229 case KVM_CAP_ENABLE_CAP:
230 case KVM_CAP_S390_CSS_SUPPORT:
231 case KVM_CAP_IOEVENTFD:
232 case KVM_CAP_DEVICE_CTRL:
233 case KVM_CAP_ENABLE_CAP_VM:
234 case KVM_CAP_S390_IRQCHIP:
235 case KVM_CAP_VM_ATTRIBUTES:
236 case KVM_CAP_MP_STATE:
237 case KVM_CAP_S390_INJECT_IRQ:
238 case KVM_CAP_S390_USER_SIGP:
239 case KVM_CAP_S390_USER_STSI:
240 case KVM_CAP_S390_SKEYS:
241 case KVM_CAP_S390_IRQ_STATE:
242 r = 1;
243 break;
244 case KVM_CAP_S390_MEM_OP:
245 r = MEM_OP_MAX_SIZE;
246 break;
247 case KVM_CAP_NR_VCPUS:
248 case KVM_CAP_MAX_VCPUS:
249 r = KVM_MAX_VCPUS;
250 break;
251 case KVM_CAP_NR_MEMSLOTS:
252 r = KVM_USER_MEM_SLOTS;
253 break;
254 case KVM_CAP_S390_COW:
255 r = MACHINE_HAS_ESOP;
256 break;
257 case KVM_CAP_S390_VECTOR_REGISTERS:
258 r = MACHINE_HAS_VX;
259 break;
260 default:
261 r = 0;
262 }
263 return r;
264 }
265
266 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
267 struct kvm_memory_slot *memslot)
268 {
269 gfn_t cur_gfn, last_gfn;
270 unsigned long address;
271 struct gmap *gmap = kvm->arch.gmap;
272
273 down_read(&gmap->mm->mmap_sem);
274 /* Loop over all guest pages */
275 last_gfn = memslot->base_gfn + memslot->npages;
276 for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
277 address = gfn_to_hva_memslot(memslot, cur_gfn);
278
279 if (gmap_test_and_clear_dirty(address, gmap))
280 mark_page_dirty(kvm, cur_gfn);
281 }
282 up_read(&gmap->mm->mmap_sem);
283 }
284
285 /* Section: vm related */
286 /*
287 * Get (and clear) the dirty memory log for a memory slot.
288 */
289 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
290 struct kvm_dirty_log *log)
291 {
292 int r;
293 unsigned long n;
294 struct kvm_memslots *slots;
295 struct kvm_memory_slot *memslot;
296 int is_dirty = 0;
297
298 mutex_lock(&kvm->slots_lock);
299
300 r = -EINVAL;
301 if (log->slot >= KVM_USER_MEM_SLOTS)
302 goto out;
303
304 slots = kvm_memslots(kvm);
305 memslot = id_to_memslot(slots, log->slot);
306 r = -ENOENT;
307 if (!memslot->dirty_bitmap)
308 goto out;
309
310 kvm_s390_sync_dirty_log(kvm, memslot);
311 r = kvm_get_dirty_log(kvm, log, &is_dirty);
312 if (r)
313 goto out;
314
315 /* Clear the dirty log */
316 if (is_dirty) {
317 n = kvm_dirty_bitmap_bytes(memslot);
318 memset(memslot->dirty_bitmap, 0, n);
319 }
320 r = 0;
321 out:
322 mutex_unlock(&kvm->slots_lock);
323 return r;
324 }
325
326 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
327 {
328 int r;
329
330 if (cap->flags)
331 return -EINVAL;
332
333 switch (cap->cap) {
334 case KVM_CAP_S390_IRQCHIP:
335 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
336 kvm->arch.use_irqchip = 1;
337 r = 0;
338 break;
339 case KVM_CAP_S390_USER_SIGP:
340 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
341 kvm->arch.user_sigp = 1;
342 r = 0;
343 break;
344 case KVM_CAP_S390_VECTOR_REGISTERS:
345 if (MACHINE_HAS_VX) {
346 set_kvm_facility(kvm->arch.model.fac->mask, 129);
347 set_kvm_facility(kvm->arch.model.fac->list, 129);
348 r = 0;
349 } else
350 r = -EINVAL;
351 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
352 r ? "(not available)" : "(success)");
353 break;
354 case KVM_CAP_S390_USER_STSI:
355 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
356 kvm->arch.user_stsi = 1;
357 r = 0;
358 break;
359 default:
360 r = -EINVAL;
361 break;
362 }
363 return r;
364 }
365
366 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
367 {
368 int ret;
369
370 switch (attr->attr) {
371 case KVM_S390_VM_MEM_LIMIT_SIZE:
372 ret = 0;
373 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
374 kvm->arch.gmap->asce_end);
375 if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr))
376 ret = -EFAULT;
377 break;
378 default:
379 ret = -ENXIO;
380 break;
381 }
382 return ret;
383 }
384
385 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
386 {
387 int ret;
388 unsigned int idx;
389 switch (attr->attr) {
390 case KVM_S390_VM_MEM_ENABLE_CMMA:
391 /* enable CMMA only for z10 and later (EDAT_1) */
392 ret = -EINVAL;
393 if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
394 break;
395
396 ret = -EBUSY;
397 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
398 mutex_lock(&kvm->lock);
399 if (atomic_read(&kvm->online_vcpus) == 0) {
400 kvm->arch.use_cmma = 1;
401 ret = 0;
402 }
403 mutex_unlock(&kvm->lock);
404 break;
405 case KVM_S390_VM_MEM_CLR_CMMA:
406 ret = -EINVAL;
407 if (!kvm->arch.use_cmma)
408 break;
409
410 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
411 mutex_lock(&kvm->lock);
412 idx = srcu_read_lock(&kvm->srcu);
413 s390_reset_cmma(kvm->arch.gmap->mm);
414 srcu_read_unlock(&kvm->srcu, idx);
415 mutex_unlock(&kvm->lock);
416 ret = 0;
417 break;
418 case KVM_S390_VM_MEM_LIMIT_SIZE: {
419 unsigned long new_limit;
420
421 if (kvm_is_ucontrol(kvm))
422 return -EINVAL;
423
424 if (get_user(new_limit, (u64 __user *)attr->addr))
425 return -EFAULT;
426
427 if (new_limit > kvm->arch.gmap->asce_end)
428 return -E2BIG;
429
430 ret = -EBUSY;
431 mutex_lock(&kvm->lock);
432 if (atomic_read(&kvm->online_vcpus) == 0) {
433 /* gmap_alloc will round the limit up */
434 struct gmap *new = gmap_alloc(current->mm, new_limit);
435
436 if (!new) {
437 ret = -ENOMEM;
438 } else {
439 gmap_free(kvm->arch.gmap);
440 new->private = kvm;
441 kvm->arch.gmap = new;
442 ret = 0;
443 }
444 }
445 mutex_unlock(&kvm->lock);
446 VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit);
447 break;
448 }
449 default:
450 ret = -ENXIO;
451 break;
452 }
453 return ret;
454 }
455
456 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
457
458 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
459 {
460 struct kvm_vcpu *vcpu;
461 int i;
462
463 if (!test_kvm_facility(kvm, 76))
464 return -EINVAL;
465
466 mutex_lock(&kvm->lock);
467 switch (attr->attr) {
468 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
469 get_random_bytes(
470 kvm->arch.crypto.crycb->aes_wrapping_key_mask,
471 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
472 kvm->arch.crypto.aes_kw = 1;
473 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
474 break;
475 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
476 get_random_bytes(
477 kvm->arch.crypto.crycb->dea_wrapping_key_mask,
478 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
479 kvm->arch.crypto.dea_kw = 1;
480 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
481 break;
482 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
483 kvm->arch.crypto.aes_kw = 0;
484 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
485 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
486 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
487 break;
488 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
489 kvm->arch.crypto.dea_kw = 0;
490 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
491 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
492 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
493 break;
494 default:
495 mutex_unlock(&kvm->lock);
496 return -ENXIO;
497 }
498
499 kvm_for_each_vcpu(i, vcpu, kvm) {
500 kvm_s390_vcpu_crypto_setup(vcpu);
501 exit_sie(vcpu);
502 }
503 mutex_unlock(&kvm->lock);
504 return 0;
505 }
506
507 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
508 {
509 u8 gtod_high;
510
511 if (copy_from_user(&gtod_high, (void __user *)attr->addr,
512 sizeof(gtod_high)))
513 return -EFAULT;
514
515 if (gtod_high != 0)
516 return -EINVAL;
517 VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
518
519 return 0;
520 }
521
522 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
523 {
524 u64 gtod;
525
526 if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
527 return -EFAULT;
528
529 kvm_s390_set_tod_clock(kvm, gtod);
530 VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
531 return 0;
532 }
533
534 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
535 {
536 int ret;
537
538 if (attr->flags)
539 return -EINVAL;
540
541 switch (attr->attr) {
542 case KVM_S390_VM_TOD_HIGH:
543 ret = kvm_s390_set_tod_high(kvm, attr);
544 break;
545 case KVM_S390_VM_TOD_LOW:
546 ret = kvm_s390_set_tod_low(kvm, attr);
547 break;
548 default:
549 ret = -ENXIO;
550 break;
551 }
552 return ret;
553 }
554
555 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
556 {
557 u8 gtod_high = 0;
558
559 if (copy_to_user((void __user *)attr->addr, &gtod_high,
560 sizeof(gtod_high)))
561 return -EFAULT;
562 VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
563
564 return 0;
565 }
566
567 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
568 {
569 u64 gtod;
570
571 gtod = kvm_s390_get_tod_clock_fast(kvm);
572 if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
573 return -EFAULT;
574 VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
575
576 return 0;
577 }
578
579 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
580 {
581 int ret;
582
583 if (attr->flags)
584 return -EINVAL;
585
586 switch (attr->attr) {
587 case KVM_S390_VM_TOD_HIGH:
588 ret = kvm_s390_get_tod_high(kvm, attr);
589 break;
590 case KVM_S390_VM_TOD_LOW:
591 ret = kvm_s390_get_tod_low(kvm, attr);
592 break;
593 default:
594 ret = -ENXIO;
595 break;
596 }
597 return ret;
598 }
599
600 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
601 {
602 struct kvm_s390_vm_cpu_processor *proc;
603 int ret = 0;
604
605 mutex_lock(&kvm->lock);
606 if (atomic_read(&kvm->online_vcpus)) {
607 ret = -EBUSY;
608 goto out;
609 }
610 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
611 if (!proc) {
612 ret = -ENOMEM;
613 goto out;
614 }
615 if (!copy_from_user(proc, (void __user *)attr->addr,
616 sizeof(*proc))) {
617 memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
618 sizeof(struct cpuid));
619 kvm->arch.model.ibc = proc->ibc;
620 memcpy(kvm->arch.model.fac->list, proc->fac_list,
621 S390_ARCH_FAC_LIST_SIZE_BYTE);
622 } else
623 ret = -EFAULT;
624 kfree(proc);
625 out:
626 mutex_unlock(&kvm->lock);
627 return ret;
628 }
629
630 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
631 {
632 int ret = -ENXIO;
633
634 switch (attr->attr) {
635 case KVM_S390_VM_CPU_PROCESSOR:
636 ret = kvm_s390_set_processor(kvm, attr);
637 break;
638 }
639 return ret;
640 }
641
642 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
643 {
644 struct kvm_s390_vm_cpu_processor *proc;
645 int ret = 0;
646
647 proc = kzalloc(sizeof(*proc), GFP_KERNEL);
648 if (!proc) {
649 ret = -ENOMEM;
650 goto out;
651 }
652 memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
653 proc->ibc = kvm->arch.model.ibc;
654 memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
655 if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
656 ret = -EFAULT;
657 kfree(proc);
658 out:
659 return ret;
660 }
661
662 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
663 {
664 struct kvm_s390_vm_cpu_machine *mach;
665 int ret = 0;
666
667 mach = kzalloc(sizeof(*mach), GFP_KERNEL);
668 if (!mach) {
669 ret = -ENOMEM;
670 goto out;
671 }
672 get_cpu_id((struct cpuid *) &mach->cpuid);
673 mach->ibc = sclp.ibc;
674 memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
675 S390_ARCH_FAC_LIST_SIZE_BYTE);
676 memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
677 S390_ARCH_FAC_LIST_SIZE_BYTE);
678 if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
679 ret = -EFAULT;
680 kfree(mach);
681 out:
682 return ret;
683 }
684
685 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
686 {
687 int ret = -ENXIO;
688
689 switch (attr->attr) {
690 case KVM_S390_VM_CPU_PROCESSOR:
691 ret = kvm_s390_get_processor(kvm, attr);
692 break;
693 case KVM_S390_VM_CPU_MACHINE:
694 ret = kvm_s390_get_machine(kvm, attr);
695 break;
696 }
697 return ret;
698 }
699
700 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
701 {
702 int ret;
703
704 switch (attr->group) {
705 case KVM_S390_VM_MEM_CTRL:
706 ret = kvm_s390_set_mem_control(kvm, attr);
707 break;
708 case KVM_S390_VM_TOD:
709 ret = kvm_s390_set_tod(kvm, attr);
710 break;
711 case KVM_S390_VM_CPU_MODEL:
712 ret = kvm_s390_set_cpu_model(kvm, attr);
713 break;
714 case KVM_S390_VM_CRYPTO:
715 ret = kvm_s390_vm_set_crypto(kvm, attr);
716 break;
717 default:
718 ret = -ENXIO;
719 break;
720 }
721
722 return ret;
723 }
724
725 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
726 {
727 int ret;
728
729 switch (attr->group) {
730 case KVM_S390_VM_MEM_CTRL:
731 ret = kvm_s390_get_mem_control(kvm, attr);
732 break;
733 case KVM_S390_VM_TOD:
734 ret = kvm_s390_get_tod(kvm, attr);
735 break;
736 case KVM_S390_VM_CPU_MODEL:
737 ret = kvm_s390_get_cpu_model(kvm, attr);
738 break;
739 default:
740 ret = -ENXIO;
741 break;
742 }
743
744 return ret;
745 }
746
747 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
748 {
749 int ret;
750
751 switch (attr->group) {
752 case KVM_S390_VM_MEM_CTRL:
753 switch (attr->attr) {
754 case KVM_S390_VM_MEM_ENABLE_CMMA:
755 case KVM_S390_VM_MEM_CLR_CMMA:
756 case KVM_S390_VM_MEM_LIMIT_SIZE:
757 ret = 0;
758 break;
759 default:
760 ret = -ENXIO;
761 break;
762 }
763 break;
764 case KVM_S390_VM_TOD:
765 switch (attr->attr) {
766 case KVM_S390_VM_TOD_LOW:
767 case KVM_S390_VM_TOD_HIGH:
768 ret = 0;
769 break;
770 default:
771 ret = -ENXIO;
772 break;
773 }
774 break;
775 case KVM_S390_VM_CPU_MODEL:
776 switch (attr->attr) {
777 case KVM_S390_VM_CPU_PROCESSOR:
778 case KVM_S390_VM_CPU_MACHINE:
779 ret = 0;
780 break;
781 default:
782 ret = -ENXIO;
783 break;
784 }
785 break;
786 case KVM_S390_VM_CRYPTO:
787 switch (attr->attr) {
788 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
789 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
790 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
791 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
792 ret = 0;
793 break;
794 default:
795 ret = -ENXIO;
796 break;
797 }
798 break;
799 default:
800 ret = -ENXIO;
801 break;
802 }
803
804 return ret;
805 }
806
807 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
808 {
809 uint8_t *keys;
810 uint64_t hva;
811 unsigned long curkey;
812 int i, r = 0;
813
814 if (args->flags != 0)
815 return -EINVAL;
816
817 /* Is this guest using storage keys? */
818 if (!mm_use_skey(current->mm))
819 return KVM_S390_GET_SKEYS_NONE;
820
821 /* Enforce sane limit on memory allocation */
822 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
823 return -EINVAL;
824
825 keys = kmalloc_array(args->count, sizeof(uint8_t),
826 GFP_KERNEL | __GFP_NOWARN);
827 if (!keys)
828 keys = vmalloc(sizeof(uint8_t) * args->count);
829 if (!keys)
830 return -ENOMEM;
831
832 for (i = 0; i < args->count; i++) {
833 hva = gfn_to_hva(kvm, args->start_gfn + i);
834 if (kvm_is_error_hva(hva)) {
835 r = -EFAULT;
836 goto out;
837 }
838
839 curkey = get_guest_storage_key(current->mm, hva);
840 if (IS_ERR_VALUE(curkey)) {
841 r = curkey;
842 goto out;
843 }
844 keys[i] = curkey;
845 }
846
847 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
848 sizeof(uint8_t) * args->count);
849 if (r)
850 r = -EFAULT;
851 out:
852 kvfree(keys);
853 return r;
854 }
855
856 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
857 {
858 uint8_t *keys;
859 uint64_t hva;
860 int i, r = 0;
861
862 if (args->flags != 0)
863 return -EINVAL;
864
865 /* Enforce sane limit on memory allocation */
866 if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
867 return -EINVAL;
868
869 keys = kmalloc_array(args->count, sizeof(uint8_t),
870 GFP_KERNEL | __GFP_NOWARN);
871 if (!keys)
872 keys = vmalloc(sizeof(uint8_t) * args->count);
873 if (!keys)
874 return -ENOMEM;
875
876 r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
877 sizeof(uint8_t) * args->count);
878 if (r) {
879 r = -EFAULT;
880 goto out;
881 }
882
883 /* Enable storage key handling for the guest */
884 r = s390_enable_skey();
885 if (r)
886 goto out;
887
888 for (i = 0; i < args->count; i++) {
889 hva = gfn_to_hva(kvm, args->start_gfn + i);
890 if (kvm_is_error_hva(hva)) {
891 r = -EFAULT;
892 goto out;
893 }
894
895 /* Lowest order bit is reserved */
896 if (keys[i] & 0x01) {
897 r = -EINVAL;
898 goto out;
899 }
900
901 r = set_guest_storage_key(current->mm, hva,
902 (unsigned long)keys[i], 0);
903 if (r)
904 goto out;
905 }
906 out:
907 kvfree(keys);
908 return r;
909 }
910
911 long kvm_arch_vm_ioctl(struct file *filp,
912 unsigned int ioctl, unsigned long arg)
913 {
914 struct kvm *kvm = filp->private_data;
915 void __user *argp = (void __user *)arg;
916 struct kvm_device_attr attr;
917 int r;
918
919 switch (ioctl) {
920 case KVM_S390_INTERRUPT: {
921 struct kvm_s390_interrupt s390int;
922
923 r = -EFAULT;
924 if (copy_from_user(&s390int, argp, sizeof(s390int)))
925 break;
926 r = kvm_s390_inject_vm(kvm, &s390int);
927 break;
928 }
929 case KVM_ENABLE_CAP: {
930 struct kvm_enable_cap cap;
931 r = -EFAULT;
932 if (copy_from_user(&cap, argp, sizeof(cap)))
933 break;
934 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
935 break;
936 }
937 case KVM_CREATE_IRQCHIP: {
938 struct kvm_irq_routing_entry routing;
939
940 r = -EINVAL;
941 if (kvm->arch.use_irqchip) {
942 /* Set up dummy routing. */
943 memset(&routing, 0, sizeof(routing));
944 r = kvm_set_irq_routing(kvm, &routing, 0, 0);
945 }
946 break;
947 }
948 case KVM_SET_DEVICE_ATTR: {
949 r = -EFAULT;
950 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
951 break;
952 r = kvm_s390_vm_set_attr(kvm, &attr);
953 break;
954 }
955 case KVM_GET_DEVICE_ATTR: {
956 r = -EFAULT;
957 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
958 break;
959 r = kvm_s390_vm_get_attr(kvm, &attr);
960 break;
961 }
962 case KVM_HAS_DEVICE_ATTR: {
963 r = -EFAULT;
964 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
965 break;
966 r = kvm_s390_vm_has_attr(kvm, &attr);
967 break;
968 }
969 case KVM_S390_GET_SKEYS: {
970 struct kvm_s390_skeys args;
971
972 r = -EFAULT;
973 if (copy_from_user(&args, argp,
974 sizeof(struct kvm_s390_skeys)))
975 break;
976 r = kvm_s390_get_skeys(kvm, &args);
977 break;
978 }
979 case KVM_S390_SET_SKEYS: {
980 struct kvm_s390_skeys args;
981
982 r = -EFAULT;
983 if (copy_from_user(&args, argp,
984 sizeof(struct kvm_s390_skeys)))
985 break;
986 r = kvm_s390_set_skeys(kvm, &args);
987 break;
988 }
989 default:
990 r = -ENOTTY;
991 }
992
993 return r;
994 }
995
996 static int kvm_s390_query_ap_config(u8 *config)
997 {
998 u32 fcn_code = 0x04000000UL;
999 u32 cc = 0;
1000
1001 memset(config, 0, 128);
1002 asm volatile(
1003 "lgr 0,%1\n"
1004 "lgr 2,%2\n"
1005 ".long 0xb2af0000\n" /* PQAP(QCI) */
1006 "0: ipm %0\n"
1007 "srl %0,28\n"
1008 "1:\n"
1009 EX_TABLE(0b, 1b)
1010 : "+r" (cc)
1011 : "r" (fcn_code), "r" (config)
1012 : "cc", "0", "2", "memory"
1013 );
1014
1015 return cc;
1016 }
1017
1018 static int kvm_s390_apxa_installed(void)
1019 {
1020 u8 config[128];
1021 int cc;
1022
1023 if (test_facility(2) && test_facility(12)) {
1024 cc = kvm_s390_query_ap_config(config);
1025
1026 if (cc)
1027 pr_err("PQAP(QCI) failed with cc=%d", cc);
1028 else
1029 return config[0] & 0x40;
1030 }
1031
1032 return 0;
1033 }
1034
1035 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1036 {
1037 kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1038
1039 if (kvm_s390_apxa_installed())
1040 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1041 else
1042 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1043 }
1044
1045 static void kvm_s390_get_cpu_id(struct cpuid *cpu_id)
1046 {
1047 get_cpu_id(cpu_id);
1048 cpu_id->version = 0xff;
1049 }
1050
1051 static int kvm_s390_crypto_init(struct kvm *kvm)
1052 {
1053 if (!test_kvm_facility(kvm, 76))
1054 return 0;
1055
1056 kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
1057 GFP_KERNEL | GFP_DMA);
1058 if (!kvm->arch.crypto.crycb)
1059 return -ENOMEM;
1060
1061 kvm_s390_set_crycb_format(kvm);
1062
1063 /* Enable AES/DEA protected key functions by default */
1064 kvm->arch.crypto.aes_kw = 1;
1065 kvm->arch.crypto.dea_kw = 1;
1066 get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1067 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1068 get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1069 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1070
1071 return 0;
1072 }
1073
1074 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1075 {
1076 int i, rc;
1077 char debug_name[16];
1078 static unsigned long sca_offset;
1079
1080 rc = -EINVAL;
1081 #ifdef CONFIG_KVM_S390_UCONTROL
1082 if (type & ~KVM_VM_S390_UCONTROL)
1083 goto out_err;
1084 if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1085 goto out_err;
1086 #else
1087 if (type)
1088 goto out_err;
1089 #endif
1090
1091 rc = s390_enable_sie();
1092 if (rc)
1093 goto out_err;
1094
1095 rc = -ENOMEM;
1096
1097 kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
1098 if (!kvm->arch.sca)
1099 goto out_err;
1100 spin_lock(&kvm_lock);
1101 sca_offset = (sca_offset + 16) & 0x7f0;
1102 kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
1103 spin_unlock(&kvm_lock);
1104
1105 sprintf(debug_name, "kvm-%u", current->pid);
1106
1107 kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1108 if (!kvm->arch.dbf)
1109 goto out_err;
1110
1111 /*
1112 * The architectural maximum amount of facilities is 16 kbit. To store
1113 * this amount, 2 kbyte of memory is required. Thus we need a full
1114 * page to hold the guest facility list (arch.model.fac->list) and the
1115 * facility mask (arch.model.fac->mask). Its address size has to be
1116 * 31 bits and word aligned.
1117 */
1118 kvm->arch.model.fac =
1119 (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1120 if (!kvm->arch.model.fac)
1121 goto out_err;
1122
1123 /* Populate the facility mask initially. */
1124 memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
1125 S390_ARCH_FAC_LIST_SIZE_BYTE);
1126 for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1127 if (i < kvm_s390_fac_list_mask_size())
1128 kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
1129 else
1130 kvm->arch.model.fac->mask[i] = 0UL;
1131 }
1132
1133 /* Populate the facility list initially. */
1134 memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
1135 S390_ARCH_FAC_LIST_SIZE_BYTE);
1136
1137 kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
1138 kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1139
1140 if (kvm_s390_crypto_init(kvm) < 0)
1141 goto out_err;
1142
1143 spin_lock_init(&kvm->arch.float_int.lock);
1144 for (i = 0; i < FIRQ_LIST_COUNT; i++)
1145 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1146 init_waitqueue_head(&kvm->arch.ipte_wq);
1147 mutex_init(&kvm->arch.ipte_mutex);
1148
1149 debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1150 VM_EVENT(kvm, 3, "vm created with type %lu", type);
1151
1152 if (type & KVM_VM_S390_UCONTROL) {
1153 kvm->arch.gmap = NULL;
1154 } else {
1155 kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
1156 if (!kvm->arch.gmap)
1157 goto out_err;
1158 kvm->arch.gmap->private = kvm;
1159 kvm->arch.gmap->pfault_enabled = 0;
1160 }
1161
1162 kvm->arch.css_support = 0;
1163 kvm->arch.use_irqchip = 0;
1164 kvm->arch.epoch = 0;
1165
1166 spin_lock_init(&kvm->arch.start_stop_lock);
1167 KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
1168
1169 return 0;
1170 out_err:
1171 kfree(kvm->arch.crypto.crycb);
1172 free_page((unsigned long)kvm->arch.model.fac);
1173 debug_unregister(kvm->arch.dbf);
1174 free_page((unsigned long)(kvm->arch.sca));
1175 KVM_EVENT(3, "creation of vm failed: %d", rc);
1176 return rc;
1177 }
1178
1179 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1180 {
1181 VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1182 trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1183 kvm_s390_clear_local_irqs(vcpu);
1184 kvm_clear_async_pf_completion_queue(vcpu);
1185 if (!kvm_is_ucontrol(vcpu->kvm)) {
1186 clear_bit(63 - vcpu->vcpu_id,
1187 (unsigned long *) &vcpu->kvm->arch.sca->mcn);
1188 if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
1189 (__u64) vcpu->arch.sie_block)
1190 vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
1191 }
1192 smp_mb();
1193
1194 if (kvm_is_ucontrol(vcpu->kvm))
1195 gmap_free(vcpu->arch.gmap);
1196
1197 if (vcpu->kvm->arch.use_cmma)
1198 kvm_s390_vcpu_unsetup_cmma(vcpu);
1199 free_page((unsigned long)(vcpu->arch.sie_block));
1200
1201 kvm_vcpu_uninit(vcpu);
1202 kmem_cache_free(kvm_vcpu_cache, vcpu);
1203 }
1204
1205 static void kvm_free_vcpus(struct kvm *kvm)
1206 {
1207 unsigned int i;
1208 struct kvm_vcpu *vcpu;
1209
1210 kvm_for_each_vcpu(i, vcpu, kvm)
1211 kvm_arch_vcpu_destroy(vcpu);
1212
1213 mutex_lock(&kvm->lock);
1214 for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1215 kvm->vcpus[i] = NULL;
1216
1217 atomic_set(&kvm->online_vcpus, 0);
1218 mutex_unlock(&kvm->lock);
1219 }
1220
1221 void kvm_arch_destroy_vm(struct kvm *kvm)
1222 {
1223 kvm_free_vcpus(kvm);
1224 free_page((unsigned long)kvm->arch.model.fac);
1225 free_page((unsigned long)(kvm->arch.sca));
1226 debug_unregister(kvm->arch.dbf);
1227 kfree(kvm->arch.crypto.crycb);
1228 if (!kvm_is_ucontrol(kvm))
1229 gmap_free(kvm->arch.gmap);
1230 kvm_s390_destroy_adapters(kvm);
1231 kvm_s390_clear_float_irqs(kvm);
1232 KVM_EVENT(3, "vm 0x%p destroyed", kvm);
1233 }
1234
1235 /* Section: vcpu related */
1236 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1237 {
1238 vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
1239 if (!vcpu->arch.gmap)
1240 return -ENOMEM;
1241 vcpu->arch.gmap->private = vcpu->kvm;
1242
1243 return 0;
1244 }
1245
1246 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1247 {
1248 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1249 kvm_clear_async_pf_completion_queue(vcpu);
1250 vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1251 KVM_SYNC_GPRS |
1252 KVM_SYNC_ACRS |
1253 KVM_SYNC_CRS |
1254 KVM_SYNC_ARCH0 |
1255 KVM_SYNC_PFAULT;
1256 if (test_kvm_facility(vcpu->kvm, 129))
1257 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1258
1259 if (kvm_is_ucontrol(vcpu->kvm))
1260 return __kvm_ucontrol_vcpu_init(vcpu);
1261
1262 return 0;
1263 }
1264
1265 /*
1266 * Backs up the current FP/VX register save area on a particular
1267 * destination. Used to switch between different register save
1268 * areas.
1269 */
1270 static inline void save_fpu_to(struct fpu *dst)
1271 {
1272 dst->fpc = current->thread.fpu.fpc;
1273 dst->flags = current->thread.fpu.flags;
1274 dst->regs = current->thread.fpu.regs;
1275 }
1276
1277 /*
1278 * Switches the FP/VX register save area from which to lazy
1279 * restore register contents.
1280 */
1281 static inline void load_fpu_from(struct fpu *from)
1282 {
1283 current->thread.fpu.fpc = from->fpc;
1284 current->thread.fpu.flags = from->flags;
1285 current->thread.fpu.regs = from->regs;
1286 }
1287
1288 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1289 {
1290 /* Save host register state */
1291 save_fpu_regs();
1292 save_fpu_to(&vcpu->arch.host_fpregs);
1293
1294 if (test_kvm_facility(vcpu->kvm, 129)) {
1295 current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1296 current->thread.fpu.flags = FPU_USE_VX;
1297 /*
1298 * Use the register save area in the SIE-control block
1299 * for register restore and save in kvm_arch_vcpu_put()
1300 */
1301 current->thread.fpu.vxrs =
1302 (__vector128 *)&vcpu->run->s.regs.vrs;
1303 /* Always enable the vector extension for KVM */
1304 __ctl_set_vx();
1305 } else
1306 load_fpu_from(&vcpu->arch.guest_fpregs);
1307
1308 if (test_fp_ctl(current->thread.fpu.fpc))
1309 /* User space provided an invalid FPC, let's clear it */
1310 current->thread.fpu.fpc = 0;
1311
1312 save_access_regs(vcpu->arch.host_acrs);
1313 restore_access_regs(vcpu->run->s.regs.acrs);
1314 gmap_enable(vcpu->arch.gmap);
1315 atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1316 }
1317
1318 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1319 {
1320 atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1321 gmap_disable(vcpu->arch.gmap);
1322
1323 save_fpu_regs();
1324
1325 if (test_kvm_facility(vcpu->kvm, 129))
1326 /*
1327 * kvm_arch_vcpu_load() set up the register save area to
1328 * the &vcpu->run->s.regs.vrs and, thus, the vector registers
1329 * are already saved. Only the floating-point control must be
1330 * copied.
1331 */
1332 vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1333 else
1334 save_fpu_to(&vcpu->arch.guest_fpregs);
1335 load_fpu_from(&vcpu->arch.host_fpregs);
1336
1337 save_access_regs(vcpu->run->s.regs.acrs);
1338 restore_access_regs(vcpu->arch.host_acrs);
1339 }
1340
1341 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1342 {
1343 /* this equals initial cpu reset in pop, but we don't switch to ESA */
1344 vcpu->arch.sie_block->gpsw.mask = 0UL;
1345 vcpu->arch.sie_block->gpsw.addr = 0UL;
1346 kvm_s390_set_prefix(vcpu, 0);
1347 vcpu->arch.sie_block->cputm = 0UL;
1348 vcpu->arch.sie_block->ckc = 0UL;
1349 vcpu->arch.sie_block->todpr = 0;
1350 memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1351 vcpu->arch.sie_block->gcr[0] = 0xE0UL;
1352 vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1353 vcpu->arch.guest_fpregs.fpc = 0;
1354 asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
1355 vcpu->arch.sie_block->gbea = 1;
1356 vcpu->arch.sie_block->pp = 0;
1357 vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1358 kvm_clear_async_pf_completion_queue(vcpu);
1359 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1360 kvm_s390_vcpu_stop(vcpu);
1361 kvm_s390_clear_local_irqs(vcpu);
1362 }
1363
1364 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1365 {
1366 mutex_lock(&vcpu->kvm->lock);
1367 preempt_disable();
1368 vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1369 preempt_enable();
1370 mutex_unlock(&vcpu->kvm->lock);
1371 if (!kvm_is_ucontrol(vcpu->kvm))
1372 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1373 }
1374
1375 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1376 {
1377 if (!test_kvm_facility(vcpu->kvm, 76))
1378 return;
1379
1380 vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1381
1382 if (vcpu->kvm->arch.crypto.aes_kw)
1383 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1384 if (vcpu->kvm->arch.crypto.dea_kw)
1385 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1386
1387 vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1388 }
1389
1390 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1391 {
1392 free_page(vcpu->arch.sie_block->cbrlo);
1393 vcpu->arch.sie_block->cbrlo = 0;
1394 }
1395
1396 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1397 {
1398 vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1399 if (!vcpu->arch.sie_block->cbrlo)
1400 return -ENOMEM;
1401
1402 vcpu->arch.sie_block->ecb2 |= 0x80;
1403 vcpu->arch.sie_block->ecb2 &= ~0x08;
1404 return 0;
1405 }
1406
1407 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1408 {
1409 struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1410
1411 vcpu->arch.cpu_id = model->cpu_id;
1412 vcpu->arch.sie_block->ibc = model->ibc;
1413 vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
1414 }
1415
1416 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1417 {
1418 int rc = 0;
1419
1420 atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1421 CPUSTAT_SM |
1422 CPUSTAT_STOPPED);
1423
1424 if (test_kvm_facility(vcpu->kvm, 78))
1425 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1426 else if (test_kvm_facility(vcpu->kvm, 8))
1427 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1428
1429 kvm_s390_vcpu_setup_model(vcpu);
1430
1431 vcpu->arch.sie_block->ecb = 6;
1432 if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
1433 vcpu->arch.sie_block->ecb |= 0x10;
1434
1435 vcpu->arch.sie_block->ecb2 = 8;
1436 vcpu->arch.sie_block->eca = 0xC1002000U;
1437 if (sclp.has_siif)
1438 vcpu->arch.sie_block->eca |= 1;
1439 if (sclp.has_sigpif)
1440 vcpu->arch.sie_block->eca |= 0x10000000U;
1441 if (test_kvm_facility(vcpu->kvm, 129)) {
1442 vcpu->arch.sie_block->eca |= 0x00020000;
1443 vcpu->arch.sie_block->ecd |= 0x20000000;
1444 }
1445 vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1446
1447 if (vcpu->kvm->arch.use_cmma) {
1448 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1449 if (rc)
1450 return rc;
1451 }
1452 hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1453 vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1454
1455 kvm_s390_vcpu_crypto_setup(vcpu);
1456
1457 return rc;
1458 }
1459
1460 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1461 unsigned int id)
1462 {
1463 struct kvm_vcpu *vcpu;
1464 struct sie_page *sie_page;
1465 int rc = -EINVAL;
1466
1467 if (id >= KVM_MAX_VCPUS)
1468 goto out;
1469
1470 rc = -ENOMEM;
1471
1472 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1473 if (!vcpu)
1474 goto out;
1475
1476 sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1477 if (!sie_page)
1478 goto out_free_cpu;
1479
1480 vcpu->arch.sie_block = &sie_page->sie_block;
1481 vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1482
1483 vcpu->arch.sie_block->icpua = id;
1484 if (!kvm_is_ucontrol(kvm)) {
1485 if (!kvm->arch.sca) {
1486 WARN_ON_ONCE(1);
1487 goto out_free_cpu;
1488 }
1489 if (!kvm->arch.sca->cpu[id].sda)
1490 kvm->arch.sca->cpu[id].sda =
1491 (__u64) vcpu->arch.sie_block;
1492 vcpu->arch.sie_block->scaoh =
1493 (__u32)(((__u64)kvm->arch.sca) >> 32);
1494 vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
1495 set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
1496 }
1497
1498 spin_lock_init(&vcpu->arch.local_int.lock);
1499 vcpu->arch.local_int.float_int = &kvm->arch.float_int;
1500 vcpu->arch.local_int.wq = &vcpu->wq;
1501 vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
1502
1503 /*
1504 * Allocate a save area for floating-point registers. If the vector
1505 * extension is available, register contents are saved in the SIE
1506 * control block. The allocated save area is still required in
1507 * particular places, for example, in kvm_s390_vcpu_store_status().
1508 */
1509 vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
1510 GFP_KERNEL);
1511 if (!vcpu->arch.guest_fpregs.fprs) {
1512 rc = -ENOMEM;
1513 goto out_free_sie_block;
1514 }
1515
1516 rc = kvm_vcpu_init(vcpu, kvm, id);
1517 if (rc)
1518 goto out_free_sie_block;
1519 VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
1520 vcpu->arch.sie_block);
1521 trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
1522
1523 return vcpu;
1524 out_free_sie_block:
1525 free_page((unsigned long)(vcpu->arch.sie_block));
1526 out_free_cpu:
1527 kmem_cache_free(kvm_vcpu_cache, vcpu);
1528 out:
1529 return ERR_PTR(rc);
1530 }
1531
1532 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
1533 {
1534 return kvm_s390_vcpu_has_irq(vcpu, 0);
1535 }
1536
1537 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
1538 {
1539 atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1540 exit_sie(vcpu);
1541 }
1542
1543 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
1544 {
1545 atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
1546 }
1547
1548 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
1549 {
1550 atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1551 exit_sie(vcpu);
1552 }
1553
1554 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
1555 {
1556 atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
1557 }
1558
1559 /*
1560 * Kick a guest cpu out of SIE and wait until SIE is not running.
1561 * If the CPU is not running (e.g. waiting as idle) the function will
1562 * return immediately. */
1563 void exit_sie(struct kvm_vcpu *vcpu)
1564 {
1565 atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
1566 while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
1567 cpu_relax();
1568 }
1569
1570 /* Kick a guest cpu out of SIE to process a request synchronously */
1571 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
1572 {
1573 kvm_make_request(req, vcpu);
1574 kvm_s390_vcpu_request(vcpu);
1575 }
1576
1577 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
1578 {
1579 int i;
1580 struct kvm *kvm = gmap->private;
1581 struct kvm_vcpu *vcpu;
1582
1583 kvm_for_each_vcpu(i, vcpu, kvm) {
1584 /* match against both prefix pages */
1585 if (kvm_s390_get_prefix(vcpu) == (address & ~0x1000UL)) {
1586 VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
1587 kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
1588 }
1589 }
1590 }
1591
1592 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
1593 {
1594 /* kvm common code refers to this, but never calls it */
1595 BUG();
1596 return 0;
1597 }
1598
1599 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
1600 struct kvm_one_reg *reg)
1601 {
1602 int r = -EINVAL;
1603
1604 switch (reg->id) {
1605 case KVM_REG_S390_TODPR:
1606 r = put_user(vcpu->arch.sie_block->todpr,
1607 (u32 __user *)reg->addr);
1608 break;
1609 case KVM_REG_S390_EPOCHDIFF:
1610 r = put_user(vcpu->arch.sie_block->epoch,
1611 (u64 __user *)reg->addr);
1612 break;
1613 case KVM_REG_S390_CPU_TIMER:
1614 r = put_user(vcpu->arch.sie_block->cputm,
1615 (u64 __user *)reg->addr);
1616 break;
1617 case KVM_REG_S390_CLOCK_COMP:
1618 r = put_user(vcpu->arch.sie_block->ckc,
1619 (u64 __user *)reg->addr);
1620 break;
1621 case KVM_REG_S390_PFTOKEN:
1622 r = put_user(vcpu->arch.pfault_token,
1623 (u64 __user *)reg->addr);
1624 break;
1625 case KVM_REG_S390_PFCOMPARE:
1626 r = put_user(vcpu->arch.pfault_compare,
1627 (u64 __user *)reg->addr);
1628 break;
1629 case KVM_REG_S390_PFSELECT:
1630 r = put_user(vcpu->arch.pfault_select,
1631 (u64 __user *)reg->addr);
1632 break;
1633 case KVM_REG_S390_PP:
1634 r = put_user(vcpu->arch.sie_block->pp,
1635 (u64 __user *)reg->addr);
1636 break;
1637 case KVM_REG_S390_GBEA:
1638 r = put_user(vcpu->arch.sie_block->gbea,
1639 (u64 __user *)reg->addr);
1640 break;
1641 default:
1642 break;
1643 }
1644
1645 return r;
1646 }
1647
1648 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
1649 struct kvm_one_reg *reg)
1650 {
1651 int r = -EINVAL;
1652
1653 switch (reg->id) {
1654 case KVM_REG_S390_TODPR:
1655 r = get_user(vcpu->arch.sie_block->todpr,
1656 (u32 __user *)reg->addr);
1657 break;
1658 case KVM_REG_S390_EPOCHDIFF:
1659 r = get_user(vcpu->arch.sie_block->epoch,
1660 (u64 __user *)reg->addr);
1661 break;
1662 case KVM_REG_S390_CPU_TIMER:
1663 r = get_user(vcpu->arch.sie_block->cputm,
1664 (u64 __user *)reg->addr);
1665 break;
1666 case KVM_REG_S390_CLOCK_COMP:
1667 r = get_user(vcpu->arch.sie_block->ckc,
1668 (u64 __user *)reg->addr);
1669 break;
1670 case KVM_REG_S390_PFTOKEN:
1671 r = get_user(vcpu->arch.pfault_token,
1672 (u64 __user *)reg->addr);
1673 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1674 kvm_clear_async_pf_completion_queue(vcpu);
1675 break;
1676 case KVM_REG_S390_PFCOMPARE:
1677 r = get_user(vcpu->arch.pfault_compare,
1678 (u64 __user *)reg->addr);
1679 break;
1680 case KVM_REG_S390_PFSELECT:
1681 r = get_user(vcpu->arch.pfault_select,
1682 (u64 __user *)reg->addr);
1683 break;
1684 case KVM_REG_S390_PP:
1685 r = get_user(vcpu->arch.sie_block->pp,
1686 (u64 __user *)reg->addr);
1687 break;
1688 case KVM_REG_S390_GBEA:
1689 r = get_user(vcpu->arch.sie_block->gbea,
1690 (u64 __user *)reg->addr);
1691 break;
1692 default:
1693 break;
1694 }
1695
1696 return r;
1697 }
1698
1699 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
1700 {
1701 kvm_s390_vcpu_initial_reset(vcpu);
1702 return 0;
1703 }
1704
1705 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1706 {
1707 memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
1708 return 0;
1709 }
1710
1711 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
1712 {
1713 memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
1714 return 0;
1715 }
1716
1717 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
1718 struct kvm_sregs *sregs)
1719 {
1720 memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
1721 memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
1722 restore_access_regs(vcpu->run->s.regs.acrs);
1723 return 0;
1724 }
1725
1726 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
1727 struct kvm_sregs *sregs)
1728 {
1729 memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
1730 memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
1731 return 0;
1732 }
1733
1734 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1735 {
1736 if (test_fp_ctl(fpu->fpc))
1737 return -EINVAL;
1738 memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
1739 vcpu->arch.guest_fpregs.fpc = fpu->fpc;
1740 save_fpu_regs();
1741 load_fpu_from(&vcpu->arch.guest_fpregs);
1742 return 0;
1743 }
1744
1745 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
1746 {
1747 memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
1748 fpu->fpc = vcpu->arch.guest_fpregs.fpc;
1749 return 0;
1750 }
1751
1752 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
1753 {
1754 int rc = 0;
1755
1756 if (!is_vcpu_stopped(vcpu))
1757 rc = -EBUSY;
1758 else {
1759 vcpu->run->psw_mask = psw.mask;
1760 vcpu->run->psw_addr = psw.addr;
1761 }
1762 return rc;
1763 }
1764
1765 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
1766 struct kvm_translation *tr)
1767 {
1768 return -EINVAL; /* not implemented yet */
1769 }
1770
1771 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
1772 KVM_GUESTDBG_USE_HW_BP | \
1773 KVM_GUESTDBG_ENABLE)
1774
1775 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
1776 struct kvm_guest_debug *dbg)
1777 {
1778 int rc = 0;
1779
1780 vcpu->guest_debug = 0;
1781 kvm_s390_clear_bp_data(vcpu);
1782
1783 if (dbg->control & ~VALID_GUESTDBG_FLAGS)
1784 return -EINVAL;
1785
1786 if (dbg->control & KVM_GUESTDBG_ENABLE) {
1787 vcpu->guest_debug = dbg->control;
1788 /* enforce guest PER */
1789 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1790
1791 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
1792 rc = kvm_s390_import_bp_data(vcpu, dbg);
1793 } else {
1794 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1795 vcpu->arch.guestdbg.last_bp = 0;
1796 }
1797
1798 if (rc) {
1799 vcpu->guest_debug = 0;
1800 kvm_s390_clear_bp_data(vcpu);
1801 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
1802 }
1803
1804 return rc;
1805 }
1806
1807 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
1808 struct kvm_mp_state *mp_state)
1809 {
1810 /* CHECK_STOP and LOAD are not supported yet */
1811 return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
1812 KVM_MP_STATE_OPERATING;
1813 }
1814
1815 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
1816 struct kvm_mp_state *mp_state)
1817 {
1818 int rc = 0;
1819
1820 /* user space knows about this interface - let it control the state */
1821 vcpu->kvm->arch.user_cpu_state_ctrl = 1;
1822
1823 switch (mp_state->mp_state) {
1824 case KVM_MP_STATE_STOPPED:
1825 kvm_s390_vcpu_stop(vcpu);
1826 break;
1827 case KVM_MP_STATE_OPERATING:
1828 kvm_s390_vcpu_start(vcpu);
1829 break;
1830 case KVM_MP_STATE_LOAD:
1831 case KVM_MP_STATE_CHECK_STOP:
1832 /* fall through - CHECK_STOP and LOAD are not supported yet */
1833 default:
1834 rc = -ENXIO;
1835 }
1836
1837 return rc;
1838 }
1839
1840 static bool ibs_enabled(struct kvm_vcpu *vcpu)
1841 {
1842 return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
1843 }
1844
1845 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
1846 {
1847 retry:
1848 kvm_s390_vcpu_request_handled(vcpu);
1849 if (!vcpu->requests)
1850 return 0;
1851 /*
1852 * We use MMU_RELOAD just to re-arm the ipte notifier for the
1853 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
1854 * This ensures that the ipte instruction for this request has
1855 * already finished. We might race against a second unmapper that
1856 * wants to set the blocking bit. Lets just retry the request loop.
1857 */
1858 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
1859 int rc;
1860 rc = gmap_ipte_notify(vcpu->arch.gmap,
1861 kvm_s390_get_prefix(vcpu),
1862 PAGE_SIZE * 2);
1863 if (rc)
1864 return rc;
1865 goto retry;
1866 }
1867
1868 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
1869 vcpu->arch.sie_block->ihcpu = 0xffff;
1870 goto retry;
1871 }
1872
1873 if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
1874 if (!ibs_enabled(vcpu)) {
1875 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
1876 atomic_or(CPUSTAT_IBS,
1877 &vcpu->arch.sie_block->cpuflags);
1878 }
1879 goto retry;
1880 }
1881
1882 if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
1883 if (ibs_enabled(vcpu)) {
1884 trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
1885 atomic_andnot(CPUSTAT_IBS,
1886 &vcpu->arch.sie_block->cpuflags);
1887 }
1888 goto retry;
1889 }
1890
1891 /* nothing to do, just clear the request */
1892 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
1893
1894 return 0;
1895 }
1896
1897 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
1898 {
1899 struct kvm_vcpu *vcpu;
1900 int i;
1901
1902 mutex_lock(&kvm->lock);
1903 preempt_disable();
1904 kvm->arch.epoch = tod - get_tod_clock();
1905 kvm_s390_vcpu_block_all(kvm);
1906 kvm_for_each_vcpu(i, vcpu, kvm)
1907 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
1908 kvm_s390_vcpu_unblock_all(kvm);
1909 preempt_enable();
1910 mutex_unlock(&kvm->lock);
1911 }
1912
1913 /**
1914 * kvm_arch_fault_in_page - fault-in guest page if necessary
1915 * @vcpu: The corresponding virtual cpu
1916 * @gpa: Guest physical address
1917 * @writable: Whether the page should be writable or not
1918 *
1919 * Make sure that a guest page has been faulted-in on the host.
1920 *
1921 * Return: Zero on success, negative error code otherwise.
1922 */
1923 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
1924 {
1925 return gmap_fault(vcpu->arch.gmap, gpa,
1926 writable ? FAULT_FLAG_WRITE : 0);
1927 }
1928
1929 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
1930 unsigned long token)
1931 {
1932 struct kvm_s390_interrupt inti;
1933 struct kvm_s390_irq irq;
1934
1935 if (start_token) {
1936 irq.u.ext.ext_params2 = token;
1937 irq.type = KVM_S390_INT_PFAULT_INIT;
1938 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
1939 } else {
1940 inti.type = KVM_S390_INT_PFAULT_DONE;
1941 inti.parm64 = token;
1942 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
1943 }
1944 }
1945
1946 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
1947 struct kvm_async_pf *work)
1948 {
1949 trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
1950 __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
1951 }
1952
1953 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
1954 struct kvm_async_pf *work)
1955 {
1956 trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
1957 __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
1958 }
1959
1960 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
1961 struct kvm_async_pf *work)
1962 {
1963 /* s390 will always inject the page directly */
1964 }
1965
1966 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
1967 {
1968 /*
1969 * s390 will always inject the page directly,
1970 * but we still want check_async_completion to clean up
1971 */
1972 return true;
1973 }
1974
1975 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
1976 {
1977 hva_t hva;
1978 struct kvm_arch_async_pf arch;
1979 int rc;
1980
1981 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
1982 return 0;
1983 if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
1984 vcpu->arch.pfault_compare)
1985 return 0;
1986 if (psw_extint_disabled(vcpu))
1987 return 0;
1988 if (kvm_s390_vcpu_has_irq(vcpu, 0))
1989 return 0;
1990 if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
1991 return 0;
1992 if (!vcpu->arch.gmap->pfault_enabled)
1993 return 0;
1994
1995 hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
1996 hva += current->thread.gmap_addr & ~PAGE_MASK;
1997 if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
1998 return 0;
1999
2000 rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2001 return rc;
2002 }
2003
2004 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2005 {
2006 int rc, cpuflags;
2007
2008 /*
2009 * On s390, notifications for arriving pages are delivered directly
2010 * to the guest, but the housekeeping for completed pfaults is
2011 * handled outside the worker.
2012 */
2013 kvm_check_async_pf_completion(vcpu);
2014
2015 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
2016
2017 if (need_resched())
2018 schedule();
2019
2020 if (test_cpu_flag(CIF_MCCK_PENDING))
2021 s390_handle_mcck();
2022
2023 if (!kvm_is_ucontrol(vcpu->kvm)) {
2024 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2025 if (rc)
2026 return rc;
2027 }
2028
2029 rc = kvm_s390_handle_requests(vcpu);
2030 if (rc)
2031 return rc;
2032
2033 if (guestdbg_enabled(vcpu)) {
2034 kvm_s390_backup_guest_per_regs(vcpu);
2035 kvm_s390_patch_guest_per_regs(vcpu);
2036 }
2037
2038 vcpu->arch.sie_block->icptcode = 0;
2039 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2040 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2041 trace_kvm_s390_sie_enter(vcpu, cpuflags);
2042
2043 return 0;
2044 }
2045
2046 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2047 {
2048 psw_t *psw = &vcpu->arch.sie_block->gpsw;
2049 u8 opcode;
2050 int rc;
2051
2052 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2053 trace_kvm_s390_sie_fault(vcpu);
2054
2055 /*
2056 * We want to inject an addressing exception, which is defined as a
2057 * suppressing or terminating exception. However, since we came here
2058 * by a DAT access exception, the PSW still points to the faulting
2059 * instruction since DAT exceptions are nullifying. So we've got
2060 * to look up the current opcode to get the length of the instruction
2061 * to be able to forward the PSW.
2062 */
2063 rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
2064 if (rc)
2065 return kvm_s390_inject_prog_cond(vcpu, rc);
2066 psw->addr = __rewind_psw(*psw, -insn_length(opcode));
2067
2068 return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
2069 }
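/*
 * Illustrative note on the rewind above (insn_length() is defined elsewhere;
 * this is an assumption about its behaviour): on s390 the instruction length
 * is encoded in the two leftmost bits of the first opcode byte - 00 means 2
 * bytes, 01 and 10 mean 4 bytes, 11 means 6 bytes.  For an opcode byte of
 * 0xb2, insn_length() therefore yields 4, and __rewind_psw(*psw, -4) advances
 * the PSW by four bytes past the faulting instruction before the addressing
 * exception is injected.
 */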
2070
2071 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2072 {
2073 int rc = -1;
2074
2075 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2076 vcpu->arch.sie_block->icptcode);
2077 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2078
2079 if (guestdbg_enabled(vcpu))
2080 kvm_s390_restore_guest_per_regs(vcpu);
2081
2082 if (exit_reason >= 0) {
2083 rc = 0;
2084 } else if (kvm_is_ucontrol(vcpu->kvm)) {
2085 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2086 vcpu->run->s390_ucontrol.trans_exc_code =
2087 current->thread.gmap_addr;
2088 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2089 rc = -EREMOTE;
2090
2091 } else if (current->thread.gmap_pfault) {
2092 trace_kvm_s390_major_guest_pfault(vcpu);
2093 current->thread.gmap_pfault = 0;
2094 if (kvm_arch_setup_async_pf(vcpu)) {
2095 rc = 0;
2096 } else {
2097 gpa_t gpa = current->thread.gmap_addr;
2098 rc = kvm_arch_fault_in_page(vcpu, gpa, 1);
2099 }
2100 }
2101
2102 if (rc == -1)
2103 rc = vcpu_post_run_fault_in_sie(vcpu);
2104
2105 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
2106
2107 if (rc == 0) {
2108 if (kvm_is_ucontrol(vcpu->kvm))
2109 /* Don't exit for host interrupts. */
2110 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
2111 else
2112 rc = kvm_handle_sie_intercept(vcpu);
2113 }
2114
2115 return rc;
2116 }
2117
2118 static int __vcpu_run(struct kvm_vcpu *vcpu)
2119 {
2120 int rc, exit_reason;
2121
2122 /*
2123 * We try to hold kvm->srcu during most of vcpu_run (except when
2124 * running the guest), so that memslots (and other stuff) are protected
2125 */
2126 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2127
2128 do {
2129 rc = vcpu_pre_run(vcpu);
2130 if (rc)
2131 break;
2132
2133 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2134 /*
2135 * As PF_VCPU will be used in the fault handler, there must be
2136 * no uaccess between guest_enter and guest_exit.
2137 */
2138 local_irq_disable();
2139 __kvm_guest_enter();
2140 local_irq_enable();
2141 exit_reason = sie64a(vcpu->arch.sie_block,
2142 vcpu->run->s.regs.gprs);
2143 local_irq_disable();
2144 __kvm_guest_exit();
2145 local_irq_enable();
2146 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2147
2148 rc = vcpu_post_run(vcpu, exit_reason);
2149 } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2150
2151 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2152 return rc;
2153 }
2154
2155 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2156 {
2157 vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2158 vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2159 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2160 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2161 if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2162 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2163 /* some control register changes require a tlb flush */
2164 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2165 }
2166 if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2167 vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
2168 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2169 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2170 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2171 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2172 }
2173 if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2174 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2175 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2176 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2177 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2178 kvm_clear_async_pf_completion_queue(vcpu);
2179 }
2180 kvm_run->kvm_dirty_regs = 0;
2181 }
2182
2183 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2184 {
2185 kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2186 kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2187 kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2188 memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2189 kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
2190 kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2191 kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2192 kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2193 kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2194 kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2195 kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2196 kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2197 }
2198
2199 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2200 {
2201 int rc;
2202 sigset_t sigsaved;
2203
2204 if (guestdbg_exit_pending(vcpu)) {
2205 kvm_s390_prepare_debug_exit(vcpu);
2206 return 0;
2207 }
2208
2209 if (vcpu->sigset_active)
2210 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2211
2212 if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2213 kvm_s390_vcpu_start(vcpu);
2214 } else if (is_vcpu_stopped(vcpu)) {
2215 pr_err_ratelimited("can't run stopped vcpu %d\n",
2216 vcpu->vcpu_id);
2217 return -EINVAL;
2218 }
2219
2220 sync_regs(vcpu, kvm_run);
2221
2222 might_fault();
2223 rc = __vcpu_run(vcpu);
2224
2225 if (signal_pending(current) && !rc) {
2226 kvm_run->exit_reason = KVM_EXIT_INTR;
2227 rc = -EINTR;
2228 }
2229
2230 if (guestdbg_exit_pending(vcpu) && !rc) {
2231 kvm_s390_prepare_debug_exit(vcpu);
2232 rc = 0;
2233 }
2234
2235 if (rc == -EOPNOTSUPP) {
2236 /* intercept cannot be handled in-kernel, prepare kvm-run */
2237 kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
2238 kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2239 kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2240 kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2241 rc = 0;
2242 }
2243
2244 if (rc == -EREMOTE) {
2245 /* intercept was handled, but userspace support is needed
2246 * kvm_run has been prepared by the handler */
2247 rc = 0;
2248 }
2249
2250 store_regs(vcpu, kvm_run);
2251
2252 if (vcpu->sigset_active)
2253 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2254
2255 vcpu->stat.exit_userspace++;
2256 return rc;
2257 }
2258
2259 /*
2260 * store status at address
2261 * we have two special cases:
2262 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2263 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2264 */
2265 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2266 {
2267 unsigned char archmode = 1;
2268 unsigned int px;
2269 u64 clkcomp;
2270 int rc;
2271
2272 if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2273 if (write_guest_abs(vcpu, 163, &archmode, 1))
2274 return -EFAULT;
2275 gpa = SAVE_AREA_BASE;
2276 } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2277 if (write_guest_real(vcpu, 163, &archmode, 1))
2278 return -EFAULT;
2279 gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE);
2280 }
2281 rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs),
2282 vcpu->arch.guest_fpregs.fprs, 128);
2283 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs),
2284 vcpu->run->s.regs.gprs, 128);
2285 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw),
2286 &vcpu->arch.sie_block->gpsw, 16);
2287 px = kvm_s390_get_prefix(vcpu);
2288 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg),
2289 &px, 4);
2290 rc |= write_guest_abs(vcpu,
2291 gpa + offsetof(struct save_area, fp_ctrl_reg),
2292 &vcpu->arch.guest_fpregs.fpc, 4);
2293 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg),
2294 &vcpu->arch.sie_block->todpr, 4);
2295 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer),
2296 &vcpu->arch.sie_block->cputm, 8);
2297 clkcomp = vcpu->arch.sie_block->ckc >> 8;
2298 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp),
2299 &clkcomp, 8);
2300 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs),
2301 &vcpu->run->s.regs.acrs, 64);
2302 rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs),
2303 &vcpu->arch.sie_block->gcr, 128);
2304 return rc ? -EFAULT : 0;
2305 }
2306
2307 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2308 {
2309 /*
2310 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2311 * copying in vcpu load/put. Let's update our copies before we save
2312 * them into the save area.
2313 */
2314 save_fpu_regs();
2315 if (test_kvm_facility(vcpu->kvm, 129)) {
2316 /*
2317 * If the vector extension is available, the vector registers
2318 * which overlap with the floating-point registers are saved in
2319 * the SIE-control block. Hence, extract the floating-point
2320 * registers and the FPC value and store them in the
2321 * guest_fpregs structure.
2322 */
2323 WARN_ON(!is_vx_task(current)); /* XXX remove later */
2324 vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
2325 convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
2326 current->thread.fpu.vxrs);
2327 } else
2328 save_fpu_to(&vcpu->arch.guest_fpregs);
2329 save_access_regs(vcpu->run->s.regs.acrs);
2330
2331 return kvm_s390_store_status_unloaded(vcpu, addr);
2332 }
2333
2334 /*
2335 * store additional status at address
2336 */
2337 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2338 unsigned long gpa)
2339 {
2340 /* Only bits 0-53 are used for address formation */
2341 if (!(gpa & ~0x3ff))
2342 return 0;
2343
2344 return write_guest_abs(vcpu, gpa & ~0x3ff,
2345 (void *)&vcpu->run->s.regs.vrs, 512);
2346 }
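/*
 * Illustrative note: "bits 0-53" uses the IBM bit numbering of a 64-bit
 * address, so the low ten bits (bits 54-63) do not take part in address
 * formation.  Masking with ~0x3ff clears exactly those ten bits; a gpa of
 * 0x1234 is thus treated as 0x1000, while a gpa below 0x400 has no usable
 * address bits set, so the function stores nothing and returns 0.
 */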
2347
2348 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2349 {
2350 if (!test_kvm_facility(vcpu->kvm, 129))
2351 return 0;
2352
2353 /*
2354 * The guest VXRS are in the host VXRS due to the lazy
2355 * copying in vcpu load/put. We can simply call save_fpu_regs()
2356 * to save the current register state because we are in the
2357 * middle of a load/put cycle.
2358 *
2359 * Let's update our copies before we save it into the save area.
2360 */
2361 save_fpu_regs();
2362
2363 return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2364 }
2365
2366 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2367 {
2368 kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2369 kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2370 }
2371
2372 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2373 {
2374 unsigned int i;
2375 struct kvm_vcpu *vcpu;
2376
2377 kvm_for_each_vcpu(i, vcpu, kvm) {
2378 __disable_ibs_on_vcpu(vcpu);
2379 }
2380 }
2381
2382 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2383 {
2384 kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2385 kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2386 }
2387
2388 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2389 {
2390 int i, online_vcpus, started_vcpus = 0;
2391
2392 if (!is_vcpu_stopped(vcpu))
2393 return;
2394
2395 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2396 /* Only one cpu at a time may enter/leave the STOPPED state. */
2397 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2398 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2399
2400 for (i = 0; i < online_vcpus; i++) {
2401 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2402 started_vcpus++;
2403 }
2404
2405 if (started_vcpus == 0) {
2406 /* we're the only active VCPU -> speed it up */
2407 __enable_ibs_on_vcpu(vcpu);
2408 } else if (started_vcpus == 1) {
2409 /*
2410 * As we are starting a second VCPU, we have to disable
2411 * the IBS facility on all VCPUs to remove potentially
2412 * outstanding ENABLE requests.
2413 */
2414 __disable_ibs_on_all_vcpus(vcpu->kvm);
2415 }
2416
2417 atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2418 /*
2419 * Another VCPU might have used IBS while we were offline.
2420 * Let's play safe and flush the VCPU at startup.
2421 */
2422 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2423 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2424 return;
2425 }
2426
2427 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2428 {
2429 int i, online_vcpus, started_vcpus = 0;
2430 struct kvm_vcpu *started_vcpu = NULL;
2431
2432 if (is_vcpu_stopped(vcpu))
2433 return;
2434
2435 trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2436 /* Only one cpu at a time may enter/leave the STOPPED state. */
2437 spin_lock(&vcpu->kvm->arch.start_stop_lock);
2438 online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2439
2440 /* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
2441 kvm_s390_clear_stop_irq(vcpu);
2442
2443 atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2444 __disable_ibs_on_vcpu(vcpu);
2445
2446 for (i = 0; i < online_vcpus; i++) {
2447 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2448 started_vcpus++;
2449 started_vcpu = vcpu->kvm->vcpus[i];
2450 }
2451 }
2452
2453 if (started_vcpus == 1) {
2454 /*
2455 * As we only have one VCPU left, we want to enable the
2456 * IBS facility for that VCPU to speed it up.
2457 */
2458 __enable_ibs_on_vcpu(started_vcpu);
2459 }
2460
2461 spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2462 return;
2463 }
2464
2465 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2466 struct kvm_enable_cap *cap)
2467 {
2468 int r;
2469
2470 if (cap->flags)
2471 return -EINVAL;
2472
2473 switch (cap->cap) {
2474 case KVM_CAP_S390_CSS_SUPPORT:
2475 if (!vcpu->kvm->arch.css_support) {
2476 vcpu->kvm->arch.css_support = 1;
2477 VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2478 trace_kvm_s390_enable_css(vcpu->kvm);
2479 }
2480 r = 0;
2481 break;
2482 default:
2483 r = -EINVAL;
2484 break;
2485 }
2486 return r;
2487 }
2488
2489 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
2490 struct kvm_s390_mem_op *mop)
2491 {
2492 void __user *uaddr = (void __user *)mop->buf;
2493 void *tmpbuf = NULL;
2494 int r, srcu_idx;
2495 const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
2496 | KVM_S390_MEMOP_F_CHECK_ONLY;
2497
2498 if (mop->flags & ~supported_flags)
2499 return -EINVAL;
2500
2501 if (mop->size > MEM_OP_MAX_SIZE)
2502 return -E2BIG;
2503
2504 if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
2505 tmpbuf = vmalloc(mop->size);
2506 if (!tmpbuf)
2507 return -ENOMEM;
2508 }
2509
2510 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2511
2512 switch (mop->op) {
2513 case KVM_S390_MEMOP_LOGICAL_READ:
2514 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2515 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
2516 break;
2517 }
2518 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2519 if (r == 0) {
2520 if (copy_to_user(uaddr, tmpbuf, mop->size))
2521 r = -EFAULT;
2522 }
2523 break;
2524 case KVM_S390_MEMOP_LOGICAL_WRITE:
2525 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
2526 r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
2527 break;
2528 }
2529 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
2530 r = -EFAULT;
2531 break;
2532 }
2533 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
2534 break;
2535 default:
2536 r = -EINVAL;
2537 }
2538
2539 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
2540
2541 if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
2542 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
2543
2544 vfree(tmpbuf);
2545 return r;
2546 }
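/*
 * Illustrative sketch: how userspace might drive the handler above through
 * the KVM_S390_MEM_OP ioctl to read 4 KB of guest memory.  The field names
 * follow struct kvm_s390_mem_op as used above; vcpu_fd and buffer are
 * assumptions made up for the example.
 */
#if 0
	struct kvm_s390_mem_op mop = {
		.gaddr	= 0x10000,			/* guest logical address */
		.size	= 4096,				/* must be <= MEM_OP_MAX_SIZE */
		.op	= KVM_S390_MEMOP_LOGICAL_READ,
		.buf	= (unsigned long)buffer,	/* userspace destination */
		.ar	= 0,				/* access register 0 */
	};

	if (ioctl(vcpu_fd, KVM_S390_MEM_OP, &mop) < 0)
		perror("KVM_S390_MEM_OP");
#endif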
2547
2548 long kvm_arch_vcpu_ioctl(struct file *filp,
2549 unsigned int ioctl, unsigned long arg)
2550 {
2551 struct kvm_vcpu *vcpu = filp->private_data;
2552 void __user *argp = (void __user *)arg;
2553 int idx;
2554 long r;
2555
2556 switch (ioctl) {
2557 case KVM_S390_IRQ: {
2558 struct kvm_s390_irq s390irq;
2559
2560 r = -EFAULT;
2561 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
2562 break;
2563 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2564 break;
2565 }
2566 case KVM_S390_INTERRUPT: {
2567 struct kvm_s390_interrupt s390int;
2568 struct kvm_s390_irq s390irq;
2569
2570 r = -EFAULT;
2571 if (copy_from_user(&s390int, argp, sizeof(s390int)))
2572 break;
2573 if (s390int_to_s390irq(&s390int, &s390irq))
2574 return -EINVAL;
2575 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
2576 break;
2577 }
2578 case KVM_S390_STORE_STATUS:
2579 idx = srcu_read_lock(&vcpu->kvm->srcu);
2580 r = kvm_s390_vcpu_store_status(vcpu, arg);
2581 srcu_read_unlock(&vcpu->kvm->srcu, idx);
2582 break;
2583 case KVM_S390_SET_INITIAL_PSW: {
2584 psw_t psw;
2585
2586 r = -EFAULT;
2587 if (copy_from_user(&psw, argp, sizeof(psw)))
2588 break;
2589 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
2590 break;
2591 }
2592 case KVM_S390_INITIAL_RESET:
2593 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
2594 break;
2595 case KVM_SET_ONE_REG:
2596 case KVM_GET_ONE_REG: {
2597 struct kvm_one_reg reg;
2598 r = -EFAULT;
2599 if (copy_from_user(&reg, argp, sizeof(reg)))
2600 break;
2601 if (ioctl == KVM_SET_ONE_REG)
2602 r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
2603 else
2604 r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
2605 break;
2606 }
2607 #ifdef CONFIG_KVM_S390_UCONTROL
2608 case KVM_S390_UCAS_MAP: {
2609 struct kvm_s390_ucas_mapping ucasmap;
2610
2611 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2612 r = -EFAULT;
2613 break;
2614 }
2615
2616 if (!kvm_is_ucontrol(vcpu->kvm)) {
2617 r = -EINVAL;
2618 break;
2619 }
2620
2621 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
2622 ucasmap.vcpu_addr, ucasmap.length);
2623 break;
2624 }
2625 case KVM_S390_UCAS_UNMAP: {
2626 struct kvm_s390_ucas_mapping ucasmap;
2627
2628 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
2629 r = -EFAULT;
2630 break;
2631 }
2632
2633 if (!kvm_is_ucontrol(vcpu->kvm)) {
2634 r = -EINVAL;
2635 break;
2636 }
2637
2638 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
2639 ucasmap.length);
2640 break;
2641 }
2642 #endif
2643 case KVM_S390_VCPU_FAULT: {
2644 r = gmap_fault(vcpu->arch.gmap, arg, 0);
2645 break;
2646 }
2647 case KVM_ENABLE_CAP:
2648 {
2649 struct kvm_enable_cap cap;
2650 r = -EFAULT;
2651 if (copy_from_user(&cap, argp, sizeof(cap)))
2652 break;
2653 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
2654 break;
2655 }
2656 case KVM_S390_MEM_OP: {
2657 struct kvm_s390_mem_op mem_op;
2658
2659 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
2660 r = kvm_s390_guest_mem_op(vcpu, &mem_op);
2661 else
2662 r = -EFAULT;
2663 break;
2664 }
2665 case KVM_S390_SET_IRQ_STATE: {
2666 struct kvm_s390_irq_state irq_state;
2667
2668 r = -EFAULT;
2669 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2670 break;
2671 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
2672 irq_state.len == 0 ||
2673 irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
2674 r = -EINVAL;
2675 break;
2676 }
2677 r = kvm_s390_set_irq_state(vcpu,
2678 (void __user *) irq_state.buf,
2679 irq_state.len);
2680 break;
2681 }
2682 case KVM_S390_GET_IRQ_STATE: {
2683 struct kvm_s390_irq_state irq_state;
2684
2685 r = -EFAULT;
2686 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
2687 break;
2688 if (irq_state.len == 0) {
2689 r = -EINVAL;
2690 break;
2691 }
2692 r = kvm_s390_get_irq_state(vcpu,
2693 (__u8 __user *) irq_state.buf,
2694 irq_state.len);
2695 break;
2696 }
2697 default:
2698 r = -ENOTTY;
2699 }
2700 return r;
2701 }
2702
2703 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
2704 {
2705 #ifdef CONFIG_KVM_S390_UCONTROL
2706 if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
2707 && (kvm_is_ucontrol(vcpu->kvm))) {
2708 vmf->page = virt_to_page(vcpu->arch.sie_block);
2709 get_page(vmf->page);
2710 return 0;
2711 }
2712 #endif
2713 return VM_FAULT_SIGBUS;
2714 }
2715
2716 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
2717 unsigned long npages)
2718 {
2719 return 0;
2720 }
2721
2722 /* Section: memory related */
2723 int kvm_arch_prepare_memory_region(struct kvm *kvm,
2724 struct kvm_memory_slot *memslot,
2725 const struct kvm_userspace_memory_region *mem,
2726 enum kvm_mr_change change)
2727 {
2728 /* A few sanity checks. We can have memory slots which have to start
2729 and end at a segment boundary (1MB). The memory in userland may be
2730 fragmented into various different vmas. It is okay to mmap() and
2731 munmap() in this slot at any time after this call. */
2732
2733 if (mem->userspace_addr & 0xffffful)
2734 return -EINVAL;
2735
2736 if (mem->memory_size & 0xffffful)
2737 return -EINVAL;
2738
2739 return 0;
2740 }
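/*
 * Illustrative note: 0xfffff is 1 MB - 1, so the two checks above require
 * both the userspace address and the size of the memory slot to be aligned
 * to the 1 MB segment size.  For example, userspace_addr = 0x20100000 with
 * memory_size = 0x80000000 passes, while a memory_size of 0x7ff00 (not a
 * multiple of 1 MB) is rejected with -EINVAL.
 */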
2741
2742 void kvm_arch_commit_memory_region(struct kvm *kvm,
2743 const struct kvm_userspace_memory_region *mem,
2744 const struct kvm_memory_slot *old,
2745 const struct kvm_memory_slot *new,
2746 enum kvm_mr_change change)
2747 {
2748 int rc;
2749
2750 /* If the basics of the memslot do not change, we do not want
2751 * to update the gmap. Every update causes several unnecessary
2752 * segment translation exceptions. This is usually handled just
2753 * fine by the normal fault handler + gmap, but it will also
2754 * cause faults on the prefix page of running guest CPUs.
2755 */
2756 if (old->userspace_addr == mem->userspace_addr &&
2757 old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
2758 old->npages * PAGE_SIZE == mem->memory_size)
2759 return;
2760
2761 rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
2762 mem->guest_phys_addr, mem->memory_size);
2763 if (rc)
2764 pr_warn("failed to commit memory region\n");
2765 return;
2766 }
2767
2768 static int __init kvm_s390_init(void)
2769 {
2770 return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
2771 }
2772
2773 static void __exit kvm_s390_exit(void)
2774 {
2775 kvm_exit();
2776 }
2777
2778 module_init(kvm_s390_init);
2779 module_exit(kvm_s390_exit);
2780
2781 /*
2782 * Enable autoloading of the kvm module.
2783 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
2784 * since x86 takes a different approach.
2785 */
2786 #include <linux/miscdevice.h>
2787 MODULE_ALIAS_MISCDEV(KVM_MINOR);
2788 MODULE_ALIAS("devname:kvm");