arch/x86/oprofile/nmi_int.c

   1 /**
   2  * @file nmi_int.c
   3  *
   4  * @remark Copyright 2002 OProfile authors
   5  * @remark Read the file COPYING
   6  *
   7  * @author John Levon <levon@movementarian.org>
   8  */
   9
  10 #include <linux/init.h>
  11 #include <linux/notifier.h>
  12 #include <linux/smp.h>
  13 #include <linux/oprofile.h>
  14 #include <linux/sysdev.h>
  15 #include <linux/slab.h>
  16 #include <linux/moduleparam.h>
  17 #include <linux/kdebug.h>
  18 #include <linux/cpu.h>
  19 #include <asm/nmi.h>
  20 #include <asm/msr.h>
  21 #include <asm/apic.h>
  22
  23 #include "op_counter.h"
  24 #include "op_x86_model.h"
  25
  26 static struct op_x86_model_spec const *model;
  27 static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
  28 static DEFINE_PER_CPU(unsigned long, saved_lvtpc);
  29
  30 static int nmi_start(void);
  31 static void nmi_stop(void);
  32 static void nmi_cpu_start(void *dummy);
  33 static void nmi_cpu_stop(void *dummy);
  34
  35 /* 0 == registered but off, 1 == registered and on */
  36 static int nmi_enabled = 0;
  37
  38 #ifdef CONFIG_SMP
  39 static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
  40                                  void *data)
  41 {
  42         int cpu = (unsigned long)data;
  43         switch (action) {
  44         case CPU_DOWN_FAILED:
  45         case CPU_ONLINE:
  46                 smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
  47                 break;
  48         case CPU_DOWN_PREPARE:
  49                 smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
  50                 break;
  51         }
  52         return NOTIFY_DONE;
  53 }
  54
  55 static struct notifier_block oprofile_cpu_nb = {
  56         .notifier_call = oprofile_cpu_notifier
  57 };
  58 #endif
  59
  60 #ifdef CONFIG_PM
  61
  62 static int nmi_suspend(struct sys_device *dev, pm_message_t state)
  63 {
  64         /* Only one CPU left, just stop that one */
  65         if (nmi_enabled == 1)
  66                 nmi_cpu_stop(NULL);
  67         return 0;
  68 }
  69
  70 static int nmi_resume(struct sys_device *dev)
  71 {
  72         if (nmi_enabled == 1)
  73                 nmi_cpu_start(NULL);
  74         return 0;
  75 }
  76
  77 static struct sysdev_class oprofile_sysclass = {
  78         .name           = "oprofile",
  79         .resume         = nmi_resume,
  80         .suspend        = nmi_suspend,
  81 };
  82
  83 static struct sys_device device_oprofile = {
  84         .id     = 0,
  85         .cls    = &oprofile_sysclass,
  86 };
  87
  88 static int __init init_sysfs(void)
  89 {
  90         int error;
  91
  92         error = sysdev_class_register(&oprofile_sysclass);
  93         if (!error)
  94                 error = sysdev_register(&device_oprofile);
  95         return error;
  96 }
  97
  98 static void exit_sysfs(void)
  99 {
 100         sysdev_unregister(&device_oprofile);
 101         sysdev_class_unregister(&oprofile_sysclass);
 102 }
 103
 104 #else
 105 #define init_sysfs() do { } while (0)
 106 #define exit_sysfs() do { } while (0)
 107 #endif /* CONFIG_PM */
 108
 109 static int profile_exceptions_notify(struct notifier_block *self,
 110                                      unsigned long val, void *data)
 111 {
 112         struct die_args *args = (struct die_args *)data;
 113         int ret = NOTIFY_DONE;
 114         int cpu = smp_processor_id();
 115
 116         switch (val) {
 117         case DIE_NMI:
 118                 if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
 119                         ret = NOTIFY_STOP;
 120                 break;
 121         default:
 122                 break;
 123         }
 124         return ret;
 125 }
 126
 127 static void nmi_cpu_save_registers(struct op_msrs *msrs)
 128 {
 129         unsigned int const nr_ctrs = model->num_counters;
 130         unsigned int const nr_ctrls = model->num_controls;
 131         struct op_msr *counters = msrs->counters;
 132         struct op_msr *controls = msrs->controls;
 133         unsigned int i;
 134
 135         for (i = 0; i < nr_ctrs; ++i) {
 136                 if (counters[i].addr) {
 137                         rdmsr(counters[i].addr,
 138                                 counters[i].saved.low,
 139                                 counters[i].saved.high);
 140                 }
 141         }
 142
 143         for (i = 0; i < nr_ctrls; ++i) {
 144                 if (controls[i].addr) {
 145                         rdmsr(controls[i].addr,
 146                                 controls[i].saved.low,
 147                                 controls[i].saved.high);
 148                 }
 149         }
 150 }
 151
 152 static void nmi_save_registers(void *dummy)
 153 {
 154         int cpu = smp_processor_id();
 155         struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 156         nmi_cpu_save_registers(msrs);
 157 }
 158
 159 static void free_msrs(void)
 160 {
 161         int i;
 162         for_each_possible_cpu(i) {
 163                 kfree(per_cpu(cpu_msrs, i).counters);
 164                 per_cpu(cpu_msrs, i).counters = NULL;
 165                 kfree(per_cpu(cpu_msrs, i).controls);
 166                 per_cpu(cpu_msrs, i).controls = NULL;
 167         }
 168 }
 169
 170 static int allocate_msrs(void)
 171 {
 172         int success = 1;
 173         size_t controls_size = sizeof(struct op_msr) * model->num_controls;
 174         size_t counters_size = sizeof(struct op_msr) * model->num_counters;
 175
 176         int i;
 177         for_each_possible_cpu(i) {
 178                 per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
 179                                                                 GFP_KERNEL);
 180                 if (!per_cpu(cpu_msrs, i).counters) {
 181                         success = 0;
 182                         break;
 183                 }
 184                 per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
 185                                                                 GFP_KERNEL);
 186                 if (!per_cpu(cpu_msrs, i).controls) {
 187                         success = 0;
 188                         break;
 189                 }
 190         }
 191
 192         if (!success)
 193                 free_msrs();
 194
 195         return success;
 196 }
 197
 198 static void nmi_cpu_setup(void *dummy)
 199 {
 200         int cpu = smp_processor_id();
 201         struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
 202         spin_lock(&oprofilefs_lock);
 203         model->setup_ctrs(msrs);
 204         spin_unlock(&oprofilefs_lock);
 205         per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
 206         apic_write(APIC_LVTPC, APIC_DM_NMI);
 207 }
 208
 209 static struct notifier_block profile_exceptions_nb = {
 210         .notifier_call = profile_exceptions_notify,
 211         .next = NULL,
 212         .priority = 0
 213 };
 214
 215 static int nmi_setup(void)
 216 {
 217         int err = 0;
 218         int cpu;
 219
 220         if (!allocate_msrs())
 221                 return -ENOMEM;
 222
 223         err = register_die_notifier(&profile_exceptions_nb);
 224         if (err) {
 225                 free_msrs();
 226                 return err;
 227         }
 228
 229         /* We need to serialize save and setup for HT because the subset
 230          * of msrs are distinct for save and setup operations
 231          */
 232
 233         /* Assume saved/restored counters are the same on all CPUs */
 234         model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
 235         for_each_possible_cpu(cpu) {
 236                 if (cpu != 0) {
 237                         memcpy(per_cpu(cpu_msrs, cpu).counters,
 238                                 per_cpu(cpu_msrs, 0).counters,
 239                                 sizeof(struct op_msr) * model->num_counters);
 240
 241                         memcpy(per_cpu(cpu_msrs, cpu).controls,
 242                                 per_cpu(cpu_msrs, 0).controls,
 243                                 sizeof(struct op_msr) * model->num_controls);
 244                 }
 245
 246         }
 247         on_each_cpu(nmi_save_registers, NULL, 1);
 248         on_each_cpu(nmi_cpu_setup, NULL, 1);
 249         nmi_enabled = 1;
 250         return 0;
 251 }
 252
 253 static void nmi_restore_registers(struct op_msrs *msrs)
 254 {
 255         unsigned int const nr_ctrs = model->num_counters;
 256         unsigned int const nr_ctrls = model->num_controls;
 257         struct op_msr *counters = msrs->counters;
 258         struct op_msr *controls = msrs->controls;
 259         unsigned int i;
 260
 261         for (i = 0; i < nr_ctrls; ++i) {
 262                 if (controls[i].addr) {
 263                         wrmsr(controls[i].addr,
 264                                 controls[i].saved.low,
 265                                 controls[i].saved.high);
 266                 }
 267         }
 268
 269         for (i = 0; i < nr_ctrs; ++i) {
 270                 if (counters[i].addr) {
 271                         wrmsr(counters[i].addr,
 272                                 counters[i].saved.low,
 273                                 counters[i].saved.high);
 274                 }
 275         }
 276 }
 277
 278 static void nmi_cpu_shutdown(void *dummy)
 279 {
 280         unsigned int v;
 281         int cpu = smp_processor_id();
 282         struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);
 283
 284         /* restoring APIC_LVTPC can trigger an apic error because the delivery
 285          * mode and vector nr combination can be illegal. That's by design: on
 286          * power on apic lvt contain a zero vector nr which are legal only for
 287          * NMI delivery mode. So inhibit apic err before restoring lvtpc
 288          */
 289         v = apic_read(APIC_LVTERR);
 290         apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
 291         apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
 292         apic_write(APIC_LVTERR, v);
 293         nmi_restore_registers(msrs);
 294 }
 295
 296 static void nmi_shutdown(void)
 297 {
 298         struct op_msrs *msrs = &get_cpu_var(cpu_msrs);
 299         nmi_enabled = 0;
 300         on_each_cpu(nmi_cpu_shutdown, NULL, 1);
 301         unregister_die_notifier(&profile_exceptions_nb);
 302         model->shutdown(msrs);
 303         free_msrs();
 304         put_cpu_var(cpu_msrs);
 305 }
 306
 307 static void nmi_cpu_start(void *dummy)
 308 {
 309         struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
 310         model->start(msrs);
 311 }
 312
 313 static int nmi_start(void)
 314 {
 315         on_each_cpu(nmi_cpu_start, NULL, 1);
 316         return 0;
 317 }
 318
 319 static void nmi_cpu_stop(void *dummy)
 320 {
 321         struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
 322         model->stop(msrs);
 323 }
 324
 325 static void nmi_stop(void)
 326 {
 327         on_each_cpu(nmi_cpu_stop, NULL, 1);
 328 }
 329
 330 struct op_counter_config counter_config[OP_MAX_COUNTER];
 331
 332 static int nmi_create_files(struct super_block *sb, struct dentry *root)
 333 {
 334         unsigned int i;
 335
 336         for (i = 0; i < model->num_counters; ++i) {
 337                 struct dentry *dir;
 338                 char buf[4];
 339
 340                 /* quick little hack to _not_ expose a counter if it is not
 341                  * available for use.  This should protect userspace app.
 342                  * NOTE:  assumes 1:1 mapping here (that counters are organized
 343                  *        sequentially in their struct assignment).
 344                  */
 345                 if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
 346                         continue;
 347
 348                 snprintf(buf,  sizeof(buf), "%d", i);
 349                 dir = oprofilefs_mkdir(sb, root, buf);
 350                 oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
 351                 oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
 352                 oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
 353                 oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
 354                 oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
 355                 oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
 356         }
 357
 358         return 0;
 359 }
 360
 361 static int p4force;
 362 module_param(p4force, int, 0);
 363
 364 static int __init p4_init(char **cpu_type)
 365 {
 366         __u8 cpu_model = boot_cpu_data.x86_model;
 367
 368         if (!p4force && (cpu_model > 6 || cpu_model == 5))
 369                 return 0;
 370
 371 #ifndef CONFIG_SMP
 372         *cpu_type = "i386/p4";
 373         model = &op_p4_spec;
 374         return 1;
 375 #else
 376         switch (smp_num_siblings) {
 377         case 1:
 378                 *cpu_type = "i386/p4";
 379                 model = &op_p4_spec;
 380                 return 1;
 381
 382         case 2:
 383                 *cpu_type = "i386/p4-ht";
 384                 model = &op_p4_ht2_spec;
 385                 return 1;
 386         }
 387 #endif
 388
 389         printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
 390         printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
 391         return 0;
 392 }
 393
 394 static int __init ppro_init(char **cpu_type)
 395 {
 396         __u8 cpu_model = boot_cpu_data.x86_model;
 397
 398         switch (cpu_model) {
 399         case 0 ... 2:
 400                 *cpu_type = "i386/ppro";
 401                 break;
 402         case 3 ... 5:
 403                 *cpu_type = "i386/pii";
 404                 break;
 405         case 6 ... 8:
 406                 *cpu_type = "i386/piii";
 407                 break;
 408         case 9:
 409                 *cpu_type = "i386/p6_mobile";
 410                 break;
 411         case 10 ... 13:
 412                 *cpu_type = "i386/p6";
 413                 break;
 414         case 14:
 415                 *cpu_type = "i386/core";
 416                 break;
 417         case 15: case 23:
 418                 *cpu_type = "i386/core_2";
 419                 break;
 420         case 26:
 421                 *cpu_type = "i386/core_2";
 422                 break;
 423         default:
 424                 /* Unknown */
 425                 return 0;
 426         }
 427
 428         model = &op_ppro_spec;
 429         return 1;
 430 }
 431
 432 /* in order to get sysfs right */
 433 static int using_nmi;
 434
 435 int __init op_nmi_init(struct oprofile_operations *ops)
 436 {
 437         __u8 vendor = boot_cpu_data.x86_vendor;
 438         __u8 family = boot_cpu_data.x86;
 439         char *cpu_type;
 440
 441         if (!cpu_has_apic)
 442                 return -ENODEV;
 443
 444         switch (vendor) {
 445         case X86_VENDOR_AMD:
 446                 /* Needs to be at least an Athlon (or hammer in 32bit mode) */
 447
 448                 switch (family) {
 449                 default:
 450                         return -ENODEV;
 451                 case 6:
 452                         model = &op_athlon_spec;
 453                         cpu_type = "i386/athlon";
 454                         break;
 455                 case 0xf:
 456                         model = &op_athlon_spec;
 457                         /* Actually it could be i386/hammer too, but give
 458                          user space an consistent name. */
 459                         cpu_type = "x86-64/hammer";
 460                         break;
 461                 case 0x10:
 462                         model = &op_athlon_spec;
 463                         cpu_type = "x86-64/family10";
 464                         break;
 465                 }
 466                 break;
 467
 468         case X86_VENDOR_INTEL:
 469                 switch (family) {
 470                         /* Pentium IV */
 471                 case 0xf:
 472                         if (!p4_init(&cpu_type))
 473                                 return -ENODEV;
 474                         break;
 475
 476                         /* A P6-class processor */
 477                 case 6:
 478                         if (!ppro_init(&cpu_type))
 479                                 return -ENODEV;
 480                         break;
 481
 482                 default:
 483                         return -ENODEV;
 484                 }
 485                 break;
 486
 487         default:
 488                 return -ENODEV;
 489         }
 490
 491         init_sysfs();
 492 #ifdef CONFIG_SMP
 493         register_cpu_notifier(&oprofile_cpu_nb);
 494 #endif
 495         using_nmi = 1;
 496         ops->create_files = nmi_create_files;
 497         ops->setup = nmi_setup;
 498         ops->shutdown = nmi_shutdown;
 499         ops->start = nmi_start;
 500         ops->stop = nmi_stop;
 501         ops->cpu_type = cpu_type;
 502         printk(KERN_INFO "oprofile: using NMI interrupt.\n");
 503         return 0;
 504 }
 505
 506 void op_nmi_exit(void)
 507 {
 508         if (using_nmi) {
 509                 exit_sysfs();
 510 #ifdef CONFIG_SMP
 511                 unregister_cpu_notifier(&oprofile_cpu_nb);
 512 #endif
 513         }
 514 }