Merge branch 'perf/urgent' into perf/core
author    Ingo Molnar <mingo@elte.hu>
          Fri, 12 Mar 2010 09:20:57 +0000 (10:20 +0100)
committer Ingo Molnar <mingo@elte.hu>
          Fri, 12 Mar 2010 09:20:59 +0000 (10:20 +0100)
Merge reason: We want to queue up a dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h
kernel/perf_event.c
tools/perf/builtin-record.c
tools/perf/builtin-top.c

diff --combined arch/x86/kernel/cpu/perf_event.c
index e24f6374f9f52ab7912a13a6ca55daecb77e6390,7645faea8e85e9d4f3c85326ddb6851bc1e0510e..a6d92c34135c7ca577d24c3eb159d374641cd9e7
  #include <asm/stacktrace.h>
  #include <asm/nmi.h>
  
 -static u64 perf_event_mask __read_mostly;
 +#if 0
 +#undef wrmsrl
 +#define wrmsrl(msr, val)                                      \
 +do {                                                          \
 +      trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
 +                      (unsigned long)(val));                  \
 +      native_write_msr((msr), (u32)((u64)(val)),              \
 +                      (u32)((u64)(val) >> 32));               \
 +} while (0)
 +#endif
 +
 +/*
 + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 + */
 +static unsigned long
 +copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 +{
 +      unsigned long offset, addr = (unsigned long)from;
 +      int type = in_nmi() ? KM_NMI : KM_IRQ0;
 +      unsigned long size, len = 0;
 +      struct page *page;
 +      void *map;
 +      int ret;
  
 -/* The maximal number of PEBS events: */
 -#define MAX_PEBS_EVENTS       4
 +      do {
 +              ret = __get_user_pages_fast(addr, 1, 0, &page);
 +              if (!ret)
 +                      break;
  
 -/* The size of a BTS record in bytes: */
 -#define BTS_RECORD_SIZE               24
 +              offset = addr & (PAGE_SIZE - 1);
 +              size = min(PAGE_SIZE - offset, n - len);
  
 -/* The size of a per-cpu BTS buffer in bytes: */
 -#define BTS_BUFFER_SIZE               (BTS_RECORD_SIZE * 2048)
 +              map = kmap_atomic(page, type);
 +              memcpy(to, map+offset, size);
 +              kunmap_atomic(map, type);
 +              put_page(page);
  
 -/* The BTS overflow threshold in bytes from the end of the buffer: */
 -#define BTS_OVFL_TH           (BTS_RECORD_SIZE * 128)
 +              len  += size;
 +              to   += size;
 +              addr += size;
  
 +      } while (len < n);
  
 -/*
 - * Bits in the debugctlmsr controlling branch tracing.
 - */
 -#define X86_DEBUGCTL_TR                       (1 << 6)
 -#define X86_DEBUGCTL_BTS              (1 << 7)
 -#define X86_DEBUGCTL_BTINT            (1 << 8)
 -#define X86_DEBUGCTL_BTS_OFF_OS               (1 << 9)
 -#define X86_DEBUGCTL_BTS_OFF_USR      (1 << 10)
 +      return len;
 +}
  
 -/*
 - * A debug store configuration.
 - *
 - * We only support architectures that use 64bit fields.
 - */
 -struct debug_store {
 -      u64     bts_buffer_base;
 -      u64     bts_index;
 -      u64     bts_absolute_maximum;
 -      u64     bts_interrupt_threshold;
 -      u64     pebs_buffer_base;
 -      u64     pebs_index;
 -      u64     pebs_absolute_maximum;
 -      u64     pebs_interrupt_threshold;
 -      u64     pebs_event_reset[MAX_PEBS_EVENTS];
 -};
 +static u64 perf_event_mask __read_mostly;
  
  struct event_constraint {
        union {
@@@ -94,40 -87,18 +94,40 @@@ struct amd_nb 
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
  };
  
 +#define MAX_LBR_ENTRIES               16
 +
  struct cpu_hw_events {
 +      /*
 +       * Generic x86 PMC bits
 +       */
        struct perf_event       *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           interrupts;
        int                     enabled;
 -      struct debug_store      *ds;
  
        int                     n_events;
        int                     n_added;
        int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64                     tags[X86_PMC_IDX_MAX];
        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
 +
 +      /*
 +       * Intel DebugStore bits
 +       */
 +      struct debug_store      *ds;
 +      u64                     pebs_enabled;
 +
 +      /*
 +       * Intel LBR bits
 +       */
 +      int                             lbr_users;
 +      void                            *lbr_context;
 +      struct perf_branch_stack        lbr_stack;
 +      struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
 +
 +      /*
 +       * AMD specific bits
 +       */
        struct amd_nb           *amd_nb;
  };
  
  #define EVENT_CONSTRAINT(c, n, m)     \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
  
 +/*
 + * Constraint on the Event code.
 + */
  #define INTEL_EVENT_CONSTRAINT(c, n)  \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
  
 +/*
 + * Constraint on the Event code + UMask + fixed-mask
 + */
  #define FIXED_EVENT_CONSTRAINT(c, n)  \
        EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
  
 +/*
 + * Constraint on the Event code + UMask
 + */
 +#define PEBS_EVENT_CONSTRAINT(c, n)   \
 +      EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 +
  #define EVENT_CONSTRAINT_END          \
        EVENT_CONSTRAINT(0, 0, 0)
  
  #define for_each_event_constraint(e, c)       \
        for ((e) = (c); (e)->cmask; (e)++)
  
 +union perf_capabilities {
 +      struct {
 +              u64     lbr_format    : 6;
 +              u64     pebs_trap     : 1;
 +              u64     pebs_arch_reg : 1;
 +              u64     pebs_format   : 4;
 +              u64     smm_freeze    : 1;
 +      };
 +      u64     capabilities;
 +};
 +
  /*
   * struct x86_pmu - generic x86 pmu
   */
  struct x86_pmu {
 +      /*
 +       * Generic x86 PMC bits
 +       */
        const char      *name;
        int             version;
        int             (*handle_irq)(struct pt_regs *);
        u64             event_mask;
        int             apic;
        u64             max_period;
 -      u64             intel_ctrl;
 -      void            (*enable_bts)(u64 config);
 -      void            (*disable_bts)(void);
 -
        struct event_constraint *
                        (*get_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
 +      void            (*quirks)(void);
  
        void            (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
        void            (*cpu_dying)(int cpu);
        void            (*cpu_dead)(int cpu);
 +
 +      /*
 +       * Intel Arch Perfmon v2+
 +       */
 +      u64                     intel_ctrl;
 +      union perf_capabilities intel_cap;
 +
 +      /*
 +       * Intel DebugStore bits
 +       */
 +      int             bts, pebs;
 +      int             pebs_record_size;
 +      void            (*drain_pebs)(struct pt_regs *regs);
 +      struct event_constraint *pebs_constraints;
 +
 +      /*
 +       * Intel LBR
 +       */
 +      unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 +      int             lbr_nr;                    /* hardware stack size */
  };
  
  static struct x86_pmu x86_pmu __read_mostly;
@@@ -365,14 -293,110 +365,14 @@@ static void release_pmc_hardware(void
  #endif
  }
  
 -static inline bool bts_available(void)
 -{
 -      return x86_pmu.enable_bts != NULL;
 -}
 -
 -static void init_debug_store_on_cpu(int cpu)
 -{
 -      struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 -
 -      if (!ds)
 -              return;
 -
 -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
 -                   (u32)((u64)(unsigned long)ds),
 -                   (u32)((u64)(unsigned long)ds >> 32));
 -}
 -
 -static void fini_debug_store_on_cpu(int cpu)
 -{
 -      if (!per_cpu(cpu_hw_events, cpu).ds)
 -              return;
 -
 -      wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 -}
 -
 -static void release_bts_hardware(void)
 -{
 -      int cpu;
 -
 -      if (!bts_available())
 -              return;
 -
 -      get_online_cpus();
 -
 -      for_each_online_cpu(cpu)
 -              fini_debug_store_on_cpu(cpu);
 -
 -      for_each_possible_cpu(cpu) {
 -              struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 -
 -              if (!ds)
 -                      continue;
 -
 -              per_cpu(cpu_hw_events, cpu).ds = NULL;
 -
 -              kfree((void *)(unsigned long)ds->bts_buffer_base);
 -              kfree(ds);
 -      }
 -
 -      put_online_cpus();
 -}
 -
 -static int reserve_bts_hardware(void)
 -{
 -      int cpu, err = 0;
 -
 -      if (!bts_available())
 -              return 0;
 -
 -      get_online_cpus();
 -
 -      for_each_possible_cpu(cpu) {
 -              struct debug_store *ds;
 -              void *buffer;
 -
 -              err = -ENOMEM;
 -              buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
 -              if (unlikely(!buffer))
 -                      break;
 -
 -              ds = kzalloc(sizeof(*ds), GFP_KERNEL);
 -              if (unlikely(!ds)) {
 -                      kfree(buffer);
 -                      break;
 -              }
 -
 -              ds->bts_buffer_base = (u64)(unsigned long)buffer;
 -              ds->bts_index = ds->bts_buffer_base;
 -              ds->bts_absolute_maximum =
 -                      ds->bts_buffer_base + BTS_BUFFER_SIZE;
 -              ds->bts_interrupt_threshold =
 -                      ds->bts_absolute_maximum - BTS_OVFL_TH;
 -
 -              per_cpu(cpu_hw_events, cpu).ds = ds;
 -              err = 0;
 -      }
 -
 -      if (err)
 -              release_bts_hardware();
 -      else {
 -              for_each_online_cpu(cpu)
 -                      init_debug_store_on_cpu(cpu);
 -      }
 -
 -      put_online_cpus();
 -
 -      return err;
 -}
 +static int reserve_ds_buffers(void);
 +static void release_ds_buffers(void);
  
  static void hw_perf_event_destroy(struct perf_event *event)
  {
        if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
                release_pmc_hardware();
 -              release_bts_hardware();
 +              release_ds_buffers();
                mutex_unlock(&pmc_reserve_mutex);
        }
  }
@@@ -435,7 -459,7 +435,7 @@@ static int __hw_perf_event_init(struct 
                        if (!reserve_pmc_hardware())
                                err = -EBUSY;
                        else
 -                              err = reserve_bts_hardware();
 +                              err = reserve_ds_buffers();
                }
                if (!err)
                        atomic_inc(&active_events);
        if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
            (hwc->sample_period == 1)) {
                /* BTS is not supported by this architecture. */
 -              if (!bts_available())
 +              if (!x86_pmu.bts)
                        return -EOPNOTSUPP;
  
                /* BTS is currently only allowed for user-mode. */
@@@ -787,7 -811,6 +787,6 @@@ void hw_perf_enable(void
                 * step2: reprogram moved events into new counters
                 */
                for (i = 0; i < n_running; i++) {
                        event = cpuc->event_list[i];
                        hwc = &event->hw;
  
                                continue;
  
                        x86_pmu_stop(event);
-                       hwc->idx = -1;
                }
  
                for (i = 0; i < cpuc->n_events; i++) {
                        event = cpuc->event_list[i];
                        hwc = &event->hw;
  
-                       if (i < n_running &&
-                           match_prev_assignment(hwc, cpuc, i))
-                               continue;
-                       if (hwc->idx == -1)
+                       if (!match_prev_assignment(hwc, cpuc, i))
                                x86_assign_hw_event(event, cpuc, i);
+                       else if (i < n_running)
+                               continue;
  
                        x86_pmu_start(event);
                }
  
  static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
  {
 -      (void)checking_wrmsrl(hwc->config_base + hwc->idx,
 +      wrmsrl(hwc->config_base + hwc->idx,
                              hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
  }
  
  static inline void x86_pmu_disable_event(struct perf_event *event)
  {
        struct hw_perf_event *hwc = &event->hw;
 -      (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 +
 +      wrmsrl(hwc->config_base + hwc->idx, hwc->config);
  }
  
  static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@@ -855,7 -872,7 +849,7 @@@ x86_perf_event_set_period(struct perf_e
        struct hw_perf_event *hwc = &event->hw;
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
 -      int err, ret = 0, idx = hwc->idx;
 +      int ret = 0, idx = hwc->idx;
  
        if (idx == X86_PMC_IDX_FIXED_BTS)
                return 0;
         */
        atomic64_set(&hwc->prev_count, (u64)-left);
  
 -      err = checking_wrmsrl(hwc->event_base + idx,
 -                           (u64)(-left) & x86_pmu.event_mask);
 +      wrmsrl(hwc->event_base + idx,
 +                      (u64)(-left) & x86_pmu.event_mask);
  
        perf_event_update_userpage(event);
  
@@@ -972,7 -989,6 +966,7 @@@ static void x86_pmu_unthrottle(struct p
  void perf_event_print_debug(void)
  {
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
 +      u64 pebs;
        struct cpu_hw_events *cpuc;
        unsigned long flags;
        int cpu, idx;
                rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
                rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
 +              rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
  
                pr_info("\n");
                pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
                pr_info("CPU#%d: status:     %016llx\n", cpu, status);
                pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
                pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
 +              pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
        }
 -      pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 +      pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
  
        for (idx = 0; idx < x86_pmu.num_events; idx++) {
                rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
@@@ -1313,8 -1327,6 +1307,8 @@@ undo
  
  #include "perf_event_amd.c"
  #include "perf_event_p6.c"
 +#include "perf_event_intel_lbr.c"
 +#include "perf_event_intel_ds.c"
  #include "perf_event_intel.c"
  
  static int __cpuinit
@@@ -1386,9 -1398,6 +1380,9 @@@ void __init init_hw_perf_events(void
  
        pr_cont("%s PMU driver.\n", x86_pmu.name);
  
 +      if (x86_pmu.quirks)
 +              x86_pmu.quirks();
 +
        if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
                WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
                     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
@@@ -1449,32 -1458,6 +1443,32 @@@ static const struct pmu pmu = 
        .unthrottle     = x86_pmu_unthrottle,
  };
  
 +/*
 + * validate that we can schedule this event
 + */
 +static int validate_event(struct perf_event *event)
 +{
 +      struct cpu_hw_events *fake_cpuc;
 +      struct event_constraint *c;
 +      int ret = 0;
 +
 +      fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
 +      if (!fake_cpuc)
 +              return -ENOMEM;
 +
 +      c = x86_pmu.get_event_constraints(fake_cpuc, event);
 +
 +      if (!c || !c->weight)
 +              ret = -ENOSPC;
 +
 +      if (x86_pmu.put_event_constraints)
 +              x86_pmu.put_event_constraints(fake_cpuc, event);
 +
 +      kfree(fake_cpuc);
 +
 +      return ret;
 +}
 +
  /*
   * validate a single event group
   *
@@@ -1540,8 -1523,6 +1534,8 @@@ const struct pmu *hw_perf_event_init(st
  
                if (event->group_leader != event)
                        err = validate_group(event);
 +              else
 +                      err = validate_event(event);
  
                event->pmu = tmp;
        }
@@@ -1612,6 -1593,41 +1606,6 @@@ perf_callchain_kernel(struct pt_regs *r
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
  }
  
 -/*
 - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
 - */
 -static unsigned long
 -copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 -{
 -      unsigned long offset, addr = (unsigned long)from;
 -      int type = in_nmi() ? KM_NMI : KM_IRQ0;
 -      unsigned long size, len = 0;
 -      struct page *page;
 -      void *map;
 -      int ret;
 -
 -      do {
 -              ret = __get_user_pages_fast(addr, 1, 0, &page);
 -              if (!ret)
 -                      break;
 -
 -              offset = addr & (PAGE_SIZE - 1);
 -              size = min(PAGE_SIZE - offset, n - len);
 -
 -              map = kmap_atomic(page, type);
 -              memcpy(to, map+offset, size);
 -              kunmap_atomic(map, type);
 -              put_page(page);
 -
 -              len  += size;
 -              to   += size;
 -              addr += size;
 -
 -      } while (len < n);
 -
 -      return len;
 -}
 -
  static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
  {
        unsigned long bytes;
@@@ -1685,3 -1701,16 +1679,16 @@@ struct perf_callchain_entry *perf_callc
  
        return entry;
  }
+ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+ {
+       regs->ip = ip;
+       /*
+        * perf_arch_fetch_caller_regs adds another call, we need to increment
+        * the skip level
+        */
+       regs->bp = rewind_frame_pointer(skip + 1);
+       regs->cs = __KERNEL_CS;
+       local_save_flags(regs->flags);
+ }
+ EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
diff --combined include/linux/perf_event.h
index be85f7c4a94ffd2548b753acb3f7b4cea780c402,95477038a72ad479b2c1b5d19138e9ff675ee62c..2bccb7b9da2d70d82b2ca178432e6d03a0d0e3c6
@@@ -203,9 -203,8 +203,9 @@@ struct perf_event_attr 
                                enable_on_exec :  1, /* next exec enables     */
                                task           :  1, /* trace fork/exit       */
                                watermark      :  1, /* wakeup_watermark      */
 +                              precise        :  1, /* OoO invariant counter */
  
 -                              __reserved_1   : 49;
 +                              __reserved_1   : 48;
  
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@@ -294,12 -293,6 +294,12 @@@ struct perf_event_mmap_page 
  #define PERF_RECORD_MISC_USER                 (2 << 0)
  #define PERF_RECORD_MISC_HYPERVISOR           (3 << 0)
  
 +#define PERF_RECORD_MISC_EXACT                        (1 << 14)
 +/*
 + * Reserve the last bit to indicate some extended misc field
 + */
 +#define PERF_RECORD_MISC_EXT_RESERVED         (1 << 15)
 +
  struct perf_event_header {
        __u32   type;
        __u16   misc;
@@@ -459,6 -452,8 +459,8 @@@ enum perf_callchain_context 
  #include <linux/fs.h>
  #include <linux/pid_namespace.h>
  #include <linux/workqueue.h>
+ #include <linux/ftrace.h>
+ #include <linux/cpu.h>
  #include <asm/atomic.h>
  
  #define PERF_MAX_STACK_DEPTH          255
@@@ -473,17 -468,6 +475,17 @@@ struct perf_raw_record 
        void                            *data;
  };
  
 +struct perf_branch_entry {
 +      __u64                           from;
 +      __u64                           to;
 +      __u64                           flags;
 +};
 +
 +struct perf_branch_stack {
 +      __u64                           nr;
 +      struct perf_branch_entry        entries[0];
 +};
 +
  struct task_struct;
  
  /**
@@@ -865,6 -849,44 +867,44 @@@ perf_sw_event(u32 event_id, u64 nr, in
                __perf_sw_event(event_id, nr, nmi, regs, addr);
  }
  
+ extern void
+ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+ /*
+  * Take a snapshot of the regs. Skip ip and frame pointer to
+  * the nth caller. We only need a few of the regs:
+  * - ip for PERF_SAMPLE_IP
+  * - cs for user_mode() tests
+  * - bp for callchains
+  * - eflags, for future purposes, just in case
+  */
+ static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+ {
+       unsigned long ip;
+       memset(regs, 0, sizeof(*regs));
+       switch (skip) {
+       case 1 :
+               ip = CALLER_ADDR0;
+               break;
+       case 2 :
+               ip = CALLER_ADDR1;
+               break;
+       case 3 :
+               ip = CALLER_ADDR2;
+               break;
+       case 4:
+               ip = CALLER_ADDR3;
+               break;
+       /* No need to support further for now */
+       default:
+               ip = 0;
+       }
+       return perf_arch_fetch_caller_regs(regs, ip, skip);
+ }
  extern void __perf_event_mmap(struct vm_area_struct *vma);
  
  static inline void perf_event_mmap(struct vm_area_struct *vma)
@@@ -898,7 -920,8 +938,8 @@@ static inline bool perf_paranoid_kernel
  }
  
  extern void perf_event_init(void);
- extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size);
+ extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
+                         int entry_size, struct pt_regs *regs);
  extern void perf_bp_event(struct perf_event *event, void *data);
  
  #ifndef perf_misc_flags
diff --combined kernel/perf_event.c
index 3853d49c7d56365d7df880017c55d1f2663a4f6f,fb3031cf9f173f18e081277453ee05f87ad70fc5..8bf61273c58b3842200822ff8bcf02b416cbfc84
@@@ -1368,8 -1368,6 +1368,8 @@@ void perf_event_task_sched_in(struct ta
        if (cpuctx->task_ctx == ctx)
                return;
  
 +      perf_disable();
 +
        /*
         * We want to keep the following priority order:
         * cpu pinned (that don't need to move), task pinned,
        ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE);
  
        cpuctx->task_ctx = ctx;
 +
 +      perf_enable();
  }
  
  #define MAX_INTERRUPTS (~0ULL)
@@@ -2790,6 -2786,11 +2790,11 @@@ __weak struct perf_callchain_entry *per
        return NULL;
  }
  
+ __weak
+ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
+ {
+ }
  /*
   * Output
   */
@@@ -4317,9 -4318,8 +4322,8 @@@ static const struct pmu perf_ops_task_c
  #ifdef CONFIG_EVENT_TRACING
  
  void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
-                         int entry_size)
+                  int entry_size, struct pt_regs *regs)
  {
-       struct pt_regs *regs = get_irq_regs();
        struct perf_sample_data data;
        struct perf_raw_record raw = {
                .size = entry_size,
        perf_sample_data_init(&data, addr);
        data.raw = &raw;
  
-       if (!regs)
-               regs = task_pt_regs(current);
        /* Trace events already protected against recursion */
        do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
-                               &data, regs);
+                        &data, regs);
  }
  EXPORT_SYMBOL_GPL(perf_tp_event);
  
@@@ -4350,7 -4347,7 +4351,7 @@@ static int perf_tp_event_match(struct p
  
  static void tp_perf_event_destroy(struct perf_event *event)
  {
-       ftrace_profile_disable(event->attr.config);
+       perf_trace_disable(event->attr.config);
  }
  
  static const struct pmu *tp_perf_event_init(struct perf_event *event)
                        !capable(CAP_SYS_ADMIN))
                return ERR_PTR(-EPERM);
  
-       if (ftrace_profile_enable(event->attr.config))
+       if (perf_trace_enable(event->attr.config))
                return NULL;
  
        event->destroy = tp_perf_event_destroy;
@@@ -5371,12 -5368,22 +5372,22 @@@ int perf_event_init_task(struct task_st
        return ret;
  }
  
+ static void __init perf_event_init_all_cpus(void)
+ {
+       int cpu;
+       struct perf_cpu_context *cpuctx;
+       for_each_possible_cpu(cpu) {
+               cpuctx = &per_cpu(perf_cpu_context, cpu);
+               __perf_event_init_context(&cpuctx->ctx, NULL);
+       }
+ }
  static void __cpuinit perf_event_init_cpu(int cpu)
  {
        struct perf_cpu_context *cpuctx;
  
        cpuctx = &per_cpu(perf_cpu_context, cpu);
-       __perf_event_init_context(&cpuctx->ctx, NULL);
  
        spin_lock(&perf_resource_lock);
        cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
@@@ -5442,6 -5449,7 +5453,7 @@@ static struct notifier_block __cpuinitd
  
  void __init perf_event_init(void)
  {
+       perf_event_init_all_cpus();
        perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
                        (void *)(long)smp_processor_id());
        perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
diff --combined tools/perf/builtin-record.c
index 6e4a39328b3363367ca930d123804599172409ef,3b8b6387c47ca4de49873990b2c47216c8309aaf..bed175d59e55383d92c05173758f33a7c406a73a
@@@ -22,6 -22,7 +22,7 @@@
  #include "util/debug.h"
  #include "util/session.h"
  #include "util/symbol.h"
+ #include "util/cpumap.h"
  
  #include <unistd.h>
  #include <sched.h>
@@@ -244,6 -245,9 +245,9 @@@ static void create_counter(int counter
  
        attr->sample_type       |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
  
+       if (nr_counters > 1)
+               attr->sample_type |= PERF_SAMPLE_ID;
        if (freq) {
                attr->sample_type       |= PERF_SAMPLE_PERIOD;
                attr->freq              = 1;
@@@ -280,8 -284,7 +284,8 @@@ try_again
                int err = errno;
  
                if (err == EPERM || err == EACCES)
 -                      die("Permission error - are you root?\n");
 +                      die("Permission error - are you root?\n"
 +                          "\t Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n");
                else if (err ==  ENODEV && profile_cpu != -1)
                        die("No such device - did you specify an out-of-range profile CPU?\n");
  
@@@ -392,6 -395,9 +396,9 @@@ static int process_buildids(void
  {
        u64 size = lseek(output, 0, SEEK_CUR);
  
+       if (size == 0)
+               return 0;
        session->fd = output;
        return __perf_session__process_events(session, post_processing_offset,
                                              size - post_processing_offset,
@@@ -419,9 -425,6 +426,6 @@@ static int __cmd_record(int argc, cons
        char buf;
  
        page_size = sysconf(_SC_PAGE_SIZE);
-       nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-       assert(nr_cpus <= MAX_NR_CPUS);
-       assert(nr_cpus >= 0);
  
        atexit(sig_atexit);
        signal(SIGCHLD, sig_handler);
        if ((!system_wide && !inherit) || profile_cpu != -1) {
                open_counters(profile_cpu, target_pid);
        } else {
+               nr_cpus = read_cpu_map();
                for (i = 0; i < nr_cpus; i++)
-                       open_counters(i, target_pid);
+                       open_counters(cpumap[i], target_pid);
        }
  
        if (file_new) {
diff --combined tools/perf/builtin-top.c
index c051833f755cc4657a6dd44b945b2f94621ea6cb,0b719e3dde050611c282b0c21dccbd91f507ad70..ec4822322abd7fb4db17a13af5da5ff17629d551
@@@ -28,6 -28,7 +28,7 @@@
  #include <linux/rbtree.h>
  #include "util/parse-options.h"
  #include "util/parse-events.h"
+ #include "util/cpumap.h"
  
  #include "util/debug.h"
  
@@@ -410,7 -411,6 +411,7 @@@ static double sym_weight(const struct s
  
  static long                   samples;
  static long                   userspace_samples;
 +static long                   exact_samples;
  static const char             CONSOLE_CLEAR[] = "\e[H\e[2J";
  
  static void __list_insert_active_sym(struct sym_entry *syme)
@@@ -451,7 -451,6 +452,7 @@@ static void print_sym_table(void
        int counter, snap = !display_weighted ? sym_counter : 0;
        float samples_per_sec = samples/delay_secs;
        float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
 +      float esamples_percent = (100.0*exact_samples)/samples;
        float sum_ksamples = 0.0;
        struct sym_entry *syme, *n;
        struct rb_root tmp = RB_ROOT;
        int sym_width = 0, dso_width = 0, max_dso_width;
        const int win_width = winsize.ws_col - 1;
  
 -      samples = userspace_samples = 0;
 +      samples = userspace_samples = exact_samples = 0;
  
        /* Sort the active symbols */
        pthread_mutex_lock(&active_symbols_lock);
        puts(CONSOLE_CLEAR);
  
        printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
 -      printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%% [",
 +      printf( "   PerfTop:%8.0f irqs/sec  kernel:%4.1f%%  exact: %4.1f%% [",
                samples_per_sec,
 -              100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
 +              100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)),
 +              esamples_percent);
  
        if (nr_counters == 1 || !display_weighted) {
                printf("%Ld", (u64)attrs[0].sample_period);
@@@ -957,9 -955,6 +958,9 @@@ static void event__process_sample(cons
                return;
        }
  
 +      if (self->header.misc & PERF_RECORD_MISC_EXACT)
 +              exact_samples++;
 +
        if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
            al.filtered)
                return;
@@@ -1129,7 -1124,7 +1130,7 @@@ static void start_counter(int i, int co
  
        cpu = profile_cpu;
        if (target_pid == -1 && profile_cpu == -1)
-               cpu = i;
+               cpu = cpumap[i];
  
        attr = attrs + counter;
  
@@@ -1353,12 -1348,10 +1354,10 @@@ int cmd_top(int argc, const char **argv
                attrs[counter].sample_period = default_interval;
        }
  
-       nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
-       assert(nr_cpus <= MAX_NR_CPUS);
-       assert(nr_cpus >= 0);
        if (target_pid != -1 || profile_cpu != -1)
                nr_cpus = 1;
+       else
+               nr_cpus = read_cpu_map();
  
        get_term_dimensions(&winsize);
        if (print_entries == 0) {