perf_events: Update Intel extra regs shared constraints management
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d38b0020f77564f926da3104b275b1cefd6e9113..6ad95baff856b1ee962f611fb159babd684fc060 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,25 +1,15 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
-#define MAX_EXTRA_REGS 2
-
-/*
- * Per register state.
- */
-struct er_account {
-       int                     ref;            /* reference count */
-       unsigned int            extra_reg;      /* extra MSR number */
-       u64                     extra_config;   /* extra MSR config */
-};
-
 /*
- * Per core state
- * This used to coordinate shared registers for HT threads.
+ * Per core/cpu state
+ *
+ * Used to coordinate shared registers between HT threads or
+ * among events on a single PMU.
  */
-struct intel_percore {
-       raw_spinlock_t          lock;           /* protect structure */
-       struct er_account       regs[MAX_EXTRA_REGS];
-       int                     refcnt;         /* number of threads */
-       unsigned                core_id;
+struct intel_shared_regs {
+       struct er_account       regs[EXTRA_REG_MAX];
+       int                     refcnt;         /* per-core: #HT threads */
+       unsigned                core_id;        /* per-core: core id */
 };
 
 /*
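
The per-register er_account bookkeeping removed above does not disappear; the new code expects a definition shared with the core x86 perf code (presumably moved to the common perf_event header as part of this series). Judging from the accesses later in this patch (era->lock, era->config, era->reg, and the atomic_read/inc/dec on era->ref), the companion definition looks roughly like the sketch below; the exact field order and comments are assumptions, not taken from this diff:

	struct er_account {
		raw_spinlock_t		lock;	/* protects this entry */
		u64			config;	/* extra MSR config value */
		u64			reg;	/* extra MSR number */
		atomic_t		ref;	/* number of events sharing it */
	};
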
@@ -88,16 +78,10 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 {
-       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
+       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
        EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_nehalem_percore_constraints[] __read_mostly =
-{
-       INTEL_EVENT_CONSTRAINT(0xb7, 0),
-       EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -125,18 +109,11 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 
 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
 {
-       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
-       INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
+       INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
+       INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
        EVENT_EXTRA_END
 };
 
-static struct event_constraint intel_westmere_percore_constraints[] __read_mostly =
-{
-       INTEL_EVENT_CONSTRAINT(0xb7, 0),
-       INTEL_EVENT_CONSTRAINT(0xbb, 0),
-       EVENT_CONSTRAINT_END
-};
-
 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
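
The extra_reg tables above now carry a fourth argument naming the shared-register slot (RSP_0, RSP_1). That index ends up in event->hw.extra_reg.idx and selects an entry of intel_shared_regs->regs[]. A rough sketch of the enum and helper macro these table entries rely on is shown below; the enum names EXTRA_REG_NONE/RSP_0/RSP_1/MAX appear in this diff, while the macro body is an assumption about the companion header change:

	enum extra_reg_type {
		EXTRA_REG_NONE  = -1,	/* no extra register in use */
		EXTRA_REG_RSP_0 =  0,	/* OFFCORE_RESPONSE_0 */
		EXTRA_REG_RSP_1 =  1,	/* OFFCORE_RESPONSE_1 */
		EXTRA_REG_MAX		/* number of shared register slots */
	};

	/* assumed shape of the updated helper macro */
	#define INTEL_EVENT_EXTRA_REG(e, ms, vm, i)		\
		{ .event = (e), .msr = (ms),			\
		  .config_mask = ARCH_PERFMON_EVENTSEL_EVENT,	\
		  .valid_mask = (vm), .idx = EXTRA_REG_##i }
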
@@ -1037,65 +1014,89 @@ intel_bts_constraints(struct perf_event *event)
        return NULL;
 }
 
+/*
+ * manage allocation of shared extra msr for certain events
+ *
+ * sharing can be:
+ * per-cpu: to be shared between the various events on a single PMU
+ * per-core: per-cpu + shared by HT threads
+ */
 static struct event_constraint *
-intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
+                                  struct hw_perf_event_extra *reg)
 {
-       struct hw_perf_event *hwc = &event->hw;
-       unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
-       struct event_constraint *c;
-       struct intel_percore *pc;
+       struct event_constraint *c = &emptyconstraint;
        struct er_account *era;
-       int i;
-       int free_slot;
-       int found;
 
-       if (!x86_pmu.percore_constraints || hwc->extra_alloc)
-               return NULL;
+       /* already allocated shared msr */
+       if (reg->alloc || !cpuc->shared_regs)
+               return &unconstrained;
 
-       for (c = x86_pmu.percore_constraints; c->cmask; c++) {
-               if (e != c->code)
-                       continue;
+       era = &cpuc->shared_regs->regs[reg->idx];
+
+       raw_spin_lock(&era->lock);
+
+       if (!atomic_read(&era->ref) || era->config == reg->config) {
+
+               /* lock in msr value */
+               era->config = reg->config;
+               era->reg = reg->reg;
+
+               /* one more user */
+               atomic_inc(&era->ref);
+
+               /* no need to reallocate during incremental event scheduling */
+               reg->alloc = 1;
 
                /*
-                * Allocate resource per core.
+                * All events using extra_reg are unconstrained.
+                * Avoids calling x86_get_event_constraints()
+                *
+                * Must revisit if extra_reg controlling events
+                * ever have constraints. Worst case we go through
+                * the regular event constraint table.
                 */
-               pc = cpuc->per_core;
-               if (!pc)
-                       break;
-               c = &emptyconstraint;
-               raw_spin_lock(&pc->lock);
-               free_slot = -1;
-               found = 0;
-               for (i = 0; i < MAX_EXTRA_REGS; i++) {
-                       era = &pc->regs[i];
-                       if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
-                               /* Allow sharing same config */
-                               if (hwc->extra_config == era->extra_config) {
-                                       era->ref++;
-                                       cpuc->percore_used = 1;
-                                       hwc->extra_alloc = 1;
-                                       c = NULL;
-                               }
-                               /* else conflict */
-                               found = 1;
-                               break;
-                       } else if (era->ref == 0 && free_slot == -1)
-                               free_slot = i;
-               }
-               if (!found && free_slot != -1) {
-                       era = &pc->regs[free_slot];
-                       era->ref = 1;
-                       era->extra_reg = hwc->extra_reg;
-                       era->extra_config = hwc->extra_config;
-                       cpuc->percore_used = 1;
-                       hwc->extra_alloc = 1;
-                       c = NULL;
-               }
-               raw_spin_unlock(&pc->lock);
-               return c;
+               c = &unconstrained;
        }
+       raw_spin_unlock(&era->lock);
 
-       return NULL;
+       return c;
+}
+
+static void
+__intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
+                                  struct hw_perf_event_extra *reg)
+{
+       struct er_account *era;
+
+       /*
+        * only put constraint if extra reg was actually
+        * allocated. Also takes care of event which do
+        * not use an extra shared reg
+        */
+       if (!reg->alloc)
+               return;
+
+       era = &cpuc->shared_regs->regs[reg->idx];
+
+       /* one fewer user */
+       atomic_dec(&era->ref);
+
+       /* allocate again next time */
+       reg->alloc = 0;
+}
+
+static struct event_constraint *
+intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
+                             struct perf_event *event)
+{
+       struct event_constraint *c = NULL;
+       struct hw_perf_event_extra *xreg;
+
+       xreg = &event->hw.extra_reg;
+       if (xreg->idx != EXTRA_REG_NONE)
+               c = __intel_shared_reg_get_constraints(cpuc, xreg);
+       return c;
 }
 
 static struct event_constraint *
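
The refcount/config-matching rule implemented by __intel_shared_reg_get_constraints is easiest to see in isolation: the first event to claim a shared MSR locks in its config; later events may share the slot only if they request the identical config, otherwise they receive the empty constraint and cannot be co-scheduled. The following is a minimal user-space model of that rule, not kernel code; the names and simplified locking-free slot are purely illustrative:

	#include <stdio.h>
	#include <stdbool.h>
	#include <stdint.h>

	/* toy model of one er_account slot */
	struct slot { int ref; uint64_t config; };

	/* returns true if the event may be scheduled (claims or shares the slot) */
	static bool get_shared_reg(struct slot *s, uint64_t config)
	{
		if (s->ref == 0 || s->config == config) {
			s->config = config;	/* lock in the MSR value */
			s->ref++;		/* one more user */
			return true;		/* "unconstrained" */
		}
		return false;			/* conflict: "emptyconstraint" */
	}

	static void put_shared_reg(struct slot *s)
	{
		s->ref--;			/* allocate again next time */
	}

	int main(void)
	{
		struct slot rsp0 = { 0, 0 };

		printf("%d\n", get_shared_reg(&rsp0, 0x01)); /* 1: first user claims */
		printf("%d\n", get_shared_reg(&rsp0, 0x01)); /* 1: same config shares */
		printf("%d\n", get_shared_reg(&rsp0, 0xff)); /* 0: conflicting config */
		put_shared_reg(&rsp0);
		put_shared_reg(&rsp0);
		printf("%d\n", get_shared_reg(&rsp0, 0xff)); /* 1: slot is free again */
		return 0;
	}
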
@@ -1111,49 +1112,28 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
        if (c)
                return c;
 
-       c = intel_percore_constraints(cpuc, event);
+       c = intel_shared_regs_constraints(cpuc, event);
        if (c)
                return c;
 
        return x86_get_event_constraints(cpuc, event);
 }
 
-static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+static void
+intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
                                        struct perf_event *event)
 {
-       struct extra_reg *er;
-       struct intel_percore *pc;
-       struct er_account *era;
-       struct hw_perf_event *hwc = &event->hw;
-       int i, allref;
-
-       if (!cpuc->percore_used)
-               return;
+       struct hw_perf_event_extra *reg;
 
-       for (er = x86_pmu.extra_regs; er->msr; er++) {
-               if (er->event != (hwc->config & er->config_mask))
-                       continue;
+       reg = &event->hw.extra_reg;
+       if (reg->idx != EXTRA_REG_NONE)
+               __intel_shared_reg_put_constraints(cpuc, reg);
+}
 
-               pc = cpuc->per_core;
-               raw_spin_lock(&pc->lock);
-               for (i = 0; i < MAX_EXTRA_REGS; i++) {
-                       era = &pc->regs[i];
-                       if (era->ref > 0 &&
-                           era->extra_config == hwc->extra_config &&
-                           era->extra_reg == er->msr) {
-                               era->ref--;
-                               hwc->extra_alloc = 0;
-                               break;
-                       }
-               }
-               allref = 0;
-               for (i = 0; i < MAX_EXTRA_REGS; i++)
-                       allref += pc->regs[i].ref;
-               if (allref == 0)
-                       cpuc->percore_used = 0;
-               raw_spin_unlock(&pc->lock);
-               break;
-       }
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+                                       struct perf_event *event)
+{
+       intel_put_shared_regs_event_constraints(cpuc, event);
 }
 
 static int intel_pmu_hw_config(struct perf_event *event)
@@ -1231,20 +1211,36 @@ static __initconst const struct x86_pmu core_pmu = {
        .event_constraints      = intel_core_event_constraints,
 };
 
+static struct intel_shared_regs *allocate_shared_regs(int cpu)
+{
+       struct intel_shared_regs *regs;
+       int i;
+
+       regs = kzalloc_node(sizeof(struct intel_shared_regs),
+                           GFP_KERNEL, cpu_to_node(cpu));
+       if (regs) {
+               /*
+                * initialize the locks to keep lockdep happy
+                */
+               for (i = 0; i < EXTRA_REG_MAX; i++)
+                       raw_spin_lock_init(&regs->regs[i].lock);
+
+               regs->core_id = -1;
+       }
+       return regs;
+}
+
 static int intel_pmu_cpu_prepare(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-       if (!cpu_has_ht_siblings())
+       if (!x86_pmu.extra_regs)
                return NOTIFY_OK;
 
-       cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
-                                     GFP_KERNEL, cpu_to_node(cpu));
-       if (!cpuc->per_core)
+       cpuc->shared_regs = allocate_shared_regs(cpu);
+       if (!cpuc->shared_regs)
                return NOTIFY_BAD;
 
-       raw_spin_lock_init(&cpuc->per_core->lock);
-       cpuc->per_core->core_id = -1;
        return NOTIFY_OK;
 }
 
@@ -1260,32 +1256,34 @@ static void intel_pmu_cpu_starting(int cpu)
         */
        intel_pmu_lbr_reset();
 
-       if (!cpu_has_ht_siblings())
+       if (!cpuc->shared_regs)
                return;
 
        for_each_cpu(i, topology_thread_cpumask(cpu)) {
-               struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
+               struct intel_shared_regs *pc;
 
+               pc = per_cpu(cpu_hw_events, i).shared_regs;
                if (pc && pc->core_id == core_id) {
-                       kfree(cpuc->per_core);
-                       cpuc->per_core = pc;
+                       kfree(cpuc->shared_regs);
+                       cpuc->shared_regs = pc;
                        break;
                }
        }
 
-       cpuc->per_core->core_id = core_id;
-       cpuc->per_core->refcnt++;
+       cpuc->shared_regs->core_id = core_id;
+       cpuc->shared_regs->refcnt++;
 }
 
 static void intel_pmu_cpu_dying(int cpu)
 {
        struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
-       struct intel_percore *pc = cpuc->per_core;
+       struct intel_shared_regs *pc;
 
+       pc = cpuc->shared_regs;
        if (pc) {
                if (pc->core_id == -1 || --pc->refcnt == 0)
                        kfree(pc);
-               cpuc->per_core = NULL;
+               cpuc->shared_regs = NULL;
        }
 
        fini_debug_store_on_cpu(cpu);
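
The starting/dying callbacks above implement a small adoption protocol: every CPU allocates its own intel_shared_regs in the prepare step, the first sibling to come online on a core keeps its copy, and later siblings free their private copy and reuse the winner's, counted via refcnt; the last CPU to go offline frees the structure. A minimal user-space model of that handoff for one core with two HT siblings is sketched below; the names are illustrative only and do not correspond to kernel APIs:

	#include <stdio.h>
	#include <stdlib.h>

	struct shared_regs { int refcnt; int core_id; };
	struct cpu { struct shared_regs *regs; };

	static void cpu_prepare(struct cpu *c)
	{
		c->regs = calloc(1, sizeof(*c->regs));
		c->regs->core_id = -1;		/* not yet tied to a core */
	}

	static void cpu_starting(struct cpu *self, struct cpu *sibling, int core_id)
	{
		/* adopt the sibling's structure if it already serves this core */
		if (sibling->regs && sibling->regs->core_id == core_id) {
			free(self->regs);
			self->regs = sibling->regs;
		}
		self->regs->core_id = core_id;
		self->regs->refcnt++;
	}

	static void cpu_dying(struct cpu *c)
	{
		if (c->regs->core_id == -1 || --c->regs->refcnt == 0)
			free(c->regs);
		c->regs = NULL;
	}

	int main(void)
	{
		struct cpu t0 = { 0 }, t1 = { 0 };

		cpu_prepare(&t0);
		cpu_prepare(&t1);
		cpu_starting(&t0, &t1, 0);	/* t0 keeps its copy, refcnt = 1 */
		cpu_starting(&t1, &t0, 0);	/* t1 frees its copy, shares t0's, refcnt = 2 */
		printf("shared=%d refcnt=%d\n", t0.regs == t1.regs, t0.regs->refcnt);
		cpu_dying(&t1);			/* refcnt = 1, structure kept */
		cpu_dying(&t0);			/* refcnt = 0, structure freed */
		return 0;
	}
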
@@ -1436,7 +1434,6 @@ static __init int intel_pmu_init(void)
 
                x86_pmu.event_constraints = intel_nehalem_event_constraints;
                x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-               x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
@@ -1481,7 +1478,6 @@ static __init int intel_pmu_init(void)
                intel_pmu_lbr_init_nhm();
 
                x86_pmu.event_constraints = intel_westmere_event_constraints;
-               x86_pmu.percore_constraints = intel_westmere_percore_constraints;
                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
                x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
                x86_pmu.extra_regs = intel_westmere_extra_regs;