perf_counter: powerpc: only reserve PMU hardware when we need it
[deliverable/linux.git] / arch/powerpc/kernel/perf_counter.c
index bd6ba85beb5492badc062e37a662e7af7e6bcc5d..560dd1e7b524472c35309c54cfcac5d57ccaf2bd 100644
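The change below stops reserving the PMU unconditionally in init_perf_counters() and instead reserves it in hw_perf_counter_init() when the first hardware counter is created, releasing it again from hw_perf_counter_destroy() when the last counter goes away. A num_counters refcount tracks users and pmc_reserve_mutex serializes the transitions through zero. For illustration only, here is a minimal userspace sketch of the same acquire-on-first-use / release-on-last-use pattern, written with C11 atomics and a pthread mutex instead of the kernel's atomic_t and mutex APIs; all names in the sketch (num_users, reserve_hardware, and so on) are made up for the example and are not part of the patch.

/*
 * Illustrative userspace analogue: the first user reserves the hardware,
 * the last user releases it, and a mutex covers the transitions through
 * zero so reserve and release can never race with each other.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int num_users;
static pthread_mutex_t reserve_mutex = PTHREAD_MUTEX_INITIALIZER;

static int reserve_hardware(void)  { puts("hardware reserved"); return 0; }
static void release_hardware(void) { puts("hardware released"); }

/* Increment *v only if it is already non-zero (like atomic_inc_not_zero()). */
static bool inc_not_zero(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 0)
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return true;
	return false;
}

/* Decrement *v unless it equals 1 (like atomic_add_unless(v, -1, 1)). */
static bool dec_unless_one(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 1)
		if (atomic_compare_exchange_weak(v, &old, old - 1))
			return true;
	return false;
}

int counter_init(void)
{
	int err = 0;

	if (!inc_not_zero(&num_users)) {
		/* Possibly the first user: only one task may do the reservation. */
		pthread_mutex_lock(&reserve_mutex);
		if (atomic_load(&num_users) == 0 && reserve_hardware())
			err = -1;			/* reservation failed */
		else
			atomic_fetch_add(&num_users, 1);
		pthread_mutex_unlock(&reserve_mutex);
	}
	return err;
}

void counter_destroy(void)
{
	if (!dec_unless_one(&num_users)) {
		/* Possibly the last user: release under the same mutex. */
		pthread_mutex_lock(&reserve_mutex);
		if (atomic_fetch_sub(&num_users, 1) == 1)
			release_hardware();
		pthread_mutex_unlock(&reserve_mutex);
	}
}

The property mirrored from the patch is that the hardware is only reserved or released with the mutex held and the count at zero, while the common paths (count already non-zero) stay lock-free.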
@@ -32,6 +32,17 @@ DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);
 
 struct power_pmu *ppmu;
 
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
+static void perf_counter_interrupt(struct pt_regs *regs);
+
 void perf_counter_print_debug(void)
 {
 }
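As a side note, the mapping from the exclude_* flags to MMCR0 freeze bits touched in the next hunk can be summarized as below. This is an illustration only, written as a pure function with the bit masks passed in, since the real MMCR0_FCP/MMCR0_FCS/MMCR0_FCHV values are defined in the kernel headers; it is not code from the patch.

/*
 * Illustration only: how the exclude_* flags select MMCR0 freeze bits.
 * fc_kernel is MMCR0_FCS normally, or MMCR0_FCHV when the kernel runs
 * with the MSR hypervisor bit set (e.g. on Apple G5 machines), as
 * chosen once at init time by this patch.
 */
unsigned long mmcr0_freeze_bits(int exclude_user, int exclude_kernel,
				int exclude_hv, unsigned long fcp,
				unsigned long fc_kernel, unsigned long fchv)
{
	unsigned long mmcr0 = 0;

	if (exclude_user)
		mmcr0 |= fcp;		/* freeze counters in problem (user) state */
	if (exclude_kernel)
		mmcr0 |= fc_kernel;	/* freeze in supervisor or hypervisor state */
	if (exclude_hv)
		mmcr0 |= fchv;		/* freeze counters in hypervisor state */
	return mmcr0;
}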
@@ -364,7 +375,7 @@ void hw_perf_restore(u64 disable)
        if (counter->hw_event.exclude_user)
                cpuhw->mmcr[0] |= MMCR0_FCP;
        if (counter->hw_event.exclude_kernel)
-               cpuhw->mmcr[0] |= MMCR0_FCS;
+               cpuhw->mmcr[0] |= freeze_counters_kernel;
        if (counter->hw_event.exclude_hv)
                cpuhw->mmcr[0] |= MMCR0_FCHV;
 
@@ -408,6 +419,7 @@ void hw_perf_restore(u64 disable)
                atomic64_set(&counter->hw.prev_count, val);
                counter->hw.idx = hwc_index[i] + 1;
                write_pmc(counter->hw.idx, val);
+               perf_counter_update_userpage(counter);
        }
        mb();
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
@@ -445,6 +457,8 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
        counter->state = PERF_COUNTER_STATE_ACTIVE;
        counter->oncpu = cpu;
+       counter->tstamp_running += counter->ctx->time_now -
+               counter->tstamp_stopped;
        if (is_software_counter(counter))
                counter->hw_ops->enable(counter);
 }
@@ -563,6 +577,7 @@ static void power_perf_disable(struct perf_counter *counter)
                        ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
+                       perf_counter_update_userpage(counter);
                        break;
                }
        }
@@ -581,6 +596,24 @@ struct hw_perf_counter_ops power_perf_ops = {
        .read = power_perf_read
 };
 
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+       if (!atomic_add_unless(&num_counters, -1, 1)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_dec_return(&num_counters) == 0)
+                       release_pmc_hardware();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
+
 const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
@@ -588,17 +621,19 @@ hw_perf_counter_init(struct perf_counter *counter)
        struct perf_counter *ctrs[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        int n;
+       int err;
 
        if (!ppmu)
                return NULL;
        if ((s64)counter->hw_event.irq_period < 0)
                return NULL;
-       ev = counter->hw_event.type;
-       if (!counter->hw_event.raw) {
-               if (ev >= ppmu->n_generic ||
-                   ppmu->generic_events[ev] == 0)
+       if (!perf_event_raw(&counter->hw_event)) {
+               ev = perf_event_id(&counter->hw_event);
+               if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
                        return NULL;
                ev = ppmu->generic_events[ev];
+       } else {
+               ev = perf_event_config(&counter->hw_event);
        }
        counter->hw.config_base = ev;
        counter->hw.idx = 0;
@@ -606,10 +641,7 @@ hw_perf_counter_init(struct perf_counter *counter)
        /*
         * If we are not running on a hypervisor, force the
         * exclude_hv bit to 0 so that we don't care what
-        * the user set it to.  This also means that we don't
-        * set the MMCR0_FCHV bit, which unconditionally freezes
-        * the counters on the PPC970 variants used in Apple G5
-        * machines (since MSR.HV is always 1 on those machines).
+        * the user set it to.
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                counter->hw_event.exclude_hv = 0;
@@ -627,6 +659,7 @@ hw_perf_counter_init(struct perf_counter *counter)
                        return NULL;
        }
        events[n] = ev;
+       ctrs[n] = counter;
        if (check_excludes(ctrs, n, 1))
                return NULL;
        if (power_check_constraints(events, n + 1))
@@ -634,61 +667,28 @@ hw_perf_counter_init(struct perf_counter *counter)
 
        counter->hw.config = events[n];
        atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
-       return &power_perf_ops;
-}
-
-/*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
-       int i;
-       struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-       struct perf_counter *counter;
-
-       set_perf_counter_pending(0);
-       for (i = 0; i < cpuhw->n_counters; ++i) {
-               counter = cpuhw->counter[i];
-               if (counter && counter->wakeup_pending) {
-                       counter->wakeup_pending = 0;
-                       wake_up(&counter->waitq);
-               }
-       }
-}
-
-/*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-       struct perf_data *irqdata = counter->irqdata;
-
-       if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-               irqdata->overrun++;
-       } else {
-               u64 *p = (u64 *) &irqdata->data[irqdata->len];
 
-               *p = data;
-               irqdata->len += sizeof(u64);
+       /*
+        * See if we need to reserve the PMU.
+        * If no counters are currently in use, then we have to take a
+        * mutex to ensure that we don't race with another task doing
+        * reserve_pmc_hardware or release_pmc_hardware.
+        */
+       err = 0;
+       if (!atomic_inc_not_zero(&num_counters)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&num_counters) == 0 &&
+                   reserve_pmc_hardware(perf_counter_interrupt))
+                       err = -EBUSY;
+               else
+                       atomic_inc(&num_counters);
+               mutex_unlock(&pmc_reserve_mutex);
        }
-}
+       counter->destroy = hw_perf_counter_destroy;
 
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
-       struct perf_counter *leader, *sub;
-
-       leader = counter->group_leader;
-       list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-               if (sub != counter)
-                       sub->hw_ops->read(sub);
-               perf_store_irq_data(counter, sub->hw_event.type);
-               perf_store_irq_data(counter, atomic64_read(&sub->count));
-       }
+       if (err)
+               return NULL;
+       return &power_perf_ops;
 }
 
 /*
@@ -726,24 +726,13 @@ static void record_and_restart(struct perf_counter *counter, long val,
        write_pmc(counter->hw.idx, val);
        atomic64_set(&counter->hw.prev_count, val);
        atomic64_set(&counter->hw.period_left, left);
+       perf_counter_update_userpage(counter);
 
        /*
         * Finally record data if requested.
         */
-       if (record) {
-               switch (counter->hw_event.record_type) {
-               case PERF_RECORD_SIMPLE:
-                       break;
-               case PERF_RECORD_IRQ:
-                       perf_store_irq_data(counter, instruction_pointer(regs));
-                       counter->wakeup_pending = 1;
-                       break;
-               case PERF_RECORD_GROUP:
-                       perf_handle_group(counter);
-                       counter->wakeup_pending = 1;
-                       break;
-               }
-       }
+       if (record)
+               perf_counter_output(counter, 1, regs);
 }
 
 /*
@@ -755,7 +744,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;
        long val;
-       int need_wakeup = 0, found = 0;
+       int found = 0;
 
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
@@ -764,8 +753,6 @@ static void perf_counter_interrupt(struct pt_regs *regs)
                        /* counter has overflowed */
                        found = 1;
                        record_and_restart(counter, val, regs);
-                       if (counter->wakeup_pending)
-                               need_wakeup = 1;
                }
        }
 
@@ -795,17 +782,14 @@ static void perf_counter_interrupt(struct pt_regs *regs)
        /*
         * If we need a wakeup, check whether interrupts were soft-enabled
         * when we took the interrupt.  If they were, we can wake stuff up
-        * immediately; otherwise we'll have to set a flag and do the
-        * wakeup when interrupts get soft-enabled.
+        * immediately; otherwise we'll have to do the wakeup when interrupts
+        * get soft-enabled.
         */
-       if (need_wakeup) {
-               if (regs->softe) {
-                       irq_enter();
-                       perf_counter_do_pending();
-                       irq_exit();
-               } else {
-                       set_perf_counter_pending(1);
-               }
+       if (test_perf_counter_pending() && regs->softe) {
+               irq_enter();
+               clear_perf_counter_pending();
+               perf_counter_do_pending();
+               irq_exit();
        }
 }
 
@@ -817,30 +801,45 @@ void hw_perf_counter_setup(int cpu)
        cpuhw->mmcr[0] = MMCR0_FC;
 }
 
+extern struct power_pmu power4_pmu;
 extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
 extern struct power_pmu power6_pmu;
 
 static int init_perf_counters(void)
 {
        unsigned long pvr;
 
-       if (reserve_pmc_hardware(perf_counter_interrupt)) {
-               printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
-               return -EBUSY;
-       }
-
        /* XXX should get this from cputable */
        pvr = mfspr(SPRN_PVR);
        switch (PVR_VER(pvr)) {
+       case PV_POWER4:
+       case PV_POWER4p:
+               ppmu = &power4_pmu;
+               break;
        case PV_970:
        case PV_970FX:
        case PV_970MP:
                ppmu = &ppc970_pmu;
                break;
+       case PV_POWER5:
+               ppmu = &power5_pmu;
+               break;
+       case PV_POWER5p:
+               ppmu = &power5p_pmu;
+               break;
        case 0x3e:
                ppmu = &power6_pmu;
                break;
        }
+
+       /*
+        * Use FCHV to ignore kernel events if MSR.HV is set.
+        */
+       if (mfmsr() & MSR_HV)
+               freeze_counters_kernel = MMCR0_FCHV;
+
        return 0;
 }
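For reference, the PVR decoding assumed by the switch above splits the 32-bit register into a version (processor family) field in the upper halfword and a revision in the lower halfword; 0x3e is the POWER6 version used in the case label. A small standalone sketch follows; the sample PVR value is made up for the example.

#include <stdio.h>

/* Same field split as the kernel's PVR_VER()/PVR_REV() macros. */
#define PVR_VER(pvr)	(((pvr) >> 16) & 0xFFFF)
#define PVR_REV(pvr)	((pvr) & 0xFFFF)

int main(void)
{
	unsigned int pvr = 0x003e0201;	/* hypothetical POWER6 PVR value */

	printf("PVR version 0x%04x, revision 0x%04x\n",
	       PVR_VER(pvr), PVR_REV(pvr));
	return 0;
}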
 