#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
+#include <asm/firmware.h>
struct cpu_hw_counters {
int n_counters;
struct power_pmu *ppmu;
+/*
+ * Normally, to ignore kernel events we set the FCS (freeze counters
+ * in supervisor mode) bit in MMCR0, but if the kernel runs with the
+ * hypervisor bit set in the MSR, or if we are running on a processor
+ * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
+ * then we need to use the FCHV bit to ignore kernel events.
+ */
+static unsigned int freeze_counters_kernel = MMCR0_FCS;
+
+static void perf_counter_interrupt(struct pt_regs *regs);
+
void perf_counter_print_debug(void)
{
}
return 0;
}
+/*
+ * Check if newly-added counters have consistent settings for
+ * exclude_{user,kernel,hv} with each other and any previously
+ * added counters.
+ */
+static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
+{
+ int eu, ek, eh;
+ int i, n;
+ struct perf_counter *counter;
+
+ n = n_prev + n_new;
+ if (n <= 1)
+ return 0;
+
+ eu = ctrs[0]->hw_event.exclude_user;
+ ek = ctrs[0]->hw_event.exclude_kernel;
+ eh = ctrs[0]->hw_event.exclude_hv;
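+ /*
+ * ctrs[0] provides the reference settings; previously added
+ * counters are already known to be consistent, so only check
+ * the new ones (skipping ctrs[0] itself when there are none).
+ */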
+ if (n_prev == 0)
+ n_prev = 1;
+ for (i = n_prev; i < n; ++i) {
+ counter = ctrs[i];
+ if (counter->hw_event.exclude_user != eu ||
+ counter->hw_event.exclude_kernel != ek ||
+ counter->hw_event.exclude_hv != eh)
+ return -EAGAIN;
+ }
+ return 0;
+}
+
static void power_perf_read(struct perf_counter *counter)
{
long val, delta, prev;
goto out;
}
+ /*
+ * Add in MMCR0 freeze bits corresponding to the
+ * hw_event.exclude_* bits for the first counter.
+ * We have already checked that all counters have the
+ * same values for these bits as the first counter.
+ */
+ counter = cpuhw->counter[0];
+ if (counter->hw_event.exclude_user)
+ cpuhw->mmcr[0] |= MMCR0_FCP;
+ if (counter->hw_event.exclude_kernel)
+ cpuhw->mmcr[0] |= freeze_counters_kernel;
+ if (counter->hw_event.exclude_hv)
+ cpuhw->mmcr[0] |= MMCR0_FCHV;
+
/*
* Write the new configuration to MMCR* with the freeze
* bit set and set the hardware counters to their initial values.
atomic64_set(&counter->hw.prev_count, val);
counter->hw.idx = hwc_index[i] + 1;
write_pmc(counter->hw.idx, val);
+ perf_counter_update_userpage(counter);
}
mb();
cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
{
counter->state = PERF_COUNTER_STATE_ACTIVE;
counter->oncpu = cpu;
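+ /* exclude the time the counter was stopped from its running time */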
+ counter->tstamp_running += counter->ctx->time_now -
+ counter->tstamp_stopped;
if (is_software_counter(counter))
counter->hw_ops->enable(counter);
}
&cpuhw->counter[n0], &cpuhw->events[n0]);
if (n < 0)
return -EAGAIN;
+ if (check_excludes(cpuhw->counter, n0, n))
+ return -EAGAIN;
if (power_check_constraints(cpuhw->events, n + n0))
return -EAGAIN;
cpuhw->n_counters = n0 + n;
goto out;
cpuhw->counter[n0] = counter;
cpuhw->events[n0] = counter->hw.config;
+ if (check_excludes(cpuhw->counter, n0, 1))
+ goto out;
if (power_check_constraints(cpuhw->events, n0 + 1))
goto out;
ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
write_pmc(counter->hw.idx, 0);
counter->hw.idx = 0;
+ perf_counter_update_userpage(counter);
break;
}
}
.read = power_perf_read
};
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
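+ /*
+ * atomic_add_unless() decrements num_counters unless it is 1;
+ * if this is the last counter, take the mutex so that the final
+ * decrement and release_pmc_hardware() can't race with a
+ * concurrent reserve in hw_perf_counter_init().
+ */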
+ if (!atomic_add_unless(&num_counters, -1, 1)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_counters) == 0)
+ release_pmc_hardware();
+ mutex_unlock(&pmc_reserve_mutex);
+ }
+}
+
const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
struct perf_counter *ctrs[MAX_HWCOUNTERS];
unsigned int events[MAX_HWCOUNTERS];
int n;
+ int err;
if (!ppmu)
return NULL;
if ((s64)counter->hw_event.irq_period < 0)
return NULL;
- ev = counter->hw_event.type;
- if (!counter->hw_event.raw) {
- if (ev >= ppmu->n_generic ||
- ppmu->generic_events[ev] == 0)
+ if (!perf_event_raw(&counter->hw_event)) {
+ ev = perf_event_id(&counter->hw_event);
+ if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return NULL;
ev = ppmu->generic_events[ev];
+ } else {
+ ev = perf_event_config(&counter->hw_event);
}
counter->hw.config_base = ev;
counter->hw.idx = 0;
+ /*
+ * If we are not running on a hypervisor, force the
+ * exclude_hv bit to 0 so that we don't care what
+ * the user set it to.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ counter->hw_event.exclude_hv = 0;
+
/*
* If this is in a group, check if it can go on with all the
* other hardware counters in the group. We assume the counter
if (n < 0)
return NULL;
}
- events[n++] = ev;
- if (power_check_constraints(events, n))
+ events[n] = ev;
+ ctrs[n] = counter;
+ if (check_excludes(ctrs, n, 1))
+ return NULL;
+ if (power_check_constraints(events, n + 1))
return NULL;
- counter->hw.config = events[n - 1];
+ counter->hw.config = events[n];
atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
- return &power_perf_ops;
-}
-
-/*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
- int i;
- struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
- struct perf_counter *counter;
- set_perf_counter_pending(0);
- for (i = 0; i < cpuhw->n_counters; ++i) {
- counter = cpuhw->counter[i];
- if (counter && counter->wakeup_pending) {
- counter->wakeup_pending = 0;
- wake_up(&counter->waitq);
- }
- }
-}
-
-/*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
- struct perf_data *irqdata = counter->irqdata;
-
- if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
- irqdata->overrun++;
- } else {
- u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
- *p = data;
- irqdata->len += sizeof(u64);
+ /*
+ * See if we need to reserve the PMU.
+ * If no counters are currently in use, then we have to take a
+ * mutex to ensure that we don't race with another task doing
+ * reserve_pmc_hardware or release_pmc_hardware.
+ */
+ err = 0;
+ if (!atomic_inc_not_zero(&num_counters)) {
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_read(&num_counters) == 0 &&
+ reserve_pmc_hardware(perf_counter_interrupt))
+ err = -EBUSY;
+ else
+ atomic_inc(&num_counters);
+ mutex_unlock(&pmc_reserve_mutex);
}
-}
+ counter->destroy = hw_perf_counter_destroy;
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
- struct perf_counter *leader, *sub;
-
- leader = counter->group_leader;
- list_for_each_entry(sub, &leader->sibling_list, list_entry) {
- if (sub != counter)
- sub->hw_ops->read(sub);
- perf_store_irq_data(counter, sub->hw_event.type);
- perf_store_irq_data(counter, atomic64_read(&sub->count));
- }
+ if (err)
+ return NULL;
+ return &power_perf_ops;
}
/*
write_pmc(counter->hw.idx, val);
atomic64_set(&counter->hw.prev_count, val);
atomic64_set(&counter->hw.period_left, left);
+ perf_counter_update_userpage(counter);
/*
* Finally record data if requested.
*/
- if (record) {
- switch (counter->hw_event.record_type) {
- case PERF_RECORD_SIMPLE:
- break;
- case PERF_RECORD_IRQ:
- perf_store_irq_data(counter, instruction_pointer(regs));
- counter->wakeup_pending = 1;
- break;
- case PERF_RECORD_GROUP:
- perf_handle_group(counter);
- counter->wakeup_pending = 1;
- break;
- }
- }
+ if (record)
+ perf_counter_output(counter, 1, regs);
}
/*
struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
struct perf_counter *counter;
long val;
- int need_wakeup = 0, found = 0;
+ int found = 0;
for (i = 0; i < cpuhw->n_counters; ++i) {
counter = cpuhw->counter[i];
/* counter has overflowed */
found = 1;
record_and_restart(counter, val, regs);
- if (counter->wakeup_pending)
- need_wakeup = 1;
}
}
/*
* If we need a wakeup, check whether interrupts were soft-enabled
* when we took the interrupt. If they were, we can wake stuff up
- * immediately; otherwise we'll have to set a flag and do the
- * wakeup when interrupts get soft-enabled.
+ * immediately; otherwise we'll have to do the wakeup when interrupts
+ * get soft-enabled.
*/
- if (need_wakeup) {
- if (regs->softe) {
- irq_enter();
- perf_counter_do_pending();
- irq_exit();
- } else {
- set_perf_counter_pending(1);
- }
+ if (test_perf_counter_pending() && regs->softe) {
+ irq_enter();
+ clear_perf_counter_pending();
+ perf_counter_do_pending();
+ irq_exit();
}
}
cpuhw->mmcr[0] = MMCR0_FC;
}
+extern struct power_pmu power4_pmu;
extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
extern struct power_pmu power6_pmu;
static int init_perf_counters(void)
{
unsigned long pvr;
- if (reserve_pmc_hardware(perf_counter_interrupt)) {
- printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
- return -EBUSY;
- }
-
/* XXX should get this from cputable */
pvr = mfspr(SPRN_PVR);
switch (PVR_VER(pvr)) {
+ case PV_POWER4:
+ case PV_POWER4p:
+ ppmu = &power4_pmu;
+ break;
case PV_970:
case PV_970FX:
case PV_970MP:
ppmu = &ppc970_pmu;
break;
+ case PV_POWER5:
+ ppmu = &power5_pmu;
+ break;
+ case PV_POWER5p:
+ ppmu = &power5p_pmu;
+ break;
case 0x3e:
ppmu = &power6_pmu;
break;
}
+
+ /*
+ * Use FCHV to ignore kernel events if MSR.HV is set.
+ */
+ if (mfmsr() & MSR_HV)
+ freeze_counters_kernel = MMCR0_FCHV;
+
return 0;
}