perf_counter: More aggressive frequency adjustment
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a5d3e2aedd2f107ad5bf5a74153e4679ca97ee42..51c571ee4d0b1369e1030b0b1f5d91dca8140885 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1184,21 +1184,47 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
 static void perf_log_throttle(struct perf_counter *counter, int enable);
 static void perf_log_period(struct perf_counter *counter, u64 period);
 
-static void perf_adjust_freq(struct perf_counter_context *ctx)
+static void perf_adjust_period(struct perf_counter *counter, u64 events)
 {
-       struct perf_counter *counter;
-       u64 interrupts, sample_period;
-       u64 events, period;
+       struct hw_perf_counter *hwc = &counter->hw;
+       u64 period, sample_period;
        s64 delta;
 
+       events *= hwc->sample_period;
+       period = div64_u64(events, counter->attr.sample_freq);
+
+       delta = (s64)(period - hwc->sample_period);
+       delta = (delta + 7) / 8; /* low pass filter */
+
+       sample_period = hwc->sample_period + delta;
+
+       if (!sample_period)
+               sample_period = 1;
+
+       perf_log_period(counter, sample_period);
+
+       hwc->sample_period = sample_period;
+}
+
+static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
+{
+       struct perf_counter *counter;
+       struct hw_perf_counter *hwc;
+       u64 interrupts, freq;
+
        spin_lock(&ctx->lock);
        list_for_each_entry(counter, &ctx->counter_list, list_entry) {
                if (counter->state != PERF_COUNTER_STATE_ACTIVE)
                        continue;
 
-               interrupts = counter->hw.interrupts;
-               counter->hw.interrupts = 0;
+               hwc = &counter->hw;
+
+               interrupts = hwc->interrupts;
+               hwc->interrupts = 0;
 
+               /*
+                * unthrottle counters on the tick
+                */
                if (interrupts == MAX_INTERRUPTS) {
                        perf_log_throttle(counter, 1);
                        counter->pmu->unthrottle(counter);
@@ -1208,20 +1234,38 @@ static void perf_adjust_freq(struct perf_counter_context *ctx)
                if (!counter->attr.freq || !counter->attr.sample_freq)
                        continue;
 
-               events = HZ * interrupts * counter->hw.sample_period;
-               period = div64_u64(events, counter->attr.sample_freq);
+               /*
+                * if the specified freq < HZ then we need to skip ticks
+                */
+               if (counter->attr.sample_freq < HZ) {
+                       freq = counter->attr.sample_freq;
 
-               delta = (s64)(1 + period - counter->hw.sample_period);
-               delta >>= 1;
+                       hwc->freq_count += freq;
+                       hwc->freq_interrupts += interrupts;
 
-               sample_period = counter->hw.sample_period + delta;
+                       if (hwc->freq_count < HZ)
+                               continue;
 
-               if (!sample_period)
-                       sample_period = 1;
+                       interrupts = hwc->freq_interrupts;
+                       hwc->freq_interrupts = 0;
+                       hwc->freq_count -= HZ;
+               } else
+                       freq = HZ;
 
-               perf_log_period(counter, sample_period);
+               perf_adjust_period(counter, freq * interrupts);
 
-               counter->hw.sample_period = sample_period;
+               /*
+                * In order to avoid being stalled by an (accidental) huge
+                * sample period, force reset the sample period if we didn't
+                * get any events in this freq period.
+                */
+               if (!interrupts) {
+                       perf_disable();
+                       counter->pmu->disable(counter);
+                       atomic64_set(&hwc->period_left, 0);
+                       counter->pmu->enable(counter);
+                       perf_enable();
+               }
        }
        spin_unlock(&ctx->lock);
 }
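
The new perf_adjust_period() computes the period that would have produced the requested sample_freq at the observed interrupt rate and then moves only an eighth of the way towards it, so a single noisy tick cannot swing the period wildly; counters asking for fewer than HZ samples per second accumulate ticks in freq_count/freq_interrupts until a full HZ worth has passed. Below is a minimal userspace sketch of the same arithmetic, not kernel code: the function and variable names, the once-per-second loop and the example rates are all made up for illustration.

#include <stdio.h>
#include <stdint.h>

/* mirrors perf_adjust_period(): pick the period that would have hit
 * sample_freq at the observed rate, then low-pass filter the step */
static uint64_t adjust_period(uint64_t sample_period, uint64_t sample_freq,
                              uint64_t interrupts_per_sec)
{
        uint64_t events = interrupts_per_sec * sample_period;  /* events counted per second */
        uint64_t period = events / sample_freq;                /* ideal period for that rate */
        int64_t delta = (int64_t)(period - sample_period);

        delta = (delta + 7) / 8;        /* low pass filter */

        sample_period += delta;
        return sample_period ? sample_period : 1;
}

int main(void)
{
        uint64_t period = 1;                    /* freq counters now start at 1 */
        const uint64_t sample_freq = 1000;      /* request 1000 samples/sec */
        const uint64_t event_rate = 2000000;    /* pretend the event fires 2M times/sec */
        int i;

        for (i = 0; i < 40; i++) {
                uint64_t interrupts = event_rate / period;     /* overflows/sec at this period */

                period = adjust_period(period, sample_freq, interrupts);
                printf("step %2d: sample_period %llu\n", i,
                       (unsigned long long)period);
        }
        /* converges towards event_rate / sample_freq == 2000 */
        return 0;
}
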
@@ -1261,9 +1305,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
        cpuctx = &per_cpu(perf_cpu_context, cpu);
        ctx = curr->perf_counter_ctxp;
 
-       perf_adjust_freq(&cpuctx->ctx);
+       perf_ctx_adjust_freq(&cpuctx->ctx);
        if (ctx)
-               perf_adjust_freq(ctx);
+               perf_ctx_adjust_freq(ctx);
 
        perf_counter_cpu_sched_out(cpuctx);
        if (ctx)
@@ -1629,10 +1673,10 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
 
                counter->attr.sample_freq = value;
        } else {
+               perf_log_period(counter, value);
+
                counter->attr.sample_period = value;
                counter->hw.sample_period = value;
-
-               perf_log_period(counter, value);
        }
 unlock:
        spin_unlock_irq(&ctx->lock);
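
One ordering detail worth noting: perf_log_period() must run before counter->hw.sample_period is overwritten here, because with this patch it returns early when the value being logged already equals hw.sample_period, so calling it after the assignment would always suppress the PERF_EVENT_PERIOD record.
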
@@ -2255,7 +2299,7 @@ out:
 }
 
 static void perf_output_copy(struct perf_output_handle *handle,
-                            void *buf, unsigned int len)
+                            const void *buf, unsigned int len)
 {
        unsigned int pages_mask;
        unsigned int offset;
@@ -2392,8 +2436,8 @@ static void perf_counter_output(struct perf_counter *counter,
                header.size += sizeof(u64);
        }
 
-       if (sample_type & PERF_SAMPLE_CONFIG) {
-               header.type |= PERF_SAMPLE_CONFIG;
+       if (sample_type & PERF_SAMPLE_ID) {
+               header.type |= PERF_SAMPLE_ID;
                header.size += sizeof(u64);
        }
 
@@ -2404,6 +2448,11 @@ static void perf_counter_output(struct perf_counter *counter,
                cpu_entry.cpu = raw_smp_processor_id();
        }
 
+       if (sample_type & PERF_SAMPLE_PERIOD) {
+               header.type |= PERF_SAMPLE_PERIOD;
+               header.size += sizeof(u64);
+       }
+
        if (sample_type & PERF_SAMPLE_GROUP) {
                header.type |= PERF_SAMPLE_GROUP;
                header.size += sizeof(u64) +
@@ -2439,12 +2488,15 @@ static void perf_counter_output(struct perf_counter *counter,
        if (sample_type & PERF_SAMPLE_ADDR)
                perf_output_put(&handle, addr);
 
-       if (sample_type & PERF_SAMPLE_CONFIG)
-               perf_output_put(&handle, counter->attr.config);
+       if (sample_type & PERF_SAMPLE_ID)
+               perf_output_put(&handle, counter->id);
 
        if (sample_type & PERF_SAMPLE_CPU)
                perf_output_put(&handle, cpu_entry);
 
+       if (sample_type & PERF_SAMPLE_PERIOD)
+               perf_output_put(&handle, counter->hw.sample_period);
+
        /*
         * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult.
         */
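
For readers of the mmap()'ed ring buffer, the new fields land in the sample record in the order of the perf_output_put() calls above: the counter id (PERF_SAMPLE_ID) before the cpu entry, the current sample period (PERF_SAMPLE_PERIOD) after it. A hedged sketch of the layout when only these two bits are requested; the struct name is invented, and the header fields simply mirror struct perf_event_header (u32 type, u16 misc, u16 size):

#include <stdint.h>

struct sample_id_period {
        uint32_t type;          /* has PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD set */
        uint16_t misc;
        uint16_t size;          /* total record size, 24 bytes in this case */
        uint64_t id;            /* counter->id */
        uint64_t period;        /* counter->hw.sample_period at sample time */
};
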
@@ -2681,9 +2733,10 @@ void perf_counter_comm(struct task_struct *task)
  */
 
 struct perf_mmap_event {
-       struct file     *file;
-       char            *file_name;
-       int             file_size;
+       struct vm_area_struct   *vma;
+
+       const char              *file_name;
+       int                     file_size;
 
        struct {
                struct perf_event_header        header;
@@ -2744,11 +2797,12 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
 {
        struct perf_cpu_context *cpuctx;
        struct perf_counter_context *ctx;
-       struct file *file = mmap_event->file;
+       struct vm_area_struct *vma = mmap_event->vma;
+       struct file *file = vma->vm_file;
        unsigned int size;
        char tmp[16];
        char *buf = NULL;
-       char *name;
+       const char *name;
 
        if (file) {
                buf = kzalloc(PATH_MAX, GFP_KERNEL);
@@ -2762,6 +2816,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
                        goto got_name;
                }
        } else {
+               name = arch_vma_name(mmap_event->vma);
+               if (name)
+                       goto got_name;
+
+               if (!vma->vm_mm) {
+                       name = strncpy(tmp, "[vdso]", sizeof(tmp));
+                       goto got_name;
+               }
+
                name = strncpy(tmp, "//anon", sizeof(tmp));
                goto got_name;
        }
@@ -2791,8 +2854,7 @@ got_name:
        kfree(buf);
 }
 
-void perf_counter_mmap(unsigned long addr, unsigned long len,
-                      unsigned long pgoff, struct file *file)
+void __perf_counter_mmap(struct vm_area_struct *vma)
 {
        struct perf_mmap_event mmap_event;
 
@@ -2800,12 +2862,12 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
                return;
 
        mmap_event = (struct perf_mmap_event){
-               .file   = file,
+               .vma    = vma,
                .event  = {
                        .header = { .type = PERF_EVENT_MMAP, },
-                       .start  = addr,
-                       .len    = len,
-                       .pgoff  = pgoff,
+                       .start  = vma->vm_start,
+                       .len    = vma->vm_end - vma->vm_start,
+                       .pgoff  = vma->vm_pgoff,
                },
        };
 
@@ -2817,33 +2879,41 @@ void perf_counter_mmap(unsigned long addr, unsigned long len,
  * event flow.
  */
 
+struct freq_event {
+       struct perf_event_header        header;
+       u64                             time;
+       u64                             id;
+       u64                             period;
+};
+
 static void perf_log_period(struct perf_counter *counter, u64 period)
 {
        struct perf_output_handle handle;
+       struct freq_event event;
        int ret;
 
-       struct {
-               struct perf_event_header        header;
-               u64                             time;
-               u64                             period;
-       } freq_event = {
+       if (counter->hw.sample_period == period)
+               return;
+
+       if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
+               return;
+
+       event = (struct freq_event) {
                .header = {
                        .type = PERF_EVENT_PERIOD,
                        .misc = 0,
-                       .size = sizeof(freq_event),
+                       .size = sizeof(event),
                },
                .time = sched_clock(),
+               .id = counter->id,
                .period = period,
        };
 
-       if (counter->hw.sample_period == period)
-               return;
-
-       ret = perf_output_begin(&handle, counter, sizeof(freq_event), 0, 0);
+       ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
        if (ret)
                return;
 
-       perf_output_put(&handle, freq_event);
+       perf_output_put(&handle, event);
        perf_output_end(&handle);
 }
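
perf_log_period() now bails out in two cases visible above: when the period did not actually change, and when the counter already carries its period in every sample via PERF_SAMPLE_PERIOD, which makes a separate PERF_EVENT_PERIOD record redundant. The added id field lets a consumer attribute the change to a specific counter; a hedged reader-side sketch follows, with the struct and function names invented here:

#include <stdio.h>
#include <stdint.h>

/* mirrors the freq_event layout above: perf_event_header + time, id, period */
struct period_event {
        uint32_t type;          /* PERF_EVENT_PERIOD */
        uint16_t misc;          /* 0 */
        uint16_t size;
        uint64_t time;          /* sched_clock() when the period changed */
        uint64_t id;            /* counter->id */
        uint64_t period;        /* the newly chosen sample period */
};

void handle_period_event(const struct period_event *ev)
{
        printf("counter %llu: sample period -> %llu (t=%llu ns)\n",
               (unsigned long long)ev->id,
               (unsigned long long)ev->period,
               (unsigned long long)ev->time);
}
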
 
@@ -2885,15 +2955,16 @@ int perf_counter_overflow(struct perf_counter *counter,
 {
        int events = atomic_read(&counter->event_limit);
        int throttle = counter->pmu->unthrottle != NULL;
+       struct hw_perf_counter *hwc = &counter->hw;
        int ret = 0;
 
        if (!throttle) {
-               counter->hw.interrupts++;
+               hwc->interrupts++;
        } else {
-               if (counter->hw.interrupts != MAX_INTERRUPTS) {
-                       counter->hw.interrupts++;
-                       if (HZ*counter->hw.interrupts > (u64)sysctl_perf_counter_limit) {
-                               counter->hw.interrupts = MAX_INTERRUPTS;
+               if (hwc->interrupts != MAX_INTERRUPTS) {
+                       hwc->interrupts++;
+                       if (HZ * hwc->interrupts > (u64)sysctl_perf_counter_limit) {
+                               hwc->interrupts = MAX_INTERRUPTS;
                                perf_log_throttle(counter, 0);
                                ret = 1;
                        }
@@ -2907,6 +2978,16 @@ int perf_counter_overflow(struct perf_counter *counter,
                }
        }
 
+       if (counter->attr.freq) {
+               u64 now = sched_clock();
+               s64 delta = now - hwc->freq_stamp;
+
+               hwc->freq_stamp = now;
+
+               if (delta > 0 && delta < TICK_NSEC)
+                       perf_adjust_period(counter, NSEC_PER_SEC / (int)delta);
+       }
+
        /*
         * XXX event_limit might not quite work as expected on inherited
         * counters
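
The hunk above also lets the overflow handler feed the estimator directly: an overflow arriving delta nanoseconds after the previous one extrapolates to NSEC_PER_SEC / delta overflows per second, and that figure is passed to perf_adjust_period() just like the per-tick rate. As a worked example, with HZ = 1000 (TICK_NSEC = 1,000,000 ns) two overflows spaced 250,000 ns apart give 1,000,000,000 / 250,000 = 4000 overflows per second; if sample_freq is 1000 that is four times the requested rate, so the period gets nudged upwards without waiting for the next tick. Gaps of a full tick or more are left to the regular perf_ctx_adjust_freq() path.
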
@@ -3053,14 +3134,12 @@ static int perf_swcounter_match(struct perf_counter *counter,
                                enum perf_event_types type,
                                u32 event, struct pt_regs *regs)
 {
-       u64 event_config;
-
-       event_config = ((u64) type << PERF_COUNTER_TYPE_SHIFT) | event;
-
        if (!perf_swcounter_is_counting(counter))
                return 0;
 
-       if (counter->attr.config != event_config)
+       if (counter->attr.type != type)
+               return 0;
+       if (counter->attr.config != event)
                return 0;
 
        if (regs) {
@@ -3343,7 +3422,6 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
                return NULL;
 
        counter->destroy = tp_perf_counter_destroy;
-       counter->hw.sample_period = counter->attr.sample_period;
 
        return &perf_ops_generic;
 }
@@ -3365,7 +3443,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
         * to be kernel events, and page faults are never hypervisor
         * events.
         */
-       switch (perf_event_id(&counter->attr)) {
+       switch (counter->attr.config) {
        case PERF_COUNT_CPU_CLOCK:
                pmu = &perf_ops_cpu_clock;
 
@@ -3447,10 +3525,11 @@ perf_counter_alloc(struct perf_counter_attr *attr,
        pmu = NULL;
 
        hwc = &counter->hw;
+       hwc->sample_period = attr->sample_period;
        if (attr->freq && attr->sample_freq)
-               hwc->sample_period = div64_u64(TICK_NSEC, attr->sample_freq);
-       else
-               hwc->sample_period = attr->sample_period;
+               hwc->sample_period = 1;
+
+       atomic64_set(&hwc->period_left, hwc->sample_period);
 
        /*
         * we currently do not support PERF_SAMPLE_GROUP on inherited counters
@@ -3458,13 +3537,14 @@ perf_counter_alloc(struct perf_counter_attr *attr,
        if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP))
                goto done;
 
-       if (perf_event_raw(attr)) {
+       if (attr->type == PERF_TYPE_RAW) {
                pmu = hw_perf_counter_init(counter);
                goto done;
        }
 
-       switch (perf_event_type(attr)) {
+       switch (attr->type) {
        case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
                pmu = hw_perf_counter_init(counter);
                break;
 
@@ -3650,6 +3730,9 @@ inherit_counter(struct perf_counter *parent_counter,
        else
                child_counter->state = PERF_COUNTER_STATE_OFF;
 
+       if (parent_counter->attr.freq)
+               child_counter->hw.sample_period = parent_counter->hw.sample_period;
+
        /*
         * Link it up in the child's context:
         */
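
Two smaller pieces round out the change: perf_counter_alloc() now seeds frequency-driven counters with a sample period of 1 instead of the old TICK_NSEC / sample_freq guess and lets the adjustment code ramp it up, and inherited counters copy the parent's current, already-adapted period so a child does not have to converge from scratch.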