/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */

#include <linux/oprofile.h>
#include <linux/smp.h>
#include <asm/msr.h>
#include <asm/ptrace.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
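/* i.e. a non-HT chip exposes all 45 ESCRs + 18 CCCRs = 63 control
   registers to a single thread, while with hyper-threading each
   sibling gets roughly half: 23 ESCRs + 9 CCCRs = 32 */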

static unsigned int num_counters = NUM_COUNTERS_NON_HT;


/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
   kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
		num_counters = NUM_COUNTERS_HT2;
#endif
}

static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings == 2 ? 2 : 1;
#else
	return 1;
#endif
}
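
/* worked example: on an HT chip with two siblings, num_counters becomes 4
   and addr_increment() returns 2, so each sibling visits every other entry
   of the per-counter MSR tables below; a non-HT chip walks all 8 entries
   with a stride of 1 */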


/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
	int virt_counter;
	int counter_address;
	int cccr_address;
};

struct p4_event_binding {
	int escr_select;  /* value to put in CCCR */
	int event_select; /* value to put in ESCR */
	struct {
		int virt_counter; /* for this counter... */
		int escr_address; /* use this ESCR */
	} bindings[2];
};

/* nb: these CTR_* defines duplicate the defines in
   event/i386.p4*events. */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
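/* 18 CCCRs minus the 8 we program leaves the 10 listed below */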

/* All CCCRs we don't use. */
static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
	MSR_P4_BPU_CCCR1,   MSR_P4_BPU_CCCR3,
	MSR_P4_MS_CCCR1,    MSR_P4_MS_CCCR3,
	MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
	MSR_P4_IQ_CCCR0,    MSR_P4_IQ_CCCR1,
	MSR_P4_IQ_CCCR2,    MSR_P4_IQ_CCCR3
};

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

	{ /* BRANCH_RETIRED */
		0x05, 0x06,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* MISPRED_BRANCH_RETIRED */
		0x04, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* TC_DELIVER_MODE */
		0x01, 0x01,
		{ { CTR_MS_0, MSR_P4_TC_ESCR0 },
		  { CTR_MS_2, MSR_P4_TC_ESCR1 } }
	},

	{ /* BPU_FETCH_REQUEST */
		0x00, 0x03,
		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BPU_ESCR1 } }
	},

	{ /* ITLB_REFERENCE */
		0x03, 0x18,
		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1 } }
	},

	{ /* MEMORY_CANCEL */
		0x05, 0x02,
		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1 } }
	},

	{ /* MEMORY_COMPLETE */
		0x02, 0x08,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* LOAD_PORT_REPLAY */
		0x02, 0x04,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* STORE_PORT_REPLAY */
		0x02, 0x05,
		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
	},

	{ /* MOB_LOAD_REPLAY */
		0x02, 0x03,
		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_MOB_ESCR1 } }
	},

	{ /* PAGE_WALK_TYPE */
		0x04, 0x01,
		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0 },
		  { CTR_BPU_2, MSR_P4_PMH_ESCR1 } }
	},

	{ /* BSQ_CACHE_REFERENCE */
		0x07, 0x0c,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { CTR_BPU_2, MSR_P4_BSU_ESCR1 } }
	},

	{ /* IOQ_ALLOCATION */
		0x06, 0x03,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { 0, 0 } }
	},

	{ /* IOQ_ACTIVE_ENTRIES */
		0x06, 0x1a,
		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1 },
		  { 0, 0 } }
	},

	{ /* FSB_DATA_ACTIVITY */
		0x06, 0x17,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* BSQ_ALLOCATION */
		0x07, 0x05,
		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
		  { 0, 0 } }
	},

	{ /* BSQ_ACTIVE_ENTRIES */
		0x07, 0x06,
		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */ },
		  { 0, 0 } }
	},

	{ /* X87_ASSIST */
		0x05, 0x03,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* SSE_INPUT_ASSIST */
		0x01, 0x34,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_SP_UOP */
		0x01, 0x08,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* PACKED_DP_UOP */
		0x01, 0x0c,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_SP_UOP */
		0x01, 0x0a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* SCALAR_DP_UOP */
		0x01, 0x0e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 64BIT_MMX_UOP */
		0x01, 0x02,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* 128BIT_MMX_UOP */
		0x01, 0x1a,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_FP_UOP */
		0x01, 0x04,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* X87_SIMD_MOVES_UOP */
		0x01, 0x2e,
		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
	},

	{ /* MACHINE_CLEAR */
		0x05, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* GLOBAL_POWER_EVENTS */
		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
		  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
	},

	{ /* TC_MS_XFER */
		0x00, 0x05,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* UOP_QUEUE_WRITES */
		0x00, 0x09,
		{ { CTR_MS_0, MSR_P4_MS_ESCR0 },
		  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
	},

	{ /* FRONT_END_EVENT */
		0x05, 0x08,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* EXECUTION_EVENT */
		0x05, 0x0c,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* REPLAY_EVENT */
		0x05, 0x09,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
	},

	{ /* INSTR_RETIRED */
		0x04, 0x02,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOPS_RETIRED */
		0x04, 0x01,
		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
		  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
	},

	{ /* UOP_TYPE */
		0x02, 0x02,
		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0 },
		  { CTR_IQ_5, MSR_P4_RAT_ESCR1 } }
	},

	{ /* RETIRED_MISPRED_BRANCH_TYPE */
		0x02, 0x05,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	},

	{ /* RETIRED_BRANCH_TYPE */
		0x02, 0x04,
		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
		  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
	}
};


#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))

#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
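
/* worked example (the unit mask value is illustrative): programming
   BRANCH_RETIRED (event_select 0x06) with unit mask 0xf for both user
   and kernel on thread 0 accumulates
       escr = (0x06 << 25) | (0xf << 9) | (1 << 3) | (1 << 2)
            = 0x0c001e0c */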

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1 << 12))
#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
#define CCCR_OVF_P(cccr) ((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U << 31)))

#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
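
/* CTR_WRITE arms a counter by loading the two's complement of the
   desired count, sign-extended into the high half, so the counter
   counts up and overflows (raising the PMI) after that many events;
   e.g. a count of 100000 stores low = -100000 = 0xfffe7960 with
   high = 0xffffffff */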


/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	int cpu = smp_processor_id();
	return (cpu != first_cpu(cpu_sibling_map[cpu]));
#endif
	return 0;
}


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
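
/* worked example: with HT (num_counters == 4), stagger 0 maps virtual
   counters 0-3 onto p4_counters[0..3] (CTR_BPU_0 ... CTR_IQ_4) while
   stagger 1 maps them onto p4_counters[4..7] (CTR_BPU_2 ... CTR_IQ_5);
   the sketch below is illustrative only and never compiled */
#if 0
static void example_read_virt_ctr(void)
{
	unsigned int low, high;
	unsigned int stag = get_stagger();	/* 1 on the odd HT sibling */

	/* VIRT_CTR(1, 2) == 2 + 4 * 1 == 6, the CTR_FLAME_2 slot */
	CTR_READ(low, high, VIRT_CTR(stag, 2));
}
#endif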

static unsigned long reset_value[NUM_COUNTERS_NON_HT];


static void p4_fill_in_addresses(struct op_msrs * const msrs)
{
	unsigned int i;
	unsigned int addr, stag;

	setup_num_counters();
	stag = get_stagger();

	/* the counter registers we pay attention to */
	for (i = 0; i < num_counters; ++i) {
		msrs->counters[i].addr =
			p4_counters[VIRT_CTR(stag, i)].counter_address;
	}

	/* FIXME: the 10 counter registers we don't use are never saved. */

	/* 18 CCCR registers */
	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* 43 ESCR registers in three or four discontiguous groups */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* no IQ_ESCR0/1 on some models; we save BSU_ESCR0/1 a second time
	 * to avoid a special case in nmi_{save|restore}_registers() */
	if (boot_cpu_data.x86_model >= 0x3) {
		for (addr = MSR_P4_BSU_ESCR0 + stag;
		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	} else {
		for (addr = MSR_P4_IQ_ESCR0 + stag;
		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
			msrs->controls[i].addr = addr;
		}
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
		msrs->controls[i].addr = addr;
	}

	/* there are 2 remaining non-contiguously located ESCRs */

	if (num_counters == NUM_COUNTERS_NON_HT) {
		/* standard non-HT CPUs handle both remaining ESCRs */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else if (stag == 0) {
		/* HT CPUs give the first remainder to the even thread, as
		   the 32nd control register */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

	} else {
		/* and two copies of the second to the odd thread,
		   for the 22nd and 23rd control registers */
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
	}
}


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
	int i;
	int const maxbind = 2;
	unsigned int cccr = 0;
	unsigned int escr = 0;
	unsigned int high = 0;
	unsigned int counter_bit;
	struct p4_event_binding *ev = NULL;
	unsigned int stag;

	stag = get_stagger();

	/* convert from counter *number* to counter *bit* */
	counter_bit = 1 << VIRT_CTR(stag, ctr);

	/* find our event binding structure. */
	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
		printk(KERN_ERR
		       "oprofile: P4 event code 0x%lx out of range\n",
		       counter_config[ctr].event);
		return;
	}

	ev = &(p4_events[counter_config[ctr].event - 1]);

	for (i = 0; i < maxbind; i++) {
		if (ev->bindings[i].virt_counter & counter_bit) {

			/* modify ESCR */
			ESCR_READ(escr, high, ev, i);
			ESCR_CLEAR(escr);
			if (stag == 0) {
				ESCR_SET_USR_0(escr, counter_config[ctr].user);
				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
			} else {
				ESCR_SET_USR_1(escr, counter_config[ctr].user);
				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
			}
			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
			ESCR_WRITE(escr, high, ev, i);

			/* modify CCCR */
			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
			CCCR_CLEAR(cccr);
			CCCR_SET_REQUIRED_BITS(cccr);
			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
			if (stag == 0) {
				CCCR_SET_PMI_OVF_0(cccr);
			} else {
				CCCR_SET_PMI_OVF_1(cccr);
			}
			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
			return;
		}
	}

	printk(KERN_ERR
	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
	       counter_config[ctr].event, stag, ctr);
}


static void p4_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int i;
	unsigned int low, high;
	unsigned int addr;
	unsigned int stag;

	stag = get_stagger();

	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
	if (!MISC_PMC_ENABLED_P(low)) {
		printk(KERN_ERR "oprofile: P4 PMC not available\n");
		return;
	}

	/* clear the CCCRs we will use */
	for (i = 0; i < num_counters; i++) {
		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
	}

	/* clear CCCRs outside our concern */
	for (i = stag; i < NUM_UNUSED_CCCRS; i += addr_increment()) {
		rdmsr(p4_unused_cccr[i], low, high);
		CCCR_CLEAR(low);
		CCCR_SET_REQUIRED_BITS(low);
		wrmsr(p4_unused_cccr[i], low, high);
	}

	/* clear all ESCRs (including those outside our concern) */
	for (addr = MSR_P4_BSU_ESCR0 + stag;
	     addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	/* on older models, also clear MSR_P4_IQ_ESCR0/1 */
	if (boot_cpu_data.x86_model < 0x3) {
		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
	}

	for (addr = MSR_P4_RAT_ESCR0 + stag;
	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_MS_ESCR0 + stag;
	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	for (addr = MSR_P4_IX_ESCR0 + stag;
	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
		wrmsr(addr, 0, 0);
	}

	if (num_counters == NUM_COUNTERS_NON_HT) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	} else if (stag == 0) {
		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
	} else {
		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
	}

	/* setup all counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled) {
			reset_value[i] = counter_config[i].count;
			pmc_setup_one_p4_counter(i);
			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
		} else {
			reset_value[i] = 0;
		}
	}
}


static int p4_check_ctrs(struct pt_regs * const regs,
			 struct op_msrs const * const msrs)
{
	unsigned long ctr, low, high, stag, real;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {

		if (!reset_value[i])
			continue;

		/*
		 * there is some eccentricity in the hardware which
		 * requires that we perform 2 extra corrections:
		 *
		 * - check both the CCCR:OVF flag for overflow and the
		 *   counter high bit for un-flagged overflows.
		 *
		 * - write the counter back twice to ensure it gets
		 *   updated properly.
		 *
		 * the former seems to be related to extra NMIs happening
		 * during the current NMI; the latter is reported as errata
		 * N15 in Intel doc 249199-029, Pentium 4 specification
		 * update, though their suggested work-around does not
		 * appear to solve the problem.
		 */

		real = VIRT_CTR(stag, i);

		CCCR_READ(low, high, real);
		CTR_READ(ctr, high, real);
		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
			oprofile_add_sample(regs, i);
			CTR_WRITE(reset_value[i], real);
			CCCR_CLEAR_OVF(low);
			CCCR_WRITE(low, high, real);
			CTR_WRITE(reset_value[i], real);
		}
	}

	/* P4 quirk: you have to re-unmask the APIC vector */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* See op_model_ppro.c */
	return 1;
}


static void p4_start(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_ENABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}


static void p4_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high, stag;
	int i;

	stag = get_stagger();

	for (i = 0; i < num_counters; ++i) {
		CCCR_READ(low, high, VIRT_CTR(stag, i));
		CCCR_SET_DISABLE(low);
		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
	}
}


#ifdef CONFIG_SMP
struct op_x86_model_spec const op_p4_ht2_spec = {
	.num_counters = NUM_COUNTERS_HT2,
	.num_controls = NUM_CONTROLS_HT2,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};
#endif

struct op_x86_model_spec const op_p4_spec = {
	.num_counters = NUM_COUNTERS_NON_HT,
	.num_controls = NUM_CONTROLS_NON_HT,
	.fill_in_addresses = &p4_fill_in_addresses,
	.setup_ctrs = &p4_setup_ctrs,
	.check_ctrs = &p4_check_ctrs,
	.start = &p4_start,
	.stop = &p4_stop
};