Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ericvh...
[deliverable/linux.git] / arch / x86 / kernel / cpu / intel_cacheinfo.c
1 /*
2 * Routines to identify caches on Intel CPU.
3 *
4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */
9
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/device.h>
13 #include <linux/compiler.h>
14 #include <linux/cpu.h>
15 #include <linux/sched.h>
16 #include <linux/pci.h>
17
18 #include <asm/processor.h>
19 #include <linux/smp.h>
20 #include <asm/amd_nb.h>
21 #include <asm/smp.h>
22
/* Cache-level tags stored in cache_table[].cache_type below. */
23 #define LVL_1_INST 1
24 #define LVL_1_DATA 2
25 #define LVL_2 3
26 #define LVL_3 4
27 #define LVL_TRACE 5
28
/*
 * One row of the CPUID leaf 2 descriptor table: the descriptor byte
 * reported by cpuid(2), its level tag (LVL_*), and its size in KB.
 */
29 struct _cache_table {
30 unsigned char descriptor; /* CPUID(2) descriptor byte */
31 char cache_type; /* LVL_* tag */
32 short size; /* size in KB */
33 };
34
/* Table sizes are in KB; MB(x) expresses x megabytes in KB. */
35 #define MB(x) ((x) * 1024)
36
37 /* All the cache descriptor types we care about (no TLB or
38 trace cache entries) */
39
/*
 * CPUID leaf 2 descriptor table used by init_intel_cacheinfo() when the
 * deterministic cpuid(4) leaf is not available.  Terminated by the
 * all-zero sentinel row.
 */
40 static const struct _cache_table __cpuinitconst cache_table[] =
41 {
42 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
43 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
44 { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
45 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
46 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
48 { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */
49 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
50 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
51 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
52 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
53 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
54 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
55 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
56 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
57 { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
58 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
59 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
60 { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
61 { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
62 { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
63 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
64 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
65 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
66 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
67 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
68 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
69 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
70 { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */
71 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
72 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
73 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
74 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
75 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
76 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
77 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
78 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
79 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
80 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
81 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
82 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
83 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
84 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
85 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
86 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
87 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
88 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
89 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
90 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
91 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
92 { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */
93 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
94 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
95 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
96 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
97 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
98 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
99 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
100 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
101 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
102 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
103 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
104 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
105 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
106 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
107 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
108 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
109 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
110 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
111 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
112 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
113 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
114 { 0x00, 0, 0} /* sentinel: terminates table walks */
115 };
116
117
/* Cache type encoding of CPUID leaf 4, EAX bits 4:0. */
118 enum _cache_type {
119 CACHE_TYPE_NULL = 0,
120 CACHE_TYPE_DATA = 1,
121 CACHE_TYPE_INST = 2,
122 CACHE_TYPE_UNIFIED = 3
123 };
124
/*
 * Bitfield views of the CPUID leaf 4 output registers; .full holds the
 * raw 32-bit register value as filled in by cpuid_count() (or emulated
 * by amd_cpuid4() on AMD).
 */
125 union _cpuid4_leaf_eax {
126 struct {
127 enum _cache_type type:5;
128 unsigned int level:3;
129 unsigned int is_self_initializing:1;
130 unsigned int is_fully_associative:1;
131 unsigned int reserved:4;
132 unsigned int num_threads_sharing:12;
133 unsigned int num_cores_on_die:6;
134 } split;
135 u32 full;
136 };
137
138 union _cpuid4_leaf_ebx {
139 struct {
140 unsigned int coherency_line_size:12;
141 unsigned int physical_line_partition:10;
142 unsigned int ways_of_associativity:10;
143 } split;
144 u32 full;
145 };
146
147 union _cpuid4_leaf_ecx {
148 struct {
149 unsigned int number_of_sets:32;
150 } split;
151 u32 full;
152 };
153
/* Per-node AMD L3 state: owning northbridge, usable index count,
   and per-subcache enable counts (filled by amd_calc_l3_indices()). */
154 struct amd_l3_cache {
155 struct amd_northbridge *nb;
156 unsigned indices;
157 u8 subcaches[4];
158 };
159
/* One cache leaf plus the mask of CPUs sharing it (sysfs view). */
160 struct _cpuid4_info {
161 union _cpuid4_leaf_eax eax;
162 union _cpuid4_leaf_ebx ebx;
163 union _cpuid4_leaf_ecx ecx;
164 unsigned long size;
165 struct amd_l3_cache *l3;
166 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
167 };
168
/* subset of above _cpuid4_info w/o shared_cpu_map */
/* NOTE: must remain a prefix of _cpuid4_info -- cpuid4_cache_lookup()
   casts between the two types. */
169 /* subset of above _cpuid4_info w/o shared_cpu_map */
170 struct _cpuid4_info_regs {
171 union _cpuid4_leaf_eax eax;
172 union _cpuid4_leaf_ebx ebx;
173 union _cpuid4_leaf_ecx ecx;
174 unsigned long size;
175 struct amd_l3_cache *l3;
176 };
177
/* Number of cpuid(4) leaves; set once from the boot CPU in
   init_intel_cacheinfo(). */
178 unsigned short num_cache_leaves;
179
180 /* AMD doesn't have CPUID4. Emulate it here to report the same
181 information to the user. This makes some assumptions about the machine:
182 L2 not shared, no SMT etc. that is currently true on AMD CPUs.
183
184 In theory the TLBs could be reported as fake type (they are in "dummy").
185 Maybe later */
/* Layout of CPUID 0x80000005 ECX/EDX (L1 data/instruction cache). */
186 union l1_cache {
187 struct {
188 unsigned line_size:8;
189 unsigned lines_per_tag:8;
190 unsigned assoc:8;
191 unsigned size_in_kb:8;
192 };
193 unsigned val;
194 };
195
/* Layout of CPUID 0x80000006 ECX (L2 cache). */
196 union l2_cache {
197 struct {
198 unsigned line_size:8;
199 unsigned lines_per_tag:4;
200 unsigned assoc:4;
201 unsigned size_in_kb:16;
202 };
203 unsigned val;
204 };
205
/* Layout of CPUID 0x80000006 EDX (L3 cache); size_encoded is in
   512 KB units (see amd_cpuid4()). */
206 union l3_cache {
207 struct {
208 unsigned line_size:8;
209 unsigned lines_per_tag:4;
210 unsigned assoc:4;
211 unsigned res:2;
212 unsigned size_encoded:14;
213 };
214 unsigned val;
215 };
216
/* Map the 4-bit AMD associativity encoding to an actual way count. */
217 static const unsigned short __cpuinitconst assocs[] = {
218 [1] = 1,
219 [2] = 2,
220 [4] = 4,
221 [6] = 8,
222 [8] = 16,
223 [0xa] = 32,
224 [0xb] = 48,
225 [0xc] = 64,
226 [0xd] = 96,
227 [0xe] = 128,
228 [0xf] = 0xffff /* fully associative - no way to show this currently */
229 };
230
/* Indexed by emulated leaf number (0=l1d, 1=l1i, 2=l2, 3=l3):
   cache level and CPUID-4-style cache type respectively. */
231 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
232 static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
233
/*
 * Emulate CPUID leaf 4 for AMD CPUs from the 0x8000000[56] extended
 * leaves.  @leaf selects 0=L1D, 1=L1I, 2=L2, 3=L3; on an unknown or
 * absent cache the outputs are left zeroed (type == CACHE_TYPE_NULL).
 */
234 static void __cpuinit
235 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
236 union _cpuid4_leaf_ebx *ebx,
237 union _cpuid4_leaf_ecx *ecx)
238 {
239 unsigned dummy;
240 unsigned line_size, lines_per_tag, assoc, size_in_kb;
241 union l1_cache l1i, l1d;
242 union l2_cache l2;
243 union l3_cache l3;
244 union l1_cache *l1 = &l1d;
245
246 eax->full = 0;
247 ebx->full = 0;
248 ecx->full = 0;
249
250 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
251 cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
252
253 switch (leaf) {
254 case 1:
255 l1 = &l1i;
/* fall through -- cases 0 and 1 share the l1 decode below */
256 case 0:
257 if (!l1->val)
258 return;
259 assoc = assocs[l1->assoc];
260 line_size = l1->line_size;
261 lines_per_tag = l1->lines_per_tag;
262 size_in_kb = l1->size_in_kb;
263 break;
264 case 2:
265 if (!l2.val)
266 return;
267 assoc = assocs[l2.assoc];
268 line_size = l2.line_size;
269 lines_per_tag = l2.lines_per_tag;
270 /* cpu_data has errata corrections for K7 applied */
271 size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
272 break;
273 case 3:
274 if (!l3.val)
275 return;
276 assoc = assocs[l3.assoc];
277 line_size = l3.line_size;
278 lines_per_tag = l3.lines_per_tag;
279 size_in_kb = l3.size_encoded * 512;
/* On multi-die (DCM) parts each node owns half the reported L3. */
280 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
281 size_in_kb = size_in_kb >> 1;
282 assoc = assoc >> 1;
283 }
284 break;
285 default:
286 return;
287 }
288
289 eax->split.is_self_initializing = 1;
290 eax->split.type = types[leaf];
291 eax->split.level = levels[leaf];
292 eax->split.num_threads_sharing = 0;
293 eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
294
295
296 if (assoc == 0xffff)
297 eax->split.is_fully_associative = 1;
/* CPUID-4 fields are stored as (value - 1), mirror that encoding. */
298 ebx->split.coherency_line_size = line_size - 1;
299 ebx->split.ways_of_associativity = assoc - 1;
300 ebx->split.physical_line_partition = lines_per_tag - 1;
301 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
302 (ebx->split.ways_of_associativity + 1) - 1;
303 }
304
/* sysfs attribute with per-leaf show/store callbacks; the trailing
   unsigned int is the owning CPU number. */
305 struct _cache_attr {
306 struct attribute attr;
307 ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int);
308 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count,
309 unsigned int);
310 };
311
312 #ifdef CONFIG_AMD_NB
313
314 /*
315 * L3 cache descriptors
316 */
317 static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
318 {
319 unsigned int sc0, sc1, sc2, sc3;
320 u32 val = 0;
321
322 pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
323
324 /* calculate subcache sizes */
325 l3->subcaches[0] = sc0 = !(val & BIT(0));
326 l3->subcaches[1] = sc1 = !(val & BIT(4));
327 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
328 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
329
330 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
331 l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
332 }
333
/*
 * Attach the per-node L3 descriptor to @this_leaf.  Lazily allocates
 * one amd_l3_cache per northbridge on first use and computes its index
 * count once per node.
 */
334 static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
335 int index)
336 {
337 static struct amd_l3_cache *__cpuinitdata l3_caches;
338 int node;
339
340 /* only for L3, and not in virtualized environments */
341 if (index < 3 || amd_nb_num() == 0)
342 return;
343
344 /*
345 * Strictly speaking, the amount in @size below is leaked since it is
346 * never freed but this is done only on shutdown so it doesn't matter.
347 */
348 if (!l3_caches) {
349 int size = amd_nb_num() * sizeof(struct amd_l3_cache);
350
/* GFP_ATOMIC: may run with interrupts disabled during CPU bring-up. */
351 l3_caches = kzalloc(size, GFP_ATOMIC);
352 if (!l3_caches)
353 return;
354 }
355
356 node = amd_get_nb_id(smp_processor_id());
357
358 if (!l3_caches[node].nb) {
359 l3_caches[node].nb = node_to_amd_nb(node);
360 amd_calc_l3_indices(&l3_caches[node]);
361 }
362
363 this_leaf->l3 = &l3_caches[node];
364 }
365
366 /*
367 * check whether a slot used for disabling an L3 index is occupied.
368 * @l3: L3 cache descriptor
369 * @slot: slot number (0..1)
370 *
371 * @returns: the disabled index if used or negative value if slot free.
372 */
373 int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
374 {
375 unsigned int reg = 0;
376
/* Disable slots live at northbridge config space 0x1BC (slot 0)
   and 0x1C0 (slot 1). */
377 pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
378
379 /* check whether this slot is activated already */
380 if (reg & (3UL << 30))
381 return reg & 0xfff;
382
383 return -1;
384 }
385
/*
 * sysfs read for cache_disable_[01]: prints the disabled index or
 * "FREE" if the slot is unused.  -EINVAL if this leaf has no L3 or the
 * hardware lacks index-disable support.
 */
386 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
387 unsigned int slot)
388 {
389 int index;
390
391 if (!this_leaf->l3 ||
392 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
393 return -EINVAL;
394
395 index = amd_get_l3_disable_slot(this_leaf->l3, slot);
396 if (index >= 0)
397 return sprintf(buf, "%d\n", index);
398
399 return sprintf(buf, "FREE\n");
400 }
401
/* Stamp out per-slot show wrappers (show_cache_disable_0/_1). */
402 #define SHOW_CACHE_DISABLE(slot) \
403 static ssize_t \
404 show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \
405 unsigned int cpu) \
406 { \
407 return show_cache_disable(this_leaf, buf, slot); \
408 }
409 SHOW_CACHE_DISABLE(0)
410 SHOW_CACHE_DISABLE(1)
411
/*
 * Write @idx into disable slot @slot for every populated subcache,
 * flushing the node's caches between the arm (bit 30) and commit
 * (bit 31) writes.
 */
412 static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
413 unsigned slot, unsigned long idx)
414 {
415 int i;
416
417 idx |= BIT(30);
418
419 /*
420 * disable index in all 4 subcaches
421 */
422 for (i = 0; i < 4; i++) {
/* bits 21:20 select the subcache being addressed */
423 u32 reg = idx | (i << 20);
424
425 if (!l3->subcaches[i])
426 continue;
427
428 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
429
430 /*
431 * We need to WBINVD on a core on the node containing the L3
432 * cache which indices we disable therefore a simple wbinvd()
433 * is not sufficient.
434 */
435 wbinvd_on_cpu(cpu);
436
437 reg |= BIT(31);
438 pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
439 }
440 }
441
442 /*
443 * disable a L3 cache index by using a disable-slot
444 *
445 * @l3: L3 cache descriptor
446 * @cpu: A CPU on the node containing the L3 cache
447 * @slot: slot number (0..1)
448 * @index: index to disable
449 *
450 * @return: 0 on success, error status on failure
451 */
452 int amd_set_l3_disable_slot(struct amd_l3_cache *l3, int cpu, unsigned slot,
453 unsigned long index)
454 {
455 int ret = 0;
456
457 #define SUBCACHE_MASK (3UL << 20)
458 #define SUBCACHE_INDEX 0xfff
459
460 /*
461 * check whether this slot is already used or
462 * the index is already disabled
463 */
464 ret = amd_get_l3_disable_slot(l3, slot);
465 if (ret >= 0)
466 return -EINVAL;
467
468 /*
469 * check whether the other slot has disabled the
470 * same index already
471 */
472 if (index == amd_get_l3_disable_slot(l3, !slot))
473 return -EINVAL;
474
475 /* do not allow writes outside of allowed bits */
476 if ((index & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
477 ((index & SUBCACHE_INDEX) > l3->indices))
478 return -EINVAL;
479
480 amd_l3_disable_index(l3, cpu, slot, index);
481
482 return 0;
483 }
484
/*
 * sysfs write for cache_disable_[01]: parses a decimal index and
 * disables it via amd_set_l3_disable_slot() on a CPU of the L3's node.
 * Requires CAP_SYS_ADMIN and L3 index-disable support.
 */
485 static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
486 const char *buf, size_t count,
487 unsigned int slot)
488 {
489 unsigned long val = 0;
490 int cpu, err = 0;
491
492 if (!capable(CAP_SYS_ADMIN))
493 return -EPERM;
494
495 if (!this_leaf->l3 ||
496 !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
497 return -EINVAL;
498
/* any CPU sharing this cache sits on the right node for the wbinvd */
499 cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
500
501 if (strict_strtoul(buf, 10, &val) < 0)
502 return -EINVAL;
503
504 err = amd_set_l3_disable_slot(this_leaf->l3, cpu, slot, val);
505 if (err) {
506 if (err == -EEXIST)
507 printk(KERN_WARNING "L3 disable slot %d in use!\n",
508 slot);
509 return err;
510 }
511 return count;
512 }
513
/* Stamp out per-slot store wrappers (store_cache_disable_0/_1). */
514 #define STORE_CACHE_DISABLE(slot) \
515 static ssize_t \
516 store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
517 const char *buf, size_t count, \
518 unsigned int cpu) \
519 { \
520 return store_cache_disable(this_leaf, buf, count, slot); \
521 }
522 STORE_CACHE_DISABLE(0)
523 STORE_CACHE_DISABLE(1)
524
/* rw sysfs attributes exposing the two L3 index-disable slots */
525 static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
526 show_cache_disable_0, store_cache_disable_0);
527 static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
528 show_cache_disable_1, store_cache_disable_1);
529
530 static ssize_t
531 show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu)
532 {
533 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
534 return -EINVAL;
535
536 return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
537 }
538
/*
 * sysfs write for "subcaches": parse a hex mask and apply it via
 * amd_set_subcaches().  Requires CAP_SYS_ADMIN, an L3 on this leaf,
 * and L3 partitioning support.
 */
539 static ssize_t
540 store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
541 unsigned int cpu)
542 {
543 unsigned long val;
544
545 if (!capable(CAP_SYS_ADMIN))
546 return -EPERM;
547
548 if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
549 return -EINVAL;
550
551 if (strict_strtoul(buf, 16, &val) < 0)
552 return -EINVAL;
553
554 if (amd_set_subcaches(cpu, val))
555 return -EINVAL;
556
557 return count;
558 }
559
/* rw sysfs attribute exposing the L3 subcache partition mask */
560 static struct _cache_attr subcaches =
561 __ATTR(subcaches, 0644, show_subcaches, store_subcaches);
562
563 #else /* CONFIG_AMD_NB */
564 #define amd_init_l3_cache(x, y)
565 #endif /* CONFIG_AMD_NB */
566
/*
 * Fill @this_leaf for cache leaf @index, using real cpuid(4) on Intel
 * and the AMD emulation (plus L3 descriptor setup) on AMD.  Returns 0
 * on success, -EIO if the leaf does not exist (type NULL).
 */
567 static int
568 __cpuinit cpuid4_cache_lookup_regs(int index,
569 struct _cpuid4_info_regs *this_leaf)
570 {
571 union _cpuid4_leaf_eax eax;
572 union _cpuid4_leaf_ebx ebx;
573 union _cpuid4_leaf_ecx ecx;
574 unsigned edx;
575
576 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
577 amd_cpuid4(index, &eax, &ebx, &ecx);
578 amd_init_l3_cache(this_leaf, index);
579 } else {
580 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
581 }
582
583 if (eax.split.type == CACHE_TYPE_NULL)
584 return -EIO; /* better error ? */
585
586 this_leaf->eax = eax;
587 this_leaf->ebx = ebx;
588 this_leaf->ecx = ecx;
/* all CPUID-4 fields are (value - 1); total bytes is their product */
589 this_leaf->size = (ecx.split.number_of_sets + 1) *
590 (ebx.split.coherency_line_size + 1) *
591 (ebx.split.physical_line_partition + 1) *
592 (ebx.split.ways_of_associativity + 1);
593 return 0;
594 }
595
596 static int __cpuinit find_num_cache_leaves(void)
597 {
598 unsigned int eax, ebx, ecx, edx;
599 union _cpuid4_leaf_eax cache_eax;
600 int i = -1;
601
602 do {
603 ++i;
604 /* Do cpuid(4) loop to find out num_cache_leaves */
605 cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
606 cache_eax.full = eax;
607 } while (cache_eax.split.type != CACHE_TYPE_NULL);
608 return i;
609 }
610
/*
 * Detect this CPU's cache sizes, preferring deterministic cpuid(4)
 * leaves and falling back to the cpuid(2) descriptor table (always
 * used on P4 family 15 for the trace cache).  Stores the total size in
 * c->x86_cache_size, records the last-level-cache id per CPU under
 * CONFIG_X86_HT, and returns the L2 size in KB.
 */
611 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
612 {
613 /* Cache sizes */
614 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
615 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
616 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
617 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
618 #ifdef CONFIG_X86_HT
619 unsigned int cpu = c->cpu_index;
620 #endif
621
622 if (c->cpuid_level > 3) {
623 static int is_initialized;
624
625 if (is_initialized == 0) {
626 /* Init num_cache_leaves from boot CPU */
627 num_cache_leaves = find_num_cache_leaves();
628 is_initialized++;
629 }
630
631 /*
632 * Whenever possible use cpuid(4), deterministic cache
633 * parameters cpuid leaf to find the cache details
634 */
635 for (i = 0; i < num_cache_leaves; i++) {
636 struct _cpuid4_info_regs this_leaf;
637 int retval;
638
639 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
640 if (retval >= 0) {
641 switch (this_leaf.eax.split.level) {
642 case 1:
643 if (this_leaf.eax.split.type ==
644 CACHE_TYPE_DATA)
645 new_l1d = this_leaf.size/1024;
646 else if (this_leaf.eax.split.type ==
647 CACHE_TYPE_INST)
648 new_l1i = this_leaf.size/1024;
649 break;
650 case 2:
651 new_l2 = this_leaf.size/1024;
/* id of the cache-sharing domain: apicid with the thread bits dropped */
652 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
653 index_msb = get_count_order(num_threads_sharing);
654 l2_id = c->apicid >> index_msb;
655 break;
656 case 3:
657 new_l3 = this_leaf.size/1024;
658 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
659 index_msb = get_count_order(
660 num_threads_sharing);
661 l3_id = c->apicid >> index_msb;
662 break;
663 default:
664 break;
665 }
666 }
667 }
668 }
669 /*
670 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
671 * trace cache
672 */
673 if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
674 /* supports eax=2 call */
675 int j, n;
676 unsigned int regs[4];
677 unsigned char *dp = (unsigned char *)regs;
678 int only_trace = 0;
679
680 if (num_cache_leaves != 0 && c->x86 == 15)
681 only_trace = 1;
682
683 /* Number of times to iterate */
684 n = cpuid_eax(2) & 0xFF;
685
686 for (i = 0 ; i < n ; i++) {
687 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
688
689 /* If bit 31 is set, this is an unknown format */
690 for (j = 0 ; j < 3 ; j++)
691 if (regs[j] & (1 << 31))
692 regs[j] = 0;
693
694 /* Byte 0 is level count, not a descriptor */
695 for (j = 1 ; j < 16 ; j++) {
696 unsigned char des = dp[j];
697 unsigned char k = 0;
698
699 /* look up this descriptor in the table */
700 while (cache_table[k].descriptor != 0) {
701 if (cache_table[k].descriptor == des) {
702 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
703 break;
704 switch (cache_table[k].cache_type) {
705 case LVL_1_INST:
706 l1i += cache_table[k].size;
707 break;
708 case LVL_1_DATA:
709 l1d += cache_table[k].size;
710 break;
711 case LVL_2:
712 l2 += cache_table[k].size;
713 break;
714 case LVL_3:
715 l3 += cache_table[k].size;
716 break;
717 case LVL_TRACE:
718 trace += cache_table[k].size;
719 break;
720 }
721
722 break;
723 }
724
725 k++;
726 }
727 }
728 }
729 }
730
/* cpuid(4) results, when present, override the cpuid(2) table values */
731 if (new_l1d)
732 l1d = new_l1d;
733
734 if (new_l1i)
735 l1i = new_l1i;
736
737 if (new_l2) {
738 l2 = new_l2;
739 #ifdef CONFIG_X86_HT
740 per_cpu(cpu_llc_id, cpu) = l2_id;
741 #endif
742 }
743
744 if (new_l3) {
745 l3 = new_l3;
746 #ifdef CONFIG_X86_HT
747 per_cpu(cpu_llc_id, cpu) = l3_id;
748 #endif
749 }
750
751 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
752
753 return l2;
754 }
755
756 #ifdef CONFIG_SYSFS
757
758 /* pointer to _cpuid4_info array (for each cache leaf) */
759 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
/* leaf y of cpu x; the per-cpu array is sized num_cache_leaves */
760 #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
761
762 #ifdef CONFIG_SMP
/*
 * Populate shared_cpu_map for leaf @index of @cpu: the set of CPUs
 * sharing that cache.  AMD L3 uses cpu_llc_shared_mask; otherwise the
 * sharing domain is derived from cpuid(4) num_threads_sharing by
 * comparing apicid prefixes.  Sibling leaves are updated symmetrically.
 */
763 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
764 {
765 struct _cpuid4_info *this_leaf, *sibling_leaf;
766 unsigned long num_threads_sharing;
767 int index_msb, i, sibling;
768 struct cpuinfo_x86 *c = &cpu_data(cpu);
769
770 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
771 for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
/* skip CPUs whose leaf array is not (yet) allocated */
772 if (!per_cpu(ici_cpuid4_info, i))
773 continue;
774 this_leaf = CPUID4_INFO_IDX(i, index);
775 for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
776 if (!cpu_online(sibling))
777 continue;
778 set_bit(sibling, this_leaf->shared_cpu_map);
779 }
780 }
781 return;
782 }
783 this_leaf = CPUID4_INFO_IDX(cpu, index);
784 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
785
786 if (num_threads_sharing == 1)
787 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
788 else {
789 index_msb = get_count_order(num_threads_sharing);
790
/* CPUs share this cache iff their apicids match above index_msb */
791 for_each_online_cpu(i) {
792 if (cpu_data(i).apicid >> index_msb ==
793 c->apicid >> index_msb) {
794 cpumask_set_cpu(i,
795 to_cpumask(this_leaf->shared_cpu_map));
796 if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
797 sibling_leaf =
798 CPUID4_INFO_IDX(i, index);
799 cpumask_set_cpu(cpu, to_cpumask(
800 sibling_leaf->shared_cpu_map));
801 }
802 }
803 }
804 }
805 }
/* Remove @cpu from the shared_cpu_map of every sibling sharing leaf
   @index (undo of cache_shared_cpu_map_setup()). */
806 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
807 {
808 struct _cpuid4_info *this_leaf, *sibling_leaf;
809 int sibling;
810
811 this_leaf = CPUID4_INFO_IDX(cpu, index);
812 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
813 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
814 cpumask_clear_cpu(cpu,
815 to_cpumask(sibling_leaf->shared_cpu_map));
816 }
817 }
818 #else
/* UP (!CONFIG_SMP) stubs: no sharing maps to maintain. */
819 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
820 {
821 }
822
823 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
824 {
825 }
826 #endif
827
/*
 * Tear down @cpu's leaf array: unlink it from all sharing maps, then
 * free the (shared, node-level) l3 pointer and the array itself.
 * Callers must ensure ici_cpuid4_info[cpu] is non-NULL.
 */
828 static void __cpuinit free_cache_attributes(unsigned int cpu)
829 {
830 int i;
831
832 for (i = 0; i < num_cache_leaves; i++)
833 cache_remove_shared_cpu_map(cpu, i);
834
835 kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
836 kfree(per_cpu(ici_cpuid4_info, cpu));
837 per_cpu(ici_cpuid4_info, cpu) = NULL;
838 }
839
/*
 * Thin wrapper over cpuid4_cache_lookup_regs().  The cast is safe only
 * because _cpuid4_info_regs is the leading prefix of _cpuid4_info
 * (see the struct definitions above).
 */
840 static int
841 __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
842 {
843 struct _cpuid4_info_regs *leaf_regs =
844 (struct _cpuid4_info_regs *)this_leaf;
845
846 return cpuid4_cache_lookup_regs(index, leaf_regs);
847 }
848
/*
 * Runs ON the target CPU (via smp_call_function_single): fill every
 * cache leaf and its sharing map; on failure roll back the sharing
 * maps set so far and report the error through *_retval.
 */
849 static void __cpuinit get_cpu_leaves(void *_retval)
850 {
851 int j, *retval = _retval, cpu = smp_processor_id();
852
853 /* Do cpuid and store the results */
854 for (j = 0; j < num_cache_leaves; j++) {
855 struct _cpuid4_info *this_leaf;
856 this_leaf = CPUID4_INFO_IDX(cpu, j);
857 *retval = cpuid4_cache_lookup(j, this_leaf);
858 if (unlikely(*retval < 0)) {
859 int i;
860
861 for (i = 0; i < j; i++)
862 cache_remove_shared_cpu_map(cpu, i);
863 break;
864 }
865 cache_shared_cpu_map_setup(cpu, j);
866 }
867 }
868
/*
 * Allocate @cpu's leaf array and populate it on the target CPU.
 * Returns 0 on success, -ENOENT with no cpuid(4) leaves, -ENOMEM on
 * allocation failure, or the lookup error (array freed again).
 */
869 static int __cpuinit detect_cache_attributes(unsigned int cpu)
870 {
871 int retval;
872
873 if (num_cache_leaves == 0)
874 return -ENOENT;
875
876 per_cpu(ici_cpuid4_info, cpu) = kzalloc(
877 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
878 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
879 return -ENOMEM;
880
/* cpuid must execute on @cpu itself; wait for completion */
881 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
882 if (retval) {
883 kfree(per_cpu(ici_cpuid4_info, cpu));
884 per_cpu(ici_cpuid4_info, cpu) = NULL;
885 }
886
887 return retval;
888 }
889
890 #include <linux/kobject.h>
891 #include <linux/sysfs.h>
892
893 extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
894
895 /* pointer to kobject for cpuX/cache */
896 static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
897
/* kobject for one cpuX/cache/indexY directory, tagged with its
   owning cpu and leaf index */
898 struct _index_kobject {
899 struct kobject kobj;
900 unsigned int cpu;
901 unsigned short index;
902 };
903
904 /* pointer to array of kobjects for cpuX/cache/indexY */
905 static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
906 #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
907
/* Generate a show_<name>() printing this_leaf-><object> + <val>;
   val=1 undoes the CPUID-4 "stored as value minus one" encoding. */
908 #define show_one_plus(file_name, object, val) \
909 static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \
910 unsigned int cpu) \
911 { \
912 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
913 }
914
915 show_one_plus(level, eax.split.level, 0);
916 show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
917 show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
918 show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
919 show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
920
921 static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf,
922 unsigned int cpu)
923 {
924 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
925 }
926
/*
 * Print the leaf's shared_cpu_map into @buf, as a CPU list (@type
 * nonzero) or a hex mask (@type zero), bounded to the page containing
 * @buf and terminated with a newline.
 */
927 static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
928 int type, char *buf)
929 {
/* bytes remaining to the end of the sysfs page buf lives in */
930 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
931 int n = 0;
932
933 if (len > 1) {
934 const struct cpumask *mask;
935
936 mask = to_cpumask(this_leaf->shared_cpu_map);
/* len-2 keeps room for the '\n' and trailing NUL appended below */
937 n = type ?
938 cpulist_scnprintf(buf, len-2, mask) :
939 cpumask_scnprintf(buf, len-2, mask);
940 buf[n++] = '\n';
941 buf[n] = '\0';
942 }
943 return n;
944 }
945
/* sysfs reads for shared_cpu_map (hex mask) and shared_cpu_list
   (human-readable list), both delegating to the helper above. */
946 static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf,
947 unsigned int cpu)
948 {
949 return show_shared_cpu_map_func(leaf, 0, buf);
950 }
951
952 static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf,
953 unsigned int cpu)
954 {
955 return show_shared_cpu_map_func(leaf, 1, buf);
956 }
957
958 static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf,
959 unsigned int cpu)
960 {
961 switch (this_leaf->eax.split.type) {
962 case CACHE_TYPE_DATA:
963 return sprintf(buf, "Data\n");
964 case CACHE_TYPE_INST:
965 return sprintf(buf, "Instruction\n");
966 case CACHE_TYPE_UNIFIED:
967 return sprintf(buf, "Unified\n");
968 default:
969 return sprintf(buf, "Unknown\n");
970 }
971 }
972
/* recover our wrapper structs from the embedded kobject/attribute */
973 #define to_object(k) container_of(k, struct _index_kobject, kobj)
974 #define to_attr(a) container_of(a, struct _cache_attr, attr)
975
/* declare a read-only (0444) attribute backed by show_<name>() */
976 #define define_one_ro(_name) \
977 static struct _cache_attr _name = \
978 __ATTR(_name, 0444, show_##_name, NULL)
979
980 define_one_ro(level);
981 define_one_ro(type);
982 define_one_ro(coherency_line_size);
983 define_one_ro(physical_line_partition);
984 define_one_ro(ways_of_associativity);
985 define_one_ro(number_of_sets);
986 define_one_ro(size);
987 define_one_ro(shared_cpu_map);
988 define_one_ro(shared_cpu_list);
989
/* NULL-terminated attribute list for every cpuX/cache/indexY dir */
990 static struct attribute *default_attrs[] = {
991 &type.attr,
992 &level.attr,
993 &coherency_line_size.attr,
994 &physical_line_partition.attr,
995 &ways_of_associativity.attr,
996 &number_of_sets.attr,
997 &size.attr,
998 &shared_cpu_map.attr,
999 &shared_cpu_list.attr,
1000 NULL
1001 };
1002
1003 #ifdef CONFIG_AMD_NB
/*
 * Build (once, cached in a static) the attribute list for leaves with
 * an AMD L3: default_attrs extended with the index-disable slots
 * and/or the subcache partitioning attribute, as supported.  Falls
 * back to default_attrs if the allocation fails.
 */
1004 static struct attribute ** __cpuinit amd_l3_attrs(void)
1005 {
1006 static struct attribute **attrs;
1007 int n;
1008
1009 if (attrs)
1010 return attrs;
1011
1012 n = sizeof (default_attrs) / sizeof (struct attribute *);
1013
1014 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
1015 n += 2;
1016
1017 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1018 n += 1;
1019
1020 attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
1021 if (attrs == NULL)
1022 return attrs = default_attrs;
1023
/* copy the defaults; n lands on the terminating NULL slot */
1024 for (n = 0; default_attrs[n]; n++)
1025 attrs[n] = default_attrs[n];
1026
1027 if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
1028 attrs[n++] = &cache_disable_0.attr;
1029 attrs[n++] = &cache_disable_1.attr;
1030 }
1031
1032 if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
1033 attrs[n++] = &subcaches.attr;
1034
1035 return attrs;
1036 }
1037 #endif
1038
1039 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
1040 {
1041 struct _cache_attr *fattr = to_attr(attr);
1042 struct _index_kobject *this_leaf = to_object(kobj);
1043 ssize_t ret;
1044
1045 ret = fattr->show ?
1046 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1047 buf, this_leaf->cpu) :
1048 0;
1049 return ret;
1050 }
1051
/*
 * sysfs_ops.store dispatcher: mirror of show() above for writes;
 * returns 0 when the attribute has no store callback.
 */
1052 static ssize_t store(struct kobject *kobj, struct attribute *attr,
1053 const char *buf, size_t count)
1054 {
1055 struct _cache_attr *fattr = to_attr(attr);
1056 struct _index_kobject *this_leaf = to_object(kobj);
1057 ssize_t ret;
1058
1059 ret = fattr->store ?
1060 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
1061 buf, count, this_leaf->cpu) :
1062 0;
1063 return ret;
1064 }
1065
/* shared show/store dispatchers for both kobject types below */
1066 static const struct sysfs_ops sysfs_ops = {
1067 .show = show,
1068 .store = store,
1069 };
1070
/* kobject type for cpuX/cache/indexY directories (per-leaf attrs) */
1071 static struct kobj_type ktype_cache = {
1072 .sysfs_ops = &sysfs_ops,
1073 .default_attrs = default_attrs,
1074 };
1075
/* kobject type for the bare cpuX/cache directory (no attributes) */
1076 static struct kobj_type ktype_percpu_entry = {
1077 .sysfs_ops = &sysfs_ops,
1078 };
1079
/* Free everything cpuid4_cache_sysfs_init() allocated for @cpu
   (also used as its error-unwind path). */
1080 static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
1081 {
1082 kfree(per_cpu(ici_cache_kobject, cpu));
1083 kfree(per_cpu(ici_index_kobject, cpu));
1084 per_cpu(ici_cache_kobject, cpu) = NULL;
1085 per_cpu(ici_index_kobject, cpu) = NULL;
1086 free_cache_attributes(cpu);
1087 }
1088
/*
 * Detect @cpu's cache leaves and allocate the sysfs bookkeeping for
 * them: one parent kobject plus one _index_kobject per cache leaf.
 *
 * Returns 0 on success, -ENOENT when CPUID reports no cache leaves,
 * -ENOMEM on allocation failure (all partial state freed), or the
 * error from detect_cache_attributes().
 */
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
		sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	/* Safe even when only some allocations succeeded: the exit path
	 * frees NULL pointers harmlessly and resets the per-cpu state. */
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}
1117
1118 static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1119
1120 /* Add/Remove cache interface for CPU device */
/* Add/Remove cache interface for CPU device */
/*
 * Populate /sys/devices/system/cpu/cpuN/cache for the cpu behind
 * @sys_dev: a "cache" parent kobject with one "indexM" child per cache
 * leaf.  On success the cpu is marked in cache_dev_map so
 * cache_remove_dev() knows there is something to tear down.
 *
 * Error handling: a failed child registration puts the already-added
 * children and the parent (kobject_put releases the sysfs entries),
 * then frees the backing memory.  Returns 0 or a negative errno.
 */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		/* Reset first: a previous iteration may have switched the
		 * shared ktype to the AMD L3 attribute list. */
		ktype_cache.default_attrs = default_attrs;
#ifdef CONFIG_AMD_NB
		if (this_leaf->l3)
			ktype_cache.default_attrs = amd_l3_attrs();
#endif
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			/* Unwind: drop the children added so far, then the
			 * parent, then free the backing allocations. */
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}
1171
/*
 * Tear down the sysfs cache interface added by cache_add_dev() for the
 * cpu behind @sys_dev.  A no-op when nothing was detected for the cpu
 * or when the cpu was never successfully added (cache_dev_map clear),
 * so it is safe to call from the hotplug path unconditionally.
 */
static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	/* Children first, then the parent, then the backing memory. */
	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}
1188
1189 static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1190 unsigned long action, void *hcpu)
1191 {
1192 unsigned int cpu = (unsigned long)hcpu;
1193 struct sys_device *sys_dev;
1194
1195 sys_dev = get_cpu_sysdev(cpu);
1196 switch (action) {
1197 case CPU_ONLINE:
1198 case CPU_ONLINE_FROZEN:
1199 cache_add_dev(sys_dev);
1200 break;
1201 case CPU_DEAD:
1202 case CPU_DEAD_FROZEN:
1203 cache_remove_dev(sys_dev);
1204 break;
1205 }
1206 return NOTIFY_OK;
1207 }
1208
/* Hotplug notifier block, registered once in cache_sysfs_init(). */
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};
1212
/*
 * Initcall: add the cache sysfs interface for every cpu already online
 * at boot, then register the hotplug notifier so later cpus are handled
 * as they come and go.  Returns 0 when there is nothing to do (no cache
 * leaves) or on success.
 *
 * NOTE(review): on a mid-loop failure the cpus added earlier are left
 * registered and the notifier is never installed; also the online-cpu
 * walk is not under get_online_cpus() — presumably acceptable this
 * early in boot, but worth confirming.
 */
static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}
1231
/* Run cache_sysfs_init() at device-initcall time, after sysfs is up. */
device_initcall(cache_sysfs_init);
1233
1234 #endif
This page took 0.064592 seconds and 5 git commands to generate.