Commit | Line | Data |
---|---|---|
f580366f | 1 | #include <linux/init.h> |
0f0124fa YL |
2 | #include <linux/kernel.h> |
3 | #include <linux/sched.h> | |
4 | #include <linux/string.h> | |
5 | #include <linux/bootmem.h> | |
6 | #include <linux/bitops.h> | |
7 | #include <linux/module.h> | |
8 | #include <linux/kgdb.h> | |
9 | #include <linux/topology.h> | |
f580366f YL |
10 | #include <linux/delay.h> |
11 | #include <linux/smp.h> | |
f580366f | 12 | #include <linux/percpu.h> |
f580366f YL |
13 | #include <asm/i387.h> |
14 | #include <asm/msr.h> | |
15 | #include <asm/io.h> | |
cbcd79c2 | 16 | #include <asm/linkage.h> |
f580366f YL |
17 | #include <asm/mmu_context.h> |
18 | #include <asm/mtrr.h> | |
19 | #include <asm/mce.h> | |
20 | #include <asm/pat.h> | |
7e00df58 | 21 | #include <asm/asm.h> |
f580366f YL |
22 | #include <asm/numa.h> |
23 | #ifdef CONFIG_X86_LOCAL_APIC | |
24 | #include <asm/mpspec.h> | |
25 | #include <asm/apic.h> | |
26 | #include <mach_apic.h> | |
f0fc4aff | 27 | #include <asm/genapic.h> |
f580366f | 28 | #endif |
0f0124fa YL |
29 | #include <asm/pda.h> |
30 | #include <asm/pgtable.h> | |
31 | #include <asm/processor.h> | |
32 | #include <asm/desc.h> | |
33 | #include <asm/atomic.h> | |
34 | #include <asm/proto.h> | |
35 | #include <asm/sections.h> | |
36 | #include <asm/setup.h> | |
f580366f YL |
37 | |
38 | #include "cpu.h" | |
39 | ||
0a488a53 YL |
40 | static struct cpu_dev *this_cpu __cpuinitdata; |
41 | ||
950ad7ff | 42 | #ifdef CONFIG_X86_64 |
f580366f YL |
43 | /* We need valid kernel segments for data and code in long mode too |
44 | * IRET will check the segment types kkeil 2000/10/28 | |
45 | * Also sysret mandates a special GDT layout | |
46 | */ | |
47 | /* The TLS descriptors are currently at a different place compared to i386. | |
48 | Hopefully nobody expects them at a fixed place (Wine?) */ | |
49 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { | |
50 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | |
51 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | |
52 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | |
53 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | |
54 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | |
55 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | |
56 | } }; | |
950ad7ff YL |
57 | #else |
58 | DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |
59 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | |
60 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | |
61 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | |
62 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, | |
63 | /* | |
64 | * Segments used for calling PnP BIOS have byte granularity. | |
65 | * The code segments and data segments have fixed 64k limits, | |
66 | * the transfer segment sizes are set at run time. | |
67 | */ | |
68 | /* 32-bit code */ | |
69 | [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, | |
70 | /* 16-bit code */ | |
71 | [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, | |
72 | /* 16-bit data */ | |
73 | [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, | |
74 | /* 16-bit data */ | |
75 | [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, | |
76 | /* 16-bit data */ | |
77 | [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, | |
78 | /* | |
79 | * The APM segments have byte granularity and their bases | |
80 | * are set at run time. All have 64k limits. | |
81 | */ | |
82 | /* 32-bit code */ | |
83 | [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, | |
84 | /* 16-bit code */ | |
85 | [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, | |
86 | /* data */ | |
87 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | |
88 | ||
89 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | |
90 | [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, | |
91 | } }; | |
92 | #endif | |
f580366f YL |
93 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); |
94 | ||
ba51dced YL |
95 | #ifdef CONFIG_X86_32 |
96 | static int cachesize_override __cpuinitdata = -1; | |
97 | static int disable_x86_serial_nr __cpuinitdata = 1; | |
98 | ||
99 | static int __init cachesize_setup(char *str) | |
100 | { | |
101 | get_option(&str, &cachesize_override); | |
102 | return 1; | |
103 | } | |
104 | __setup("cachesize=", cachesize_setup); | |
105 | ||
106 | /* | |
107 | * Naming convention should be: <Name> [(<Codename>)] | |
108 | * This table is only used if init_<vendor>() below doesn't set it; | |
109 | * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used | |
110 | * | |
111 | */ | |
112 | ||
113 | /* Look up CPU names by table lookup. */ | |
114 | static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) | |
115 | { | |
116 | struct cpu_model_info *info; | |
117 | ||
118 | if (c->x86_model >= 16) | |
119 | return NULL; /* Range check */ | |
120 | ||
121 | if (!this_cpu) | |
122 | return NULL; | |
123 | ||
124 | info = this_cpu->c_models; | |
125 | ||
126 | while (info && info->family) { | |
127 | if (info->family == c->x86) | |
128 | return info->model_names[c->x86_model]; | |
129 | info++; | |
130 | } | |
131 | return NULL; /* Not found */ | |
132 | } | |
133 | ||
134 | static int __init x86_fxsr_setup(char *s) | |
135 | { | |
136 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | |
137 | setup_clear_cpu_cap(X86_FEATURE_XMM); | |
138 | return 1; | |
139 | } | |
140 | __setup("nofxsr", x86_fxsr_setup); | |
141 | ||
142 | static int __init x86_sep_setup(char *s) | |
143 | { | |
144 | setup_clear_cpu_cap(X86_FEATURE_SEP); | |
145 | return 1; | |
146 | } | |
147 | __setup("nosep", x86_sep_setup); | |
148 | ||
149 | /* Standard macro to see if a specific flag is changeable */ | |
150 | static inline int flag_is_changeable_p(u32 flag) | |
151 | { | |
152 | u32 f1, f2; | |
153 | ||
154 | asm("pushfl\n\t" | |
155 | "pushfl\n\t" | |
156 | "popl %0\n\t" | |
157 | "movl %0,%1\n\t" | |
158 | "xorl %2,%0\n\t" | |
159 | "pushl %0\n\t" | |
160 | "popfl\n\t" | |
161 | "pushfl\n\t" | |
162 | "popl %0\n\t" | |
163 | "popfl\n\t" | |
164 | : "=&r" (f1), "=&r" (f2) | |
165 | : "ir" (flag)); | |
166 | ||
167 | return ((f1^f2) & flag) != 0; | |
168 | } | |
169 | ||
170 | /* Probe for the CPUID instruction */ | |
171 | static int __cpuinit have_cpuid_p(void) | |
172 | { | |
173 | return flag_is_changeable_p(X86_EFLAGS_ID); | |
174 | } | |
175 | ||
176 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | |
177 | { | |
178 | if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { | |
179 | /* Disable processor serial number */ | |
180 | unsigned long lo, hi; | |
181 | rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | |
182 | lo |= 0x200000; | |
183 | wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); | |
184 | printk(KERN_NOTICE "CPU serial number disabled.\n"); | |
185 | clear_cpu_cap(c, X86_FEATURE_PN); | |
186 | ||
187 | /* Disabling the serial number may affect the cpuid level */ | |
188 | c->cpuid_level = cpuid_eax(0); | |
189 | } | |
190 | } | |
191 | ||
192 | static int __init x86_serial_nr_setup(char *s) | |
193 | { | |
194 | disable_x86_serial_nr = 0; | |
195 | return 1; | |
196 | } | |
197 | __setup("serialnumber", x86_serial_nr_setup); | |
198 | #else | |
/*
 * Probe for the CPUID instruction.  In this (non-CONFIG_X86_32) build
 * no probing is done and CPUID is unconditionally reported as present.
 */
static inline int have_cpuid_p(void)
{
	const int present = 1;

	return present;
}
204 | #endif | |
205 | ||
f580366f YL |
206 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; |
207 | ||
208 | /* Current gdt points %fs at the "master" per-cpu area: after this, | |
209 | * it's on the real one. */ | |
210 | void switch_to_new_gdt(void) | |
211 | { | |
212 | struct desc_ptr gdt_descr; | |
213 | ||
214 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); | |
215 | gdt_descr.size = GDT_SIZE - 1; | |
216 | load_gdt(&gdt_descr); | |
217 | } | |
218 | ||
10a434fc | 219 | static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; |
f580366f YL |
220 | |
221 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | |
222 | { | |
223 | display_cacheinfo(c); | |
224 | } | |
225 | ||
226 | static struct cpu_dev __cpuinitdata default_cpu = { | |
227 | .c_init = default_init, | |
228 | .c_vendor = "Unknown", | |
10a434fc | 229 | .c_x86_vendor = X86_VENDOR_UNKNOWN, |
f580366f | 230 | }; |
f580366f YL |
231 | |
232 | int __cpuinit get_model_name(struct cpuinfo_x86 *c) | |
233 | { | |
234 | unsigned int *v; | |
01b2e16a | 235 | char *p, *q; |
f580366f YL |
236 | |
237 | if (c->extended_cpuid_level < 0x80000004) | |
238 | return 0; | |
239 | ||
240 | v = (unsigned int *) c->x86_model_id; | |
241 | cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); | |
242 | cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); | |
243 | cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); | |
244 | c->x86_model_id[48] = 0; | |
01b2e16a YL |
245 | |
246 | /* Intel chips right-justify this string for some dumb reason; | |
247 | undo that brain damage */ | |
248 | p = q = &c->x86_model_id[0]; | |
249 | while (*p == ' ') | |
250 | p++; | |
251 | if (p != q) { | |
252 | while (*p) | |
253 | *q++ = *p++; | |
254 | while (q <= &c->x86_model_id[48]) | |
255 | *q++ = '\0'; /* Zero-pad the rest */ | |
256 | } | |
257 | ||
f580366f YL |
258 | return 1; |
259 | } | |
260 | ||
261 | ||
262 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |
263 | { | |
0a488a53 | 264 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
f580366f YL |
265 | |
266 | n = c->extended_cpuid_level; | |
267 | ||
268 | if (n >= 0x80000005) { | |
269 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | |
9d31d35b YL |
270 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", |
271 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | |
f580366f YL |
272 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
273 | /* On K8 L1 TLB is inclusive, so don't count it */ | |
274 | c->x86_tlbsize = 0; | |
275 | } | |
276 | ||
0a488a53 YL |
277 | if (n < 0x80000006) /* Some chips just has a large L1. */ |
278 | return; | |
f580366f | 279 | |
0a488a53 YL |
280 | cpuid(0x80000006, &dummy, &ebx, &ecx, &edx); |
281 | l2size = ecx >> 16; | |
282 | c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff); | |
283 | ||
284 | c->x86_cache_size = l2size; | |
285 | ||
286 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | |
287 | l2size, ecx & 0xFF); | |
f580366f YL |
288 | } |
289 | ||
290 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |
291 | { | |
97e4db7c | 292 | #ifdef CONFIG_X86_HT |
f580366f YL |
293 | u32 eax, ebx, ecx, edx; |
294 | int index_msb, core_bits; | |
295 | ||
f580366f YL |
296 | if (!cpu_has(c, X86_FEATURE_HT)) |
297 | return; | |
298 | if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) | |
299 | goto out; | |
300 | ||
90427638 IM |
301 | if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) |
302 | return; | |
303 | ||
0a488a53 YL |
304 | cpuid(1, &eax, &ebx, &ecx, &edx); |
305 | ||
f580366f YL |
306 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
307 | ||
308 | if (smp_num_siblings == 1) { | |
309 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | |
310 | } else if (smp_num_siblings > 1) { | |
311 | ||
312 | if (smp_num_siblings > NR_CPUS) { | |
9d31d35b YL |
313 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", |
314 | smp_num_siblings); | |
f580366f YL |
315 | smp_num_siblings = 1; |
316 | return; | |
317 | } | |
318 | ||
319 | index_msb = get_count_order(smp_num_siblings); | |
320 | c->phys_proc_id = phys_pkg_id(index_msb); | |
321 | ||
322 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; | |
323 | ||
324 | index_msb = get_count_order(smp_num_siblings); | |
325 | ||
326 | core_bits = get_count_order(c->x86_max_cores); | |
327 | ||
328 | c->cpu_core_id = phys_pkg_id(index_msb) & | |
329 | ((1 << core_bits) - 1); | |
330 | } | |
0a488a53 | 331 | |
f580366f YL |
332 | out: |
333 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | |
334 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | |
335 | c->phys_proc_id); | |
336 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | |
337 | c->cpu_core_id); | |
338 | } | |
f580366f YL |
339 | #endif |
340 | } | |
341 | ||
342 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |
343 | { | |
344 | char *v = c->x86_vendor_id; | |
345 | int i; | |
346 | static int printed; | |
347 | ||
348 | for (i = 0; i < X86_VENDOR_NUM; i++) { | |
10a434fc YL |
349 | if (!cpu_devs[i]) |
350 | break; | |
351 | ||
352 | if (!strcmp(v, cpu_devs[i]->c_ident[0]) || | |
353 | (cpu_devs[i]->c_ident[1] && | |
354 | !strcmp(v, cpu_devs[i]->c_ident[1]))) { | |
355 | this_cpu = cpu_devs[i]; | |
356 | c->x86_vendor = this_cpu->c_x86_vendor; | |
357 | return; | |
f580366f YL |
358 | } |
359 | } | |
10a434fc | 360 | |
f580366f YL |
361 | if (!printed) { |
362 | printed++; | |
363 | printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); | |
364 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | |
365 | } | |
10a434fc | 366 | |
f580366f | 367 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
3da99c97 | 368 | this_cpu = &default_cpu; |
f580366f YL |
369 | } |
370 | ||
3da99c97 | 371 | void __cpuinit cpu_detect(struct cpuinfo_x86 *c) |
f580366f | 372 | { |
f580366f YL |
373 | /* Get vendor name */ |
374 | cpuid(0x00000000, (unsigned int *)&c->cpuid_level, | |
375 | (unsigned int *)&c->x86_vendor_id[0], | |
376 | (unsigned int *)&c->x86_vendor_id[8], | |
377 | (unsigned int *)&c->x86_vendor_id[4]); | |
378 | ||
9d31d35b | 379 | c->x86 = 4; |
f580366f YL |
380 | /* Intel-defined flags: level 0x00000001 */ |
381 | if (c->cpuid_level >= 0x00000001) { | |
3da99c97 YL |
382 | u32 junk, tfms, cap0, misc; |
383 | cpuid(0x00000001, &tfms, &misc, &junk, &cap0); | |
f580366f YL |
384 | c->x86 = (tfms >> 8) & 0xf; |
385 | c->x86_model = (tfms >> 4) & 0xf; | |
386 | c->x86_mask = tfms & 0xf; | |
387 | if (c->x86 == 0xf) | |
388 | c->x86 += (tfms >> 20) & 0xff; | |
389 | if (c->x86 >= 0x6) | |
9d31d35b YL |
390 | c->x86_model += ((tfms >> 16) & 0xf) << 4; |
391 | if (cap0 & (1<<19)) { | |
f580366f | 392 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
9d31d35b YL |
393 | c->x86_cache_alignment = c->x86_clflush_size; |
394 | } | |
f580366f | 395 | } |
3da99c97 YL |
396 | } |
397 | ||
398 | ||
399 | static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) | |
400 | { | |
401 | u32 tfms, xlvl; | |
402 | u32 ebx; | |
403 | ||
3da99c97 YL |
404 | /* Intel-defined flags: level 0x00000001 */ |
405 | if (c->cpuid_level >= 0x00000001) { | |
406 | u32 capability, excap; | |
407 | ||
408 | cpuid(0x00000001, &tfms, &ebx, &excap, &capability); | |
409 | c->x86_capability[0] = capability; | |
410 | c->x86_capability[4] = excap; | |
411 | } | |
f580366f | 412 | |
f580366f YL |
413 | /* AMD-defined flags: level 0x80000001 */ |
414 | xlvl = cpuid_eax(0x80000000); | |
415 | c->extended_cpuid_level = xlvl; | |
416 | if ((xlvl & 0xffff0000) == 0x80000000) { | |
417 | if (xlvl >= 0x80000001) { | |
418 | c->x86_capability[1] = cpuid_edx(0x80000001); | |
419 | c->x86_capability[6] = cpuid_ecx(0x80000001); | |
420 | } | |
f580366f YL |
421 | } |
422 | ||
423 | /* Transmeta-defined flags: level 0x80860001 */ | |
424 | xlvl = cpuid_eax(0x80860000); | |
425 | if ((xlvl & 0xffff0000) == 0x80860000) { | |
426 | /* Don't set x86_cpuid_level here for now to not confuse. */ | |
427 | if (xlvl >= 0x80860001) | |
428 | c->x86_capability[2] = cpuid_edx(0x80860001); | |
429 | } | |
430 | ||
f580366f YL |
431 | if (c->extended_cpuid_level >= 0x80000007) |
432 | c->x86_power = cpuid_edx(0x80000007); | |
433 | ||
87a1c441 YL |
434 | if (c->extended_cpuid_level >= 0x80000008) { |
435 | u32 eax = cpuid_eax(0x80000008); | |
436 | ||
437 | c->x86_virt_bits = (eax >> 8) & 0xff; | |
438 | c->x86_phys_bits = eax & 0xff; | |
439 | } | |
f580366f YL |
440 | } |
441 | ||
3da99c97 YL |
442 | /* Do some early cpuid on the boot CPU to get some parameter that are |
443 | needed before check_bugs. Everything advanced is in identify_cpu | |
444 | below. */ | |
445 | static void __init early_identify_cpu(struct cpuinfo_x86 *c) | |
f580366f | 446 | { |
3da99c97 YL |
447 | |
448 | c->x86_clflush_size = 64; | |
449 | c->x86_cache_alignment = c->x86_clflush_size; | |
450 | ||
451 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | |
452 | ||
453 | c->extended_cpuid_level = 0; | |
454 | ||
455 | cpu_detect(c); | |
456 | ||
457 | get_cpu_vendor(c); | |
458 | ||
459 | get_cpu_cap(c); | |
7e00df58 | 460 | |
10a434fc YL |
461 | if (this_cpu->c_early_init) |
462 | this_cpu->c_early_init(c); | |
f580366f YL |
463 | |
464 | validate_pat_support(c); | |
f580366f YL |
465 | } |
466 | ||
3da99c97 YL |
467 | void __init early_cpu_init(void) |
468 | { | |
10a434fc YL |
469 | struct cpu_dev **cdev; |
470 | int count = 0; | |
f580366f YL |
471 | |
472 | printk("KERNEL supported cpus:\n"); | |
10a434fc YL |
473 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
474 | struct cpu_dev *cpudev = *cdev; | |
475 | unsigned int j; | |
3da99c97 | 476 | |
10a434fc YL |
477 | if (count >= X86_VENDOR_NUM) |
478 | break; | |
479 | cpu_devs[count] = cpudev; | |
480 | count++; | |
481 | ||
f580366f | 482 | for (j = 0; j < 2; j++) { |
10a434fc | 483 | if (!cpudev->c_ident[j]) |
f580366f | 484 | continue; |
10a434fc YL |
485 | printk(" %s %s\n", cpudev->c_vendor, |
486 | cpudev->c_ident[j]); | |
f580366f YL |
487 | } |
488 | } | |
3da99c97 | 489 | |
3da99c97 | 490 | early_identify_cpu(&boot_cpu_data); |
f580366f YL |
491 | } |
492 | ||
7e00df58 PA |
493 | /* |
494 | * The NOPL instruction is supposed to exist on all CPUs with | |
495 | * family >= 6, unfortunately, that's not true in practice because | |
496 | * of early VIA chips and (more importantly) broken virtualizers that | |
497 | * are not easy to detect. Hence, probe for it based on first | |
498 | * principles. | |
499 | * | |
500 | * Note: no 64-bit chip is known to lack these, but put the code here | |
501 | * for consistency with 32 bits, and to make it utterly trivial to | |
502 | * diagnose the problem should it ever surface. | |
503 | */ | |
504 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | |
505 | { | |
506 | const u32 nopl_signature = 0x888c53b1; /* Random number */ | |
507 | u32 has_nopl = nopl_signature; | |
508 | ||
509 | clear_cpu_cap(c, X86_FEATURE_NOPL); | |
510 | if (c->x86 >= 6) { | |
511 | asm volatile("\n" | |
512 | "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | |
513 | "2:\n" | |
514 | " .section .fixup,\"ax\"\n" | |
515 | "3: xor %0,%0\n" | |
516 | " jmp 2b\n" | |
517 | " .previous\n" | |
518 | _ASM_EXTABLE(1b,3b) | |
519 | : "+a" (has_nopl)); | |
520 | ||
521 | if (has_nopl == nopl_signature) | |
522 | set_cpu_cap(c, X86_FEATURE_NOPL); | |
523 | } | |
524 | } | |
525 | ||
3da99c97 | 526 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
f580366f | 527 | { |
f580366f | 528 | c->extended_cpuid_level = 0; |
f580366f | 529 | |
3da99c97 | 530 | cpu_detect(c); |
f580366f YL |
531 | |
532 | get_cpu_vendor(c); | |
533 | ||
3da99c97 | 534 | get_cpu_cap(c); |
f580366f YL |
535 | |
536 | c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff; | |
537 | #ifdef CONFIG_SMP | |
538 | c->phys_proc_id = c->initial_apicid; | |
539 | #endif | |
f580366f | 540 | |
3da99c97 YL |
541 | if (c->extended_cpuid_level >= 0x80000004) |
542 | get_model_name(c); /* Default name */ | |
87a1c441 | 543 | |
3da99c97 | 544 | init_scattered_cpuid_features(c); |
7e00df58 | 545 | detect_nopl(c); |
f580366f YL |
546 | } |
547 | ||
548 | /* | |
549 | * This does the hard work of actually picking apart the CPU stuff... | |
550 | */ | |
9a250347 | 551 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
f580366f YL |
552 | { |
553 | int i; | |
554 | ||
3da99c97 YL |
555 | c->loops_per_jiffy = loops_per_jiffy; |
556 | c->x86_cache_size = -1; | |
557 | c->x86_vendor = X86_VENDOR_UNKNOWN; | |
558 | c->x86_model = c->x86_mask = 0; /* So far unknown... */ | |
559 | c->x86_vendor_id[0] = '\0'; /* Unset */ | |
560 | c->x86_model_id[0] = '\0'; /* Unset */ | |
3da99c97 YL |
561 | c->x86_max_cores = 1; |
562 | c->x86_coreid_bits = 0; | |
0a488a53 YL |
563 | c->x86_clflush_size = 64; |
564 | c->x86_cache_alignment = c->x86_clflush_size; | |
3da99c97 | 565 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
f580366f | 566 | |
3da99c97 | 567 | generic_identify(c); |
f580366f YL |
568 | |
569 | c->apicid = phys_pkg_id(0); | |
570 | ||
571 | /* | |
572 | * Vendor-specific initialization. In this section we | |
573 | * canonicalize the feature flags, meaning if there are | |
574 | * features a certain CPU supports which CPUID doesn't | |
575 | * tell us, CPUID claiming incorrect flags, or other bugs, | |
576 | * we handle them here. | |
577 | * | |
578 | * At the end of this section, c->x86_capability better | |
579 | * indicate the features this CPU genuinely supports! | |
580 | */ | |
581 | if (this_cpu->c_init) | |
582 | this_cpu->c_init(c); | |
583 | ||
584 | detect_ht(c); | |
585 | ||
586 | /* | |
587 | * On SMP, boot_cpu_data holds the common feature set between | |
588 | * all CPUs; so make sure that we indicate which features are | |
589 | * common between the CPUs. The first time this routine gets | |
590 | * executed, c == &boot_cpu_data. | |
591 | */ | |
592 | if (c != &boot_cpu_data) { | |
593 | /* AND the already accumulated flags with these */ | |
594 | for (i = 0; i < NCAPINTS; i++) | |
595 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | |
596 | } | |
597 | ||
598 | /* Clear all flags overriden by options */ | |
599 | for (i = 0; i < NCAPINTS; i++) | |
600 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | |
601 | ||
602 | #ifdef CONFIG_X86_MCE | |
603 | mcheck_init(c); | |
604 | #endif | |
605 | select_idle_routine(c); | |
606 | ||
607 | #ifdef CONFIG_NUMA | |
608 | numa_add_cpu(smp_processor_id()); | |
609 | #endif | |
610 | ||
611 | } | |
612 | ||
9d31d35b | 613 | void __init identify_boot_cpu(void) |
f580366f YL |
614 | { |
615 | identify_cpu(&boot_cpu_data); | |
616 | } | |
617 | ||
618 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | |
619 | { | |
620 | BUG_ON(c == &boot_cpu_data); | |
621 | identify_cpu(c); | |
622 | mtrr_ap_init(); | |
623 | } | |
624 | ||
b05f78f5 YL |
625 | struct msr_range { |
626 | unsigned min; | |
627 | unsigned max; | |
628 | }; | |
629 | ||
630 | static struct msr_range msr_range_array[] __cpuinitdata = { | |
631 | { 0x00000000, 0x00000418}, | |
632 | { 0xc0000000, 0xc000040b}, | |
633 | { 0xc0010000, 0xc0010142}, | |
634 | { 0xc0011000, 0xc001103b}, | |
635 | }; | |
636 | ||
637 | static void __cpuinit print_cpu_msr(void) | |
638 | { | |
639 | unsigned index; | |
640 | u64 val; | |
641 | int i; | |
642 | unsigned index_min, index_max; | |
643 | ||
644 | for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { | |
645 | index_min = msr_range_array[i].min; | |
646 | index_max = msr_range_array[i].max; | |
647 | for (index = index_min; index < index_max; index++) { | |
648 | if (rdmsrl_amd_safe(index, &val)) | |
649 | continue; | |
650 | printk(KERN_INFO " MSR%08x: %016llx\n", index, val); | |
651 | } | |
652 | } | |
653 | } | |
654 | ||
655 | static int show_msr __cpuinitdata; | |
656 | static __init int setup_show_msr(char *arg) | |
657 | { | |
658 | int num; | |
659 | ||
660 | get_option(&arg, &num); | |
661 | ||
662 | if (num > 0) | |
663 | show_msr = num; | |
664 | return 1; | |
665 | } | |
666 | __setup("show_msr=", setup_show_msr); | |
667 | ||
f580366f YL |
668 | static __init int setup_noclflush(char *arg) |
669 | { | |
670 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | |
671 | return 1; | |
672 | } | |
673 | __setup("noclflush", setup_noclflush); | |
674 | ||
675 | void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) | |
676 | { | |
677 | if (c->x86_model_id[0]) | |
678 | printk(KERN_CONT "%s", c->x86_model_id); | |
679 | ||
680 | if (c->x86_mask || c->cpuid_level >= 0) | |
681 | printk(KERN_CONT " stepping %02x\n", c->x86_mask); | |
682 | else | |
683 | printk(KERN_CONT "\n"); | |
b05f78f5 YL |
684 | |
685 | #ifdef CONFIG_SMP | |
686 | if (c->cpu_index < show_msr) | |
687 | print_cpu_msr(); | |
688 | #else | |
689 | if (show_msr) | |
690 | print_cpu_msr(); | |
691 | #endif | |
f580366f YL |
692 | } |
693 | ||
694 | static __init int setup_disablecpuid(char *arg) | |
695 | { | |
696 | int bit; | |
697 | if (get_option(&arg, &bit) && bit < NCAPINTS*32) | |
698 | setup_clear_cpu_cap(bit); | |
699 | else | |
700 | return 0; | |
701 | return 1; | |
702 | } | |
703 | __setup("clearcpuid=", setup_disablecpuid); | |
0f0124fa | 704 | |
0f0124fa YL |
705 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
706 | ||
707 | struct x8664_pda **_cpu_pda __read_mostly; | |
708 | EXPORT_SYMBOL(_cpu_pda); | |
709 | ||
710 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |
711 | ||
712 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | |
713 | ||
714 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | |
715 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | |
716 | ||
717 | static int do_not_nx __cpuinitdata; | |
718 | ||
719 | /* noexec=on|off | |
720 | Control non executable mappings for 64bit processes. | |
721 | ||
722 | on Enable(default) | |
723 | off Disable | |
724 | */ | |
725 | static int __init nonx_setup(char *str) | |
726 | { | |
727 | if (!str) | |
728 | return -EINVAL; | |
729 | if (!strncmp(str, "on", 2)) { | |
730 | __supported_pte_mask |= _PAGE_NX; | |
731 | do_not_nx = 0; | |
732 | } else if (!strncmp(str, "off", 3)) { | |
733 | do_not_nx = 1; | |
734 | __supported_pte_mask &= ~_PAGE_NX; | |
735 | } | |
736 | return 0; | |
737 | } | |
738 | early_param("noexec", nonx_setup); | |
739 | ||
740 | int force_personality32; | |
741 | ||
742 | /* noexec32=on|off | |
743 | Control non executable heap for 32bit processes. | |
744 | To control the stack too use noexec=off | |
745 | ||
746 | on PROT_READ does not imply PROT_EXEC for 32bit processes (default) | |
747 | off PROT_READ implies PROT_EXEC | |
748 | */ | |
749 | static int __init nonx32_setup(char *str) | |
750 | { | |
751 | if (!strcmp(str, "on")) | |
752 | force_personality32 &= ~READ_IMPLIES_EXEC; | |
753 | else if (!strcmp(str, "off")) | |
754 | force_personality32 |= READ_IMPLIES_EXEC; | |
755 | return 1; | |
756 | } | |
757 | __setup("noexec32=", nonx32_setup); | |
758 | ||
759 | void pda_init(int cpu) | |
760 | { | |
761 | struct x8664_pda *pda = cpu_pda(cpu); | |
762 | ||
763 | /* Setup up data that may be needed in __get_free_pages early */ | |
ada85708 JF |
764 | loadsegment(fs, 0); |
765 | loadsegment(gs, 0); | |
0f0124fa YL |
766 | /* Memory clobbers used to order PDA accessed */ |
767 | mb(); | |
768 | wrmsrl(MSR_GS_BASE, pda); | |
769 | mb(); | |
770 | ||
771 | pda->cpunumber = cpu; | |
772 | pda->irqcount = -1; | |
773 | pda->kernelstack = (unsigned long)stack_thread_info() - | |
774 | PDA_STACKOFFSET + THREAD_SIZE; | |
775 | pda->active_mm = &init_mm; | |
776 | pda->mmu_state = 0; | |
777 | ||
778 | if (cpu == 0) { | |
779 | /* others are initialized in smpboot.c */ | |
780 | pda->pcurrent = &init_task; | |
781 | pda->irqstackptr = boot_cpu_stack; | |
49800efc | 782 | pda->irqstackptr += IRQSTACKSIZE - 64; |
0f0124fa | 783 | } else { |
49800efc AH |
784 | if (!pda->irqstackptr) { |
785 | pda->irqstackptr = (char *) | |
786 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | |
787 | if (!pda->irqstackptr) | |
788 | panic("cannot allocate irqstack for cpu %d", | |
789 | cpu); | |
790 | pda->irqstackptr += IRQSTACKSIZE - 64; | |
791 | } | |
0f0124fa YL |
792 | |
793 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | |
794 | pda->nodenumber = cpu_to_node(cpu); | |
795 | } | |
0f0124fa YL |
796 | } |
797 | ||
798 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | |
cbcd79c2 | 799 | DEBUG_STKSZ] __page_aligned_bss; |
0f0124fa YL |
800 | |
801 | extern asmlinkage void ignore_sysret(void); | |
802 | ||
803 | /* May not be marked __init: used by software suspend */ | |
804 | void syscall_init(void) | |
805 | { | |
806 | /* | |
807 | * LSTAR and STAR live in a bit strange symbiosis. | |
808 | * They both write to the same internal register. STAR allows to | |
809 | * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip. | |
810 | */ | |
811 | wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32); | |
812 | wrmsrl(MSR_LSTAR, system_call); | |
813 | wrmsrl(MSR_CSTAR, ignore_sysret); | |
814 | ||
815 | #ifdef CONFIG_IA32_EMULATION | |
816 | syscall32_cpu_init(); | |
817 | #endif | |
818 | ||
819 | /* Flags to clear on syscall */ | |
820 | wrmsrl(MSR_SYSCALL_MASK, | |
821 | X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL); | |
822 | } | |
823 | ||
824 | void __cpuinit check_efer(void) | |
825 | { | |
826 | unsigned long efer; | |
827 | ||
828 | rdmsrl(MSR_EFER, efer); | |
829 | if (!(efer & EFER_NX) || do_not_nx) | |
830 | __supported_pte_mask &= ~_PAGE_NX; | |
831 | } | |
832 | ||
833 | unsigned long kernel_eflags; | |
834 | ||
835 | /* | |
836 | * Copies of the original ist values from the tss are only accessed during | |
837 | * debugging, no special alignment required. | |
838 | */ | |
839 | DEFINE_PER_CPU(struct orig_ist, orig_ist); | |
840 | ||
841 | /* | |
842 | * cpu_init() initializes state that is per-CPU. Some data is already | |
843 | * initialized (naturally) in the bootstrap process, such as the GDT | |
844 | * and IDT. We reload them nevertheless, this function acts as a | |
845 | * 'CPU state barrier', nothing should get across. | |
846 | * A lot of state is already set up in PDA init. | |
847 | */ | |
848 | void __cpuinit cpu_init(void) | |
849 | { | |
850 | int cpu = stack_smp_processor_id(); | |
851 | struct tss_struct *t = &per_cpu(init_tss, cpu); | |
852 | struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); | |
853 | unsigned long v; | |
854 | char *estacks = NULL; | |
855 | struct task_struct *me; | |
856 | int i; | |
857 | ||
858 | /* CPU 0 is initialised in head64.c */ | |
859 | if (cpu != 0) | |
860 | pda_init(cpu); | |
861 | else | |
862 | estacks = boot_exception_stacks; | |
863 | ||
864 | me = current; | |
865 | ||
866 | if (cpu_test_and_set(cpu, cpu_initialized)) | |
867 | panic("CPU#%d already initialized!\n", cpu); | |
868 | ||
869 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | |
870 | ||
871 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | |
872 | ||
873 | /* | |
874 | * Initialize the per-CPU GDT with the boot GDT, | |
875 | * and set up the GDT descriptor: | |
876 | */ | |
877 | ||
878 | switch_to_new_gdt(); | |
879 | load_idt((const struct desc_ptr *)&idt_descr); | |
880 | ||
881 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); | |
882 | syscall_init(); | |
883 | ||
884 | wrmsrl(MSR_FS_BASE, 0); | |
885 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | |
886 | barrier(); | |
887 | ||
888 | check_efer(); | |
6e1cb38a SS |
889 | if (cpu != 0 && x2apic) |
890 | enable_x2apic(); | |
0f0124fa YL |
891 | |
892 | /* | |
893 | * set up and load the per-CPU TSS | |
894 | */ | |
b55793f7 | 895 | if (!orig_ist->ist[0]) { |
0f0124fa | 896 | static const unsigned int order[N_EXCEPTION_STACKS] = { |
b55793f7 AH |
897 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
898 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | |
0f0124fa | 899 | }; |
b55793f7 AH |
900 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
901 | if (cpu) { | |
902 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | |
903 | if (!estacks) | |
904 | panic("Cannot allocate exception " | |
905 | "stack %ld %d\n", v, cpu); | |
906 | } | |
907 | estacks += PAGE_SIZE << order[v]; | |
908 | orig_ist->ist[v] = t->x86_tss.ist[v] = | |
909 | (unsigned long)estacks; | |
0f0124fa | 910 | } |
0f0124fa YL |
911 | } |
912 | ||
913 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | |
914 | /* | |
915 | * <= is required because the CPU will access up to | |
916 | * 8 bits beyond the end of the IO permission bitmap. | |
917 | */ | |
918 | for (i = 0; i <= IO_BITMAP_LONGS; i++) | |
919 | t->io_bitmap[i] = ~0UL; | |
920 | ||
921 | atomic_inc(&init_mm.mm_count); | |
922 | me->active_mm = &init_mm; | |
923 | if (me->mm) | |
924 | BUG(); | |
925 | enter_lazy_tlb(&init_mm, me); | |
926 | ||
927 | load_sp0(t, ¤t->thread); | |
928 | set_tss_desc(cpu, t); | |
929 | load_TR_desc(); | |
930 | load_LDT(&init_mm.context); | |
931 | ||
932 | #ifdef CONFIG_KGDB | |
933 | /* | |
934 | * If the kgdb is connected no debug regs should be altered. This | |
935 | * is only applicable when KGDB and a KGDB I/O module are built | |
936 | * into the kernel and you are using early debugging with | |
937 | * kgdbwait. KGDB will control the kernel HW breakpoint registers. | |
938 | */ | |
939 | if (kgdb_connected && arch_kgdb_ops.correct_hw_break) | |
940 | arch_kgdb_ops.correct_hw_break(); | |
941 | else { | |
942 | #endif | |
943 | /* | |
944 | * Clear all 6 debug registers: | |
945 | */ | |
946 | ||
947 | set_debugreg(0UL, 0); | |
948 | set_debugreg(0UL, 1); | |
949 | set_debugreg(0UL, 2); | |
950 | set_debugreg(0UL, 3); | |
951 | set_debugreg(0UL, 6); | |
952 | set_debugreg(0UL, 7); | |
953 | #ifdef CONFIG_KGDB | |
954 | /* If the kgdb is connected no debug regs should be altered. */ | |
955 | } | |
956 | #endif | |
957 | ||
958 | fpu_init(); | |
959 | ||
960 | raw_local_save_flags(kernel_eflags); | |
961 | ||
962 | if (is_uv_system()) | |
963 | uv_cpu_init(); | |
964 | } |