tracing, Text Edit Lock - SMP alternatives support
arch/x86/kernel/alternative.c
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>
#include <asm/cacheflush.h>
#include <asm/io.h>

#define MAX_PATCH_LEN (255-1)

#ifdef CONFIG_HOTPLUG_CPU
static int smp_alt_once;

static int __init bootonly(char *str)
{
        smp_alt_once = 1;
        return 1;
}
__setup("smp-alt-boot", bootonly);
#else
#define smp_alt_once 1
#endif

static int debug_alternative;

static int __init debug_alt(char *str)
{
        debug_alternative = 1;
        return 1;
}
__setup("debug-alternative", debug_alt);

static int noreplace_smp;

static int __init setup_noreplace_smp(char *str)
{
        noreplace_smp = 1;
        return 1;
}
__setup("noreplace-smp", setup_noreplace_smp);

#ifdef CONFIG_PARAVIRT
static int noreplace_paravirt = 0;

static int __init setup_noreplace_paravirt(char *str)
{
        noreplace_paravirt = 1;
        return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif

#define DPRINTK(fmt, args...)                                  \
do {                                                           \
        if (debug_alternative)                                 \
                printk(KERN_DEBUG fmt, args);                  \
} while (0)

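/*
 * Tables of multi-byte nops, one flavour per CPU family, indexed by the
 * desired length in bytes.  add_nops() below uses them to pad patch sites.
 * The exact encodings come from <asm/nops.h>; for example, the two-byte
 * K8 nop is typically "osp nop" (0x66 0x90), while the P6 family uses the
 * long-nop forms built on 0x0f 0x1f.
 */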
#ifdef GENERIC_NOP1
/* Use inline assembly to define this because the nops are defined
   as inline assembly strings in the include files and we cannot
   get them easily into strings. */
asm("\t.section .rodata, \"a\"\nintelnops: "
        GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
        GENERIC_NOP7 GENERIC_NOP8
    "\t.previous");
extern const unsigned char intelnops[];
static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
        NULL,
        intelnops,
        intelnops + 1,
        intelnops + 1 + 2,
        intelnops + 1 + 2 + 3,
        intelnops + 1 + 2 + 3 + 4,
        intelnops + 1 + 2 + 3 + 4 + 5,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K8_NOP1
asm("\t.section .rodata, \"a\"\nk8nops: "
        K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
        K8_NOP7 K8_NOP8
    "\t.previous");
extern const unsigned char k8nops[];
static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
        NULL,
        k8nops,
        k8nops + 1,
        k8nops + 1 + 2,
        k8nops + 1 + 2 + 3,
        k8nops + 1 + 2 + 3 + 4,
        k8nops + 1 + 2 + 3 + 4 + 5,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef K7_NOP1
asm("\t.section .rodata, \"a\"\nk7nops: "
        K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
        K7_NOP7 K7_NOP8
    "\t.previous");
extern const unsigned char k7nops[];
static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
        NULL,
        k7nops,
        k7nops + 1,
        k7nops + 1 + 2,
        k7nops + 1 + 2 + 3,
        k7nops + 1 + 2 + 3 + 4,
        k7nops + 1 + 2 + 3 + 4 + 5,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef P6_NOP1
asm("\t.section .rodata, \"a\"\np6nops: "
        P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
        P6_NOP7 P6_NOP8
    "\t.previous");
extern const unsigned char p6nops[];
static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
        NULL,
        p6nops,
        p6nops + 1,
        p6nops + 1 + 2,
        p6nops + 1 + 2 + 3,
        p6nops + 1 + 2 + 3 + 4,
        p6nops + 1 + 2 + 3 + 4 + 5,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6,
        p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
#endif

#ifdef CONFIG_X86_64

extern char __vsyscall_0;
const unsigned char *const *find_nop_table(void)
{
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_has(X86_FEATURE_NOPL))
                return p6_nops;
        else
                return k8_nops;
}

#else /* CONFIG_X86_64 */

const unsigned char *const *find_nop_table(void)
{
        if (boot_cpu_has(X86_FEATURE_K8))
                return k8_nops;
        else if (boot_cpu_has(X86_FEATURE_K7))
                return k7_nops;
        else if (boot_cpu_has(X86_FEATURE_NOPL))
                return p6_nops;
        else
                return intel_nops;
}

#endif /* CONFIG_X86_64 */

/* Use this to add nops to a buffer, then text_poke the whole buffer. */
void add_nops(void *insns, unsigned int len)
{
        const unsigned char *const *noptable = find_nop_table();

        while (len > 0) {
                unsigned int noplen = len;
                if (noplen > ASM_NOP_MAX)
                        noplen = ASM_NOP_MAX;
                memcpy(insns, noptable[noplen], noplen);
                insns += noplen;
                len -= noplen;
        }
}
EXPORT_SYMBOL_GPL(add_nops);
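
/*
 * Illustrative use of add_nops() (a sketch, not called anywhere as-is):
 * pad a 7-byte patch site after copying in a 2-byte replacement, the same
 * pattern apply_alternatives() follows below:
 *
 *      char buf[7];
 *      memcpy(buf, replacement, 2);
 *      add_nops(buf + 2, 5);           // one 5-byte nop from the table
 *      text_poke_early(site, buf, 7);  // "replacement" and "site" are
 *                                      // placeholders for this example
 */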

extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];

/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code.  This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   Tough.  Make sure you disable such features by hand. */

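/*
 * Patch sites are declared with the alternative() macros from
 * <asm/alternative.h>, which emit a struct alt_instr into the
 * .altinstructions section for each site.  A rough sketch of a caller
 * (the exact macro arguments may differ):
 *
 *      alternative("lock; addl $0,0(%%esp)",   // original instruction
 *                  "mfence",                   // replacement
 *                  X86_FEATURE_XMM2);          // required CPU feature
 *
 * apply_alternatives() walks those records and, when the CPU has the
 * feature, copies the replacement over the original and nop-pads the
 * remaining bytes.
 */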
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
        struct alt_instr *a;
        char insnbuf[MAX_PATCH_LEN];

        DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
        for (a = start; a < end; a++) {
                u8 *instr = a->instr;
                BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                if (!boot_cpu_has(a->cpuid))
                        continue;
#ifdef CONFIG_X86_64
                /* vsyscall code is not mapped yet. resolve it manually. */
                if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
                        instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
                        DPRINTK("%s: vsyscall fixup: %p => %p\n",
                                __func__, a->instr, instr);
                }
#endif
                memcpy(insnbuf, a->replacement, a->replacementlen);
                add_nops(insnbuf + a->replacementlen,
                         a->instrlen - a->replacementlen);
                text_poke_early(instr, insnbuf, a->instrlen);
        }
}

#ifdef CONFIG_SMP

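/*
 * UP/SMP lock-prefix patching: the LOCK_PREFIX macro records the address
 * of each emitted lock prefix in the .smp_locks section.  On a UP kernel
 * (or before other CPUs come up) the 0xf0 lock prefix is rewritten to a
 * 0x3e DS segment override, which is effectively a no-op on these memory
 * operands, so e.g. "lock; incl (%reg)" behaves like a plain
 * "incl (%reg)".  Switching back to SMP restores the lock prefix.
 */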
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;

        mutex_lock(&text_mutex);
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                /* turn DS segment override prefix into lock prefix */
                text_poke(*ptr, ((unsigned char []){0xf0}), 1);
        }
        mutex_unlock(&text_mutex);
}

static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
        u8 **ptr;

        if (noreplace_smp)
                return;

        mutex_lock(&text_mutex);
        for (ptr = start; ptr < end; ptr++) {
                if (*ptr < text)
                        continue;
                if (*ptr > text_end)
                        continue;
                /* turn lock prefix into DS segment override prefix */
                text_poke(*ptr, ((unsigned char []){0x3E}), 1);
        }
        mutex_unlock(&text_mutex);
}

struct smp_alt_module {
        /* the module (NULL for the core kernel) that owns these entries */
        struct module *mod;
        char *name;

        /* ptrs to lock prefixes */
        u8 **locks;
        u8 **locks_end;

        /* .text segment, needed to avoid patching init code ;) */
        u8 *text;
        u8 *text_end;

        struct list_head next;
};
static LIST_HEAD(smp_alt_modules);
static DEFINE_MUTEX(smp_alt);
static int smp_mode = 1;        /* protected by smp_alt */

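/*
 * Registration of lock-prefix tables.  The core kernel registers its own
 * .smp_locks range from alternative_instructions() below; modules are
 * expected to register theirs when they are loaded (on x86 this happens
 * from the module loading code) and to drop them again via
 * alternatives_smp_module_del() on removal.
 */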
void alternatives_smp_module_add(struct module *mod, char *name,
                                 void *locks, void *locks_end,
                                 void *text, void *text_end)
{
        struct smp_alt_module *smp;

        if (noreplace_smp)
                return;

        if (smp_alt_once) {
                if (boot_cpu_has(X86_FEATURE_UP))
                        alternatives_smp_unlock(locks, locks_end,
                                                text, text_end);
                return;
        }

        smp = kzalloc(sizeof(*smp), GFP_KERNEL);
        if (!smp)
                return; /* we'll run the (safe but slow) SMP code then ... */

        smp->mod = mod;
        smp->name = name;
        smp->locks = locks;
        smp->locks_end = locks_end;
        smp->text = text;
        smp->text_end = text_end;
        DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
                __func__, smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);

        mutex_lock(&smp_alt);
        list_add_tail(&smp->next, &smp_alt_modules);
        if (boot_cpu_has(X86_FEATURE_UP))
                alternatives_smp_unlock(smp->locks, smp->locks_end,
                                        smp->text, smp->text_end);
        mutex_unlock(&smp_alt);
}

void alternatives_smp_module_del(struct module *mod)
{
        struct smp_alt_module *item;

        if (smp_alt_once || noreplace_smp)
                return;

        mutex_lock(&smp_alt);
        list_for_each_entry(item, &smp_alt_modules, next) {
                if (mod != item->mod)
                        continue;
                list_del(&item->next);
                mutex_unlock(&smp_alt);
                DPRINTK("%s: %s\n", __func__, item->name);
                kfree(item);
                return;
        }
        mutex_unlock(&smp_alt);
}

void alternatives_smp_switch(int smp)
{
        struct smp_alt_module *mod;

#ifdef CONFIG_LOCKDEP
        /*
         * Older binutils section handling bug prevented
         * alternatives-replacement from working reliably.
         *
         * If this still occurs then you should see a hang
         * or crash shortly after this line:
         */
        printk(KERN_INFO "lockdep: fixing up alternatives.\n");
#endif

        if (noreplace_smp || smp_alt_once)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));

        mutex_lock(&smp_alt);

        /*
         * Avoid unnecessary switches because it forces JIT based VMs to
         * throw away all cached translations, which can be quite costly.
         */
        if (smp == smp_mode) {
                /* nothing */
        } else if (smp) {
                printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_lock(mod->locks, mod->locks_end,
                                              mod->text, mod->text_end);
        } else {
                printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
                list_for_each_entry(mod, &smp_alt_modules, next)
                        alternatives_smp_unlock(mod->locks, mod->locks_end,
                                                mod->text, mod->text_end);
        }
        smp_mode = smp;
        mutex_unlock(&smp_alt);
}
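
/*
 * alternatives_smp_switch() is driven by the CPU bringup/hotplug path:
 * roughly, switch to SMP code when a second CPU is about to come online
 * and, with CPU hotplug, back to UP code once only one CPU is left (see
 * the callers in the x86 smpboot code).  alternative_instructions() below
 * may also switch to UP mode at boot.
 */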

#endif

#ifdef CONFIG_PARAVIRT
void apply_paravirt(struct paravirt_patch_site *start,
                    struct paravirt_patch_site *end)
{
        struct paravirt_patch_site *p;
        char insnbuf[MAX_PATCH_LEN];

        if (noreplace_paravirt)
                return;

        for (p = start; p < end; p++) {
                unsigned int used;

                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
                used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
                                         (unsigned long)p->instr, p->len);

                BUG_ON(used > p->len);

                /* Pad the rest with nops */
                add_nops(insnbuf + used, p->len - used);
                text_poke_early(p->instr, insnbuf, p->len);
        }
}
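
/*
 * Each paravirt patch site records the operation type, the registers it
 * may clobber and the site length.  pv_init_ops.patch() decides per site
 * whether to leave the indirect call alone, turn it into a direct call,
 * or inline a native instruction sequence; whatever comes out shorter
 * than the original site is nop-padded above before being written back.
 */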
extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */

void __init alternative_instructions(void)
{
        /* The patching is not fully atomic, so try to avoid local
           interrupts that might execute the code being patched.
           Other CPUs are not running. */
        stop_nmi();

        /*
         * Don't stop machine check exceptions while patching.
         * MCEs only happen when something got corrupted and in this
         * case we must do something about the corruption.
         * Ignoring it is worse than an unlikely patching race.
         * Also machine checks tend to be broadcast and if one CPU
         * goes into machine check the others follow quickly, so we don't
         * expect a machine check to cause undue problems during code
         * patching.
         */

        apply_alternatives(__alt_instructions, __alt_instructions_end);

        /* switch to patch-once-at-boottime-only mode and free the
         * tables in case we know the number of CPUs will never ever
         * change */
#ifdef CONFIG_HOTPLUG_CPU
        if (num_possible_cpus() < 2)
                smp_alt_once = 1;
#endif

#ifdef CONFIG_SMP
        if (smp_alt_once) {
                if (num_possible_cpus() == 1) {
                        printk(KERN_INFO "SMP alternatives: switching to UP code\n");
                        set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
                        set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);

                        alternatives_smp_unlock(__smp_locks, __smp_locks_end,
                                                _text, _etext);
                }
        } else {
                alternatives_smp_module_add(NULL, "core kernel",
                                            __smp_locks, __smp_locks_end,
                                            _text, _etext);

                /* Only switch to UP mode if we don't immediately boot others */
                if (num_present_cpus() == 1 || setup_max_cpus <= 1)
                        alternatives_smp_switch(0);
        }
#endif
        apply_paravirt(__parainstructions, __parainstructions_end);

        if (smp_alt_once)
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);

        restart_nmi();
}

/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions.  And on the local CPU you need to be protected against NMI
 * or MCE handlers seeing an inconsistent instruction while you patch.
 */
void *text_poke_early(void *addr, const void *opcode, size_t len)
{
        unsigned long flags;
        local_irq_save(flags);
        memcpy(addr, opcode, len);
        local_irq_restore(flags);
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        return addr;
}
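
/*
 * text_poke_early() writes through the normal kernel mapping and is only
 * safe while nothing else can execute the affected code (early boot, or
 * module text before the module goes live).  For patching live code,
 * text_poke() below maps the target page(s) into a temporary writable
 * alias with vmap(), so it also works when kernel text is mapped
 * read-only (e.g. with CONFIG_DEBUG_RODATA).
 */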

/**
 * text_poke - Update instructions on a live kernel
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * Only atomic text poke/set should be allowed when not doing early patching.
 * It means the size must be writable atomically and the address must be
 * aligned in a way that permits an atomic write.  It also makes sure we fit
 * on a single page.
 */
void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
{
        char *vaddr;
        int nr_pages = 2;
        struct page *pages[2];
        int i;

        might_sleep();
        if (!core_kernel_text((unsigned long)addr)) {
                pages[0] = vmalloc_to_page(addr);
                pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
        } else {
                pages[0] = virt_to_page(addr);
                WARN_ON(!PageReserved(pages[0]));
                pages[1] = virt_to_page(addr + PAGE_SIZE);
        }
        BUG_ON(!pages[0]);
        if (!pages[1])
                nr_pages = 1;
        vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
        BUG_ON(!vaddr);
        local_irq_disable();
        memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
        local_irq_enable();
        vunmap(vaddr);
        sync_core();
        /* Could also do a CLFLUSH here to speed up CPU recovery; but
           that causes hangs on some VIA CPUs. */
        for (i = 0; i < len; i++)
                BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
        return addr;
}
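
/*
 * Illustrative text_poke() usage (a sketch; alternatives_smp_lock() above
 * does essentially this for each recorded lock prefix, with "addr" standing
 * in for the patch site):
 *
 *      mutex_lock(&text_mutex);
 *      text_poke(addr, ((unsigned char []){0xf0}), 1);
 *      mutex_unlock(&text_mutex);
 *
 * Callers take text_mutex themselves so that concurrent text editors
 * (kprobes, SMP alternative switching) serialize against each other.
 */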