arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
#ifdef CONFIG_X86_DS
	/* Free any DS contexts that have not been properly released. */
	if (unlikely(t->ds_ctx)) {
		/* we clear debugctl to make sure DS is not used. */
		update_debugctlmsr(0);
		ds_free(t->ds_ctx);
	}
#endif /* CONFIG_X86_DS */
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

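/*
 * Illustrative sketch only (not part of this file): get_tsc_mode() and
 * set_tsc_mode() above are normally reached from user space through
 * prctl(2) with PR_GET_TSC/PR_SET_TSC.  The snippet is user-space code
 * shown purely for context, so it is guarded out with #if 0.
 */
#if 0
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	int mode = 0;

	/* Request SIGSEGV on rdtsc for this task; lands in set_tsc_mode() */
	prctl(PR_SET_TSC, PR_TSC_SIGSEGV);

	/* Read the mode back; lands in get_tsc_mode() */
	prctl(PR_GET_TSC, &mode);
	printf("TSC mode: %d\n", mode);
	return 0;
}
#endif
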
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;

#ifdef CONFIG_X86_DS
	{
		unsigned long ds_prev = 0, ds_next = 0;

		if (prev->ds_ctx)
			ds_prev = (unsigned long)prev->ds_ctx->ds;
		if (next->ds_ctx)
			ds_next = (unsigned long)next->ds_ctx->ds;

		if (ds_next != ds_prev) {
			/*
			 * We clear debugctl to make sure DS
			 * is not in use when we change it:
			 */
			debugctl = 0;
			update_debugctlmsr(0);
			wrmsrl(MSR_IA32_DS_AREA, ds_next);
		}
	}
#endif /* CONFIG_X86_DS */

	if (next->debugctlmsr != debugctl)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

#ifdef CONFIG_X86_PTRACE_BTS
	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif /* CONFIG_X86_PTRACE_BTS */
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);


	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

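/*
 * Illustrative sketch only (not part of this file): get_wchan() above is
 * what backs /proc/<pid>/wchan, so the wait channel of a sleeping task can
 * be inspected from user space.  The snippet is user-space code shown
 * purely for context (pid 1 is just an example), so it is guarded out
 * with #if 0.
 */
#if 0
#include <stdio.h>

int main(void)
{
	char wchan[64] = "";
	FILE *f = fopen("/proc/1/wchan", "r");	/* pid 1 as an example */

	if (f) {
		fscanf(f, "%63s", wchan);
		fclose(f);
	}
	/* prints the symbol the task is blocked in, or 0 if it is running */
	printf("wchan: %s\n", wchan);
	return 0;
}
#endif
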
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

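/*
 * Illustrative sketch only (not part of this file): user space reaches
 * do_arch_prctl() above through the arch_prctl(2) syscall, e.g. to read
 * the FS base used for thread-local storage.  The snippet is user-space
 * code shown purely for context, so it is guarded out with #if 0.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>		/* ARCH_GET_FS, ARCH_SET_GS, ... */

int main(void)
{
	unsigned long fsbase = 0;

	/* ARCH_GET_FS stores the current FS base at the supplied address */
	syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase);
	printf("FS base: %#lx\n", fsbase);
	return 0;
}
#endif
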
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}