/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_unregister);

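/*
 * Example (illustrative sketch only, not part of this file): a caller
 * can watch idle transitions by registering a notifier_block.  The
 * callback and variable names below are hypothetical.
 *
 *	static int my_idle_notify(struct notifier_block *nb,
 *				  unsigned long action, void *data)
 *	{
 *		if (action == IDLE_START)
 *			pr_debug("cpu entering idle\n");
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_idle_nb = {
 *		.notifier_call = my_idle_notify,
 *	};
 *
 *	idle_notifier_register(&my_idle_nb);
 */
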
void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
	BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick(1);
		while (!need_resched()) {

			rmb();

			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			/* Don't trace irqs off for idle */
			stop_critical_timings();
			pm_idle();
			start_critical_timings();
			/*
			 * In many cases the interrupt that ended idle
			 * has already called exit_idle.  But some idle
			 * loops can be woken up without interrupt.
			 */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, 1);
	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
			regs->sp, regs->flags);
	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->ax, regs->bx, regs->cx);
	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->dx, regs->si, regs->di);
	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->bp, regs->r8, regs->r9);
	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	if (!all)
		return;

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
			es, cr0);
	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
			cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk(KERN_INFO "CPU %d:", smp_processor_id());
	__show_regs(regs, 1);
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
#ifdef CONFIG_X86_DS
	/* Free any BTS tracers that have not been properly released. */
	if (unlikely(current->bts)) {
		ds_release_bts(current->bts);
		current->bts = NULL;

		kfree(current->bts_buffer);
		current->bts_buffer = NULL;
		current->bts_size = 0;
	}
#endif /* CONFIG_X86_DS */
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	tsk->fpu_counter = 0;
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

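/*
 * Note (illustrative sketch, not part of this file): a GDT descriptor
 * stores its 32-bit base scattered across three fields; fill_ldt()
 * packs them and get_desc_base() reassembles them, roughly:
 *
 *	base = desc->base0 | (desc->base1 << 16) | (desc->base2 << 24);
 */
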
/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	savesegment(gs, p->thread.gsindex);
	savesegment(fs, p->thread.fsindex);
	savesegment(es, p->thread.es);
	savesegment(ds, p->thread.ds);

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
	loadsegment(fs, 0);
	loadsegment(es, 0);
	loadsegment(ds, 0);
	load_gs_index(0);
	regs->ip = new_ip;
	regs->sp = new_sp;
	write_pda(oldrsp, new_sp);
	regs->cs = __USER_CS;
	regs->ss = __USER_DS;
	regs->flags = 0x200;
	set_fs(USER_DS);
	/*
	 * Free the old FP and other extended state
	 */
	free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
	write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
	preempt_disable();
	if (!test_and_set_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_disable_TSC();
	preempt_enable();
}

static void hard_enable_TSC(void)
{
	write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
	preempt_disable();
	if (test_and_clear_thread_flag(TIF_NOTSC))
		/*
		 * Must flip the CPU state synchronously with
		 * TIF_NOTSC in the current running context.
		 */
		hard_enable_TSC();
	preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
	unsigned int val;

	if (test_thread_flag(TIF_NOTSC))
		val = PR_TSC_SIGSEGV;
	else
		val = PR_TSC_ENABLE;

	return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
	if (val == PR_TSC_SIGSEGV)
		disable_TSC();
	else if (val == PR_TSC_ENABLE)
		enable_TSC();
	else
		return -EINVAL;

	return 0;
}

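/*
 * Illustrative sketch (not part of this file): userspace reaches
 * get_tsc_mode()/set_tsc_mode() via prctl().  Once TSC reads are
 * disabled, a RDTSC instruction raises SIGSEGV for this task:
 *
 *	#include <sys/prctl.h>
 *
 *	prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
 *	// rdtsc would now fault in this task
 *	prctl(PR_SET_TSC, PR_TSC_ENABLE, 0, 0, 0);
 */
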
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);
	else if (next->debugctlmsr != prev->debugctlmsr)
		update_debugctlmsr(next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
		/* prev and next are different */
		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
			hard_disable_TSC();
		else
			hard_enable_TSC();
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

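/*
 * Illustrative sketch (not part of this file): the TIF_IO_BITMAP paths
 * above are exercised when a task has been granted I/O port access,
 * e.g. a privileged process calling
 *
 *	#include <sys/io.h>
 *
 *	ioperm(0x378, 3, 1);	// allow access to ports 0x378-0x37a
 *
 * which allocates thread.io_bitmap_ptr; the memcpy() above then keeps
 * the per-CPU TSS bitmap current across context switches.
 */
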
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 * Function graph tracer is not supported either.
 */
__notrace_funcgraph struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread;
	struct thread_struct *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);
	unsigned fsindex, gsindex;

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(next->xstate);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	savesegment(es, prev->es);
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	savesegment(ds, prev->ds);
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	/* We must save %fs and %gs before load_TLS() because
	 * %fs and %gs may be cleared by load_TLS().
	 *
	 * (e.g. xen_load_tls())
	 */
	savesegment(fs, fsindex);
	savesegment(gs, gsindex);

	load_TLS(next, cpu);

	/*
	 * Leave lazy mode, flushing any hypercalls made here.
	 * This must be done before restoring TLS segments so
	 * the GDT and LDT are properly updated, and must be
	 * done before math_state_restore, so the TS bit is up
	 * to date.
	 */
	arch_leave_lazy_cpu_mode();

	/*
	 * Switch FS and GS.
	 *
	 * Segment register != 0 always requires a reload. Also
	 * reload when it has changed. When prev process used 64bit
	 * base always reload to avoid an information leak.
	 */
	if (unlikely(fsindex | next->fsindex | prev->fs)) {
		loadsegment(fs, next->fsindex);
		/*
		 * Check if the user used a selector != 0; if yes
		 * clear 64bit base, since overloaded base is always
		 * mapped to the Null selector
		 */
		if (fsindex)
			prev->fs = 0;
	}
	/* when next process has a 64bit base use it */
	if (next->fs)
		wrmsrl(MSR_FS_BASE, next->fs);
	prev->fsindex = fsindex;

	if (unlikely(gsindex | next->gsindex | prev->gs)) {
		load_gs_index(next->gsindex);
		if (gsindex)
			prev->gs = 0;
	}
	if (next->gs)
		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
	prev->gsindex = gsindex;

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) +
		  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 *
	 * tsk_used_math() checks prevent calling math_state_restore(),
	 * which can sleep in the case of !tsk_used_math()
	 */
	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs *regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, ip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp >= (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

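/*
 * Stack frame layout assumed by the get_wchan() walk above (standard
 * x86-64 frame-pointer convention, so only meaningful when the kernel
 * is built with frame pointers):
 *
 *	fp	-> saved caller %rbp	(next frame to follow)
 *	fp+8	-> return address	(candidate wchan %rip)
 */
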
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				loadsegment(fs, FS_TLS_SEL);
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				loadsegment(fs, 0);
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			savesegment(gs, gsindex);
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

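/*
 * Illustrative sketch (not part of this file): userspace sets or reads
 * its %fs/%gs base through this syscall, e.g.
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_FS, tls_block);
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 *
 * where tls_block is a hypothetical pointer to a thread-local block.
 */
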
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}