x86: fix stackprotector canary updates during context switches
[deliverable/linux.git] / arch / x86 / kernel / process_64.c
CommitLineData
1da177e4 1/*
1da177e4
LT
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6612538c 6 *
1da177e4
LT
7 * X86-64 port
8 * Andi Kleen.
76e4f660
AR
9 *
10 * CPU hotplug support - ashok.raj@intel.com
1da177e4
LT
11 */
12
13/*
14 * This file handles the architecture-dependent parts of process handling..
15 */
16
17#include <stdarg.h>
18
76e4f660 19#include <linux/cpu.h>
1da177e4
LT
20#include <linux/errno.h>
21#include <linux/sched.h>
6612538c 22#include <linux/fs.h>
1da177e4
LT
23#include <linux/kernel.h>
24#include <linux/mm.h>
25#include <linux/elfcore.h>
26#include <linux/smp.h>
27#include <linux/slab.h>
28#include <linux/user.h>
1da177e4 29#include <linux/interrupt.h>
6612538c 30#include <linux/utsname.h>
1da177e4 31#include <linux/delay.h>
6612538c 32#include <linux/module.h>
1da177e4 33#include <linux/ptrace.h>
1da177e4 34#include <linux/random.h>
95833c83 35#include <linux/notifier.h>
c6fd91f0 36#include <linux/kprobes.h>
1eeb66a1 37#include <linux/kdebug.h>
02290683 38#include <linux/tick.h>
529e25f6 39#include <linux/prctl.h>
1da177e4
LT
40
41#include <asm/uaccess.h>
42#include <asm/pgtable.h>
43#include <asm/system.h>
44#include <asm/io.h>
45#include <asm/processor.h>
46#include <asm/i387.h>
47#include <asm/mmu_context.h>
48#include <asm/pda.h>
49#include <asm/prctl.h>
1da177e4
LT
50#include <asm/desc.h>
51#include <asm/proto.h>
52#include <asm/ia32.h>
95833c83 53#include <asm/idle.h>
1da177e4
LT
54
55asmlinkage extern void ret_from_fork(void);
56
57unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
1da177e4
LT
59unsigned long boot_option_idle_override = 0;
60EXPORT_SYMBOL(boot_option_idle_override);
61
62/*
63 * Powermanagement idle function, if any..
64 */
65void (*pm_idle)(void);
2ee60e17 66EXPORT_SYMBOL(pm_idle);
1da177e4 67
e041c683 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
95833c83
AK
69
70void idle_notifier_register(struct notifier_block *n)
71{
e041c683 72 atomic_notifier_chain_register(&idle_notifier, n);
95833c83 73}
95833c83 74
95833c83
AK
75void enter_idle(void)
76{
a15da49d 77 write_pda(isidle, 1);
e041c683 78 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
95833c83
AK
79}
80
81static void __exit_idle(void)
82{
9446868b 83 if (test_and_clear_bit_pda(0, isidle) == 0)
a15da49d 84 return;
e041c683 85 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
95833c83
AK
86}
87
88/* Called from interrupts to signify idle end */
89void exit_idle(void)
90{
a15da49d
AK
91 /* idle loop has pid 0 */
92 if (current->pid)
95833c83
AK
93 return;
94 __exit_idle();
95}
96
1da177e4
LT
97/*
98 * We use this if we don't have any better
99 * idle routine..
100 */
d8954222 101void default_idle(void)
1da177e4 102{
495ab9c0 103 current_thread_info()->status &= ~TS_POLLING;
0888f06a
IM
104 /*
105 * TS_POLLING-cleared state must be visible before we
106 * test NEED_RESCHED:
107 */
108 smp_mb();
7f424a8b 109 if (!need_resched())
5ee613b6 110 safe_halt(); /* enables interrupts racelessly */
7f424a8b
PZ
111 else
112 local_irq_enable();
495ab9c0 113 current_thread_info()->status |= TS_POLLING;
1da177e4
LT
114}
115
76e4f660
AR
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/*
 * With physical CPU hotplug an offlined CPU is parked here: it reports
 * CPU_DEAD through the per-cpu cpu_state and then halts forever with
 * interrupts disabled.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Acknowledge the offline request. */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	for (;;)
		halt();
}
#else
/* Without CPU hotplug a CPU must never be found offline. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
139
1da177e4
LT
140/*
141 * The idle thread. There's no useful work to be
142 * done, so just try to conserve power and have a
143 * low exit latency (ie sit in a loop waiting for
144 * somebody to say that they'd like to reschedule)
145 */
b10db7f0 146void cpu_idle(void)
1da177e4 147{
495ab9c0 148 current_thread_info()->status |= TS_POLLING;
1da177e4
LT
149 /* endless idle loop with no priority at all */
150 while (1) {
3d97775a 151 tick_nohz_stop_sched_tick();
1da177e4
LT
152 while (!need_resched()) {
153 void (*idle)(void);
154
1da177e4
LT
155 rmb();
156 idle = pm_idle;
157 if (!idle)
158 idle = default_idle;
76e4f660
AR
159 if (cpu_is_offline(smp_processor_id()))
160 play_dead();
d331e739
VP
161 /*
162 * Idle routines should keep interrupts disabled
163 * from here on, until they go to idle.
164 * Otherwise, idle callbacks can misfire.
165 */
166 local_irq_disable();
95833c83 167 enter_idle();
1da177e4 168 idle();
a15da49d
AK
169 /* In many cases the interrupt that ended idle
170 has already called exit_idle. But some idle
171 loops can be woken up without interrupt. */
95833c83 172 __exit_idle();
1da177e4
LT
173 }
174
02290683 175 tick_nohz_restart_sched_tick();
5bfb5d69 176 preempt_enable_no_resched();
1da177e4 177 schedule();
5bfb5d69 178 preempt_disable();
1da177e4
LT
179 }
180}
181
6612538c 182/* Prints also some state that isn't saved in the pt_regs */
1da177e4
LT
183void __show_regs(struct pt_regs * regs)
184{
185 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
bb1995d5 186 unsigned long d0, d1, d2, d3, d6, d7;
6612538c
HS
187 unsigned int fsindex, gsindex;
188 unsigned int ds, cs, es;
1da177e4
LT
189
190 printk("\n");
191 print_modules();
9acf23c4
AK
192 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
193 current->pid, current->comm, print_tainted(),
96b644bd
SH
194 init_utsname()->release,
195 (int)strcspn(init_utsname()->version, " "),
196 init_utsname()->version);
65ea5b03 197 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
aafbd7eb 198 printk_address(regs->ip, 1);
65ea5b03
PA
199 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
200 regs->flags);
1da177e4 201 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
65ea5b03 202 regs->ax, regs->bx, regs->cx);
1da177e4 203 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
65ea5b03 204 regs->dx, regs->si, regs->di);
1da177e4 205 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
65ea5b03 206 regs->bp, regs->r8, regs->r9);
1da177e4
LT
207 printk("R10: %016lx R11: %016lx R12: %016lx\n",
208 regs->r10, regs->r11, regs->r12);
209 printk("R13: %016lx R14: %016lx R15: %016lx\n",
210 regs->r13, regs->r14, regs->r15);
211
212 asm("movl %%ds,%0" : "=r" (ds));
213 asm("movl %%cs,%0" : "=r" (cs));
214 asm("movl %%es,%0" : "=r" (es));
215 asm("movl %%fs,%0" : "=r" (fsindex));
216 asm("movl %%gs,%0" : "=r" (gsindex));
217
218 rdmsrl(MSR_FS_BASE, fs);
219 rdmsrl(MSR_GS_BASE, gs);
220 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
221
f51c9452
GOC
222 cr0 = read_cr0();
223 cr2 = read_cr2();
224 cr3 = read_cr3();
225 cr4 = read_cr4();
1da177e4
LT
226
227 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
228 fs,fsindex,gs,gsindex,shadowgs);
229 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
230 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
bb1995d5
AS
231
232 get_debugreg(d0, 0);
233 get_debugreg(d1, 1);
234 get_debugreg(d2, 2);
235 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
236 get_debugreg(d3, 3);
237 get_debugreg(d6, 6);
238 get_debugreg(d7, 7);
239 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
1da177e4
LT
240}
241
242void show_regs(struct pt_regs *regs)
243{
c078d326 244 printk("CPU %d:", smp_processor_id());
1da177e4 245 __show_regs(regs);
bc850d6b 246 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
1da177e4
LT
247}
248
249/*
250 * Free current thread data structures etc..
251 */
252void exit_thread(void)
253{
254 struct task_struct *me = current;
255 struct thread_struct *t = &me->thread;
73649dab 256
6612538c 257 if (me->thread.io_bitmap_ptr) {
1da177e4
LT
258 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
259
260 kfree(t->io_bitmap_ptr);
261 t->io_bitmap_ptr = NULL;
d3a4f48d 262 clear_thread_flag(TIF_IO_BITMAP);
1da177e4
LT
263 /*
264 * Careful, clear this in the TSS too:
265 */
266 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
267 t->io_bitmap_max = 0;
268 put_cpu();
269 }
270}
271
272void flush_thread(void)
273{
274 struct task_struct *tsk = current;
1da177e4 275
303cd153
MD
276 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
277 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
278 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
279 clear_tsk_thread_flag(tsk, TIF_IA32);
280 } else {
281 set_tsk_thread_flag(tsk, TIF_IA32);
4d9bc79c 282 current_thread_info()->status |= TS_COMPAT;
303cd153 283 }
4d9bc79c 284 }
303cd153 285 clear_tsk_thread_flag(tsk, TIF_DEBUG);
1da177e4
LT
286
287 tsk->thread.debugreg0 = 0;
288 tsk->thread.debugreg1 = 0;
289 tsk->thread.debugreg2 = 0;
290 tsk->thread.debugreg3 = 0;
291 tsk->thread.debugreg6 = 0;
292 tsk->thread.debugreg7 = 0;
6612538c 293 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
1da177e4
LT
294 /*
295 * Forget coprocessor state..
296 */
297 clear_fpu(tsk);
298 clear_used_math();
299}
300
301void release_thread(struct task_struct *dead_task)
302{
303 if (dead_task->mm) {
304 if (dead_task->mm->context.size) {
305 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
306 dead_task->comm,
307 dead_task->mm->context.ldt,
308 dead_task->mm->context.size);
309 BUG();
310 }
311 }
312}
313
314static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
315{
6612538c 316 struct user_desc ud = {
1da177e4
LT
317 .base_addr = addr,
318 .limit = 0xfffff,
319 .seg_32bit = 1,
320 .limit_in_pages = 1,
321 .useable = 1,
322 };
ade1af77 323 struct desc_struct *desc = t->thread.tls_array;
1da177e4 324 desc += tls;
80fbb69a 325 fill_ldt(desc, &ud);
1da177e4
LT
326}
327
328static inline u32 read_32bit_tls(struct task_struct *t, int tls)
329{
91394eb0 330 return get_desc_base(&t->thread.tls_array[tls]);
1da177e4
LT
331}
332
/*
 * Called before a new thread is allocated and the current task is
 * copied into it.  The only work done here is unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
341
65ea5b03 342int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
1da177e4
LT
343 unsigned long unused,
344 struct task_struct * p, struct pt_regs * regs)
345{
346 int err;
347 struct pt_regs * childregs;
348 struct task_struct *me = current;
349
a88cde13 350 childregs = ((struct pt_regs *)
57eafdc2 351 (THREAD_SIZE + task_stack_page(p))) - 1;
1da177e4
LT
352 *childregs = *regs;
353
65ea5b03
PA
354 childregs->ax = 0;
355 childregs->sp = sp;
356 if (sp == ~0UL)
357 childregs->sp = (unsigned long)childregs;
1da177e4 358
faca6227
PA
359 p->thread.sp = (unsigned long) childregs;
360 p->thread.sp0 = (unsigned long) (childregs+1);
361 p->thread.usersp = me->thread.usersp;
1da177e4 362
e4f17c43 363 set_tsk_thread_flag(p, TIF_FORK);
1da177e4
LT
364
365 p->thread.fs = me->thread.fs;
366 p->thread.gs = me->thread.gs;
367
fd51f666
L
368 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
369 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
370 asm("mov %%es,%0" : "=m" (p->thread.es));
371 asm("mov %%ds,%0" : "=m" (p->thread.ds));
1da177e4 372
d3a4f48d 373 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
1da177e4
LT
374 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
375 if (!p->thread.io_bitmap_ptr) {
376 p->thread.io_bitmap_max = 0;
377 return -ENOMEM;
378 }
a88cde13
AK
379 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
380 IO_BITMAP_BYTES);
d3a4f48d 381 set_tsk_thread_flag(p, TIF_IO_BITMAP);
6612538c 382 }
1da177e4
LT
383
384 /*
385 * Set a new TLS for the child thread?
386 */
387 if (clone_flags & CLONE_SETTLS) {
388#ifdef CONFIG_IA32_EMULATION
389 if (test_thread_flag(TIF_IA32))
efd1ca52 390 err = do_set_thread_area(p, -1,
65ea5b03 391 (struct user_desc __user *)childregs->si, 0);
1da177e4
LT
392 else
393#endif
394 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
395 if (err)
396 goto out;
397 }
398 err = 0;
399out:
400 if (err && p->thread.io_bitmap_ptr) {
401 kfree(p->thread.io_bitmap_ptr);
402 p->thread.io_bitmap_max = 0;
403 }
404 return err;
405}
406
513ad84b
IM
407void
408start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
409{
410 asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
411 load_gs_index(0);
412 regs->ip = new_ip;
413 regs->sp = new_sp;
414 write_pda(oldrsp, new_sp);
415 regs->cs = __USER_CS;
416 regs->ss = __USER_DS;
417 regs->flags = 0x200;
418 set_fs(USER_DS);
aa283f49
SS
419 /*
420 * Free the old FP and other extended state
421 */
422 free_thread_xstate(current);
513ad84b
IM
423}
424EXPORT_SYMBOL_GPL(start_thread);
425
529e25f6
EB
426static void hard_disable_TSC(void)
427{
428 write_cr4(read_cr4() | X86_CR4_TSD);
429}
430
431void disable_TSC(void)
432{
433 preempt_disable();
434 if (!test_and_set_thread_flag(TIF_NOTSC))
435 /*
436 * Must flip the CPU state synchronously with
437 * TIF_NOTSC in the current running context.
438 */
439 hard_disable_TSC();
440 preempt_enable();
441}
442
443static void hard_enable_TSC(void)
444{
445 write_cr4(read_cr4() & ~X86_CR4_TSD);
446}
447
a4928cff 448static void enable_TSC(void)
529e25f6
EB
449{
450 preempt_disable();
451 if (test_and_clear_thread_flag(TIF_NOTSC))
452 /*
453 * Must flip the CPU state synchronously with
454 * TIF_NOTSC in the current running context.
455 */
456 hard_enable_TSC();
457 preempt_enable();
458}
459
460int get_tsc_mode(unsigned long adr)
461{
462 unsigned int val;
463
464 if (test_thread_flag(TIF_NOTSC))
465 val = PR_TSC_SIGSEGV;
466 else
467 val = PR_TSC_ENABLE;
468
469 return put_user(val, (unsigned int __user *)adr);
470}
471
472int set_tsc_mode(unsigned int val)
473{
474 if (val == PR_TSC_SIGSEGV)
475 disable_TSC();
476 else if (val == PR_TSC_ENABLE)
477 enable_TSC();
478 else
479 return -EINVAL;
480
481 return 0;
482}
483
1da177e4
LT
/*
 * Load debug register number @r from the debugreg<r> field of the thread
 * structure pointed to by @thread.
 *
 * @thread is parenthesised so that any pointer expression (for example
 * "base + 1") can be passed safely; @r must be a literal register number
 * because it is token-pasted onto the field name.
 */
#define loaddebug(thread, r) set_debugreg((thread)->debugreg ## r, r)
488
d3a4f48d 489static inline void __switch_to_xtra(struct task_struct *prev_p,
6612538c
HS
490 struct task_struct *next_p,
491 struct tss_struct *tss)
d3a4f48d
SE
492{
493 struct thread_struct *prev, *next;
eee3af4a 494 unsigned long debugctl;
d3a4f48d
SE
495
496 prev = &prev_p->thread,
497 next = &next_p->thread;
498
eee3af4a
MM
499 debugctl = prev->debugctlmsr;
500 if (next->ds_area_msr != prev->ds_area_msr) {
501 /* we clear debugctl to make sure DS
502 * is not in use when we change it */
503 debugctl = 0;
5b0e5084 504 update_debugctlmsr(0);
eee3af4a
MM
505 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
506 }
507
508 if (next->debugctlmsr != debugctl)
5b0e5084 509 update_debugctlmsr(next->debugctlmsr);
7e991604 510
d3a4f48d
SE
511 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
512 loaddebug(next, 0);
513 loaddebug(next, 1);
514 loaddebug(next, 2);
515 loaddebug(next, 3);
516 /* no 4 and 5 */
517 loaddebug(next, 6);
518 loaddebug(next, 7);
519 }
520
529e25f6
EB
521 if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
522 test_tsk_thread_flag(next_p, TIF_NOTSC)) {
523 /* prev and next are different */
524 if (test_tsk_thread_flag(next_p, TIF_NOTSC))
525 hard_disable_TSC();
526 else
527 hard_enable_TSC();
528 }
529
d3a4f48d
SE
530 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
531 /*
532 * Copy the relevant range of the IO bitmap.
533 * Normally this is 128 bytes or less:
534 */
535 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
536 max(prev->io_bitmap_max, next->io_bitmap_max));
537 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
538 /*
539 * Clear any possible leftover bits:
540 */
541 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
542 }
eee3af4a 543
b4ef95de 544#ifdef X86_BTS
eee3af4a
MM
545 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
546 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
547
548 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
549 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
b4ef95de 550#endif
d3a4f48d
SE
551}
552
1da177e4
LT
553/*
554 * switch_to(x,y) should switch tasks from x to y.
555 *
6612538c 556 * This could still be optimized:
1da177e4
LT
557 * - fold all the options into a flag word and test it with a single test.
558 * - could test fs/gs bitsliced
099f318b
AK
559 *
560 * Kprobes not supported here. Set the probe on schedule instead.
1da177e4 561 */
f438d914 562struct task_struct *
a88cde13 563__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
1da177e4
LT
564{
565 struct thread_struct *prev = &prev_p->thread,
566 *next = &next_p->thread;
6612538c 567 int cpu = smp_processor_id();
1da177e4
LT
568 struct tss_struct *tss = &per_cpu(init_tss, cpu);
569
e07e23e1
AV
570 /* we're going to use this soon, after a few expensive things */
571 if (next_p->fpu_counter>5)
61c4628b 572 prefetch(next->xstate);
e07e23e1 573
1da177e4
LT
574 /*
575 * Reload esp0, LDT and the page table pointer:
576 */
7818a1e0 577 load_sp0(tss, next);
1da177e4
LT
578
579 /*
580 * Switch DS and ES.
581 * This won't pick up thread selector changes, but I guess that is ok.
582 */
fd51f666 583 asm volatile("mov %%es,%0" : "=m" (prev->es));
1da177e4
LT
584 if (unlikely(next->es | prev->es))
585 loadsegment(es, next->es);
586
fd51f666 587 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
1da177e4
LT
588 if (unlikely(next->ds | prev->ds))
589 loadsegment(ds, next->ds);
590
591 load_TLS(next, cpu);
592
593 /*
594 * Switch FS and GS.
595 */
596 {
597 unsigned fsindex;
598 asm volatile("movl %%fs,%0" : "=r" (fsindex));
599 /* segment register != 0 always requires a reload.
600 also reload when it has changed.
601 when prev process used 64bit base always reload
602 to avoid an information leak. */
603 if (unlikely(fsindex | next->fsindex | prev->fs)) {
604 loadsegment(fs, next->fsindex);
605 /* check if the user used a selector != 0
606 * if yes clear 64bit base, since overloaded base
607 * is always mapped to the Null selector
608 */
609 if (fsindex)
610 prev->fs = 0;
611 }
612 /* when next process has a 64bit base use it */
613 if (next->fs)
614 wrmsrl(MSR_FS_BASE, next->fs);
615 prev->fsindex = fsindex;
616 }
617 {
618 unsigned gsindex;
619 asm volatile("movl %%gs,%0" : "=r" (gsindex));
620 if (unlikely(gsindex | next->gsindex | prev->gs)) {
621 load_gs_index(next->gsindex);
622 if (gsindex)
623 prev->gs = 0;
624 }
625 if (next->gs)
626 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
627 prev->gsindex = gsindex;
628 }
629
0a5ace2a
AK
630 /* Must be after DS reload */
631 unlazy_fpu(prev_p);
632
1da177e4 633 /*
45948d77 634 * Switch the PDA and FPU contexts.
1da177e4 635 */
faca6227
PA
636 prev->usersp = read_pda(oldrsp);
637 write_pda(oldrsp, next->usersp);
1da177e4 638 write_pda(pcurrent, next_p);
18bd057b 639
a88cde13 640 write_pda(kernelstack,
7b0bda74 641 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
0a425405 642#ifdef CONFIG_CC_STACKPROTECTOR
0a425405
AV
643 /*
644 * Build time only check to make sure the stack_canary is at
645 * offset 40 in the pda; this is a gcc ABI requirement
646 */
647 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
648#endif
1da177e4
LT
649
650 /*
d3a4f48d 651 * Now maybe reload the debug registers and handle I/O bitmaps
1da177e4 652 */
eee3af4a
MM
653 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
654 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
d3a4f48d 655 __switch_to_xtra(prev_p, next_p, tss);
1da177e4 656
e07e23e1
AV
657 /* If the task has used fpu the last 5 timeslices, just do a full
658 * restore of the math state immediately to avoid the trap; the
659 * chances of needing FPU soon are obviously high now
660 */
661 if (next_p->fpu_counter>5)
662 math_state_restore();
1da177e4
LT
663 return prev_p;
664}
665
666/*
667 * sys_execve() executes a new program.
668 */
6612538c 669asmlinkage
1da177e4 670long sys_execve(char __user *name, char __user * __user *argv,
5d119b2c 671 char __user * __user *envp, struct pt_regs *regs)
1da177e4
LT
672{
673 long error;
674 char * filename;
675
676 filename = getname(name);
677 error = PTR_ERR(filename);
5d119b2c 678 if (IS_ERR(filename))
1da177e4 679 return error;
5d119b2c 680 error = do_execve(filename, argv, envp, regs);
1da177e4
LT
681 putname(filename);
682 return error;
683}
684
685void set_personality_64bit(void)
686{
687 /* inherit personality from parent */
688
689 /* Make sure to be in 64bit mode */
6612538c 690 clear_thread_flag(TIF_IA32);
1da177e4
LT
691
692 /* TBD: overwrites user setup. Should have two bits.
693 But 64bit processes have always behaved this way,
694 so it's not too bad. The main problem is just that
6612538c 695 32bit childs are affected again. */
1da177e4
LT
696 current->personality &= ~READ_IMPLIES_EXEC;
697}
698
699asmlinkage long sys_fork(struct pt_regs *regs)
700{
65ea5b03 701 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
1da177e4
LT
702}
703
a88cde13
AK
704asmlinkage long
705sys_clone(unsigned long clone_flags, unsigned long newsp,
706 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
1da177e4
LT
707{
708 if (!newsp)
65ea5b03 709 newsp = regs->sp;
1da177e4
LT
710 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
711}
712
713/*
714 * This is trivial, and on the face of it looks like it
715 * could equally well be done in user mode.
716 *
717 * Not so, for quite unobvious reasons - register pressure.
718 * In user mode vfork() cannot have a stack frame, and if
719 * done by calling the "clone()" system call directly, you
720 * do not have enough call-clobbered registers to hold all
721 * the information you need.
722 */
723asmlinkage long sys_vfork(struct pt_regs *regs)
724{
65ea5b03 725 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
1da177e4
LT
726 NULL, NULL);
727}
728
729unsigned long get_wchan(struct task_struct *p)
730{
731 unsigned long stack;
65ea5b03 732 u64 fp,ip;
1da177e4
LT
733 int count = 0;
734
735 if (!p || p == current || p->state==TASK_RUNNING)
736 return 0;
57eafdc2 737 stack = (unsigned long)task_stack_page(p);
faca6227 738 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
1da177e4 739 return 0;
faca6227 740 fp = *(u64 *)(p->thread.sp);
1da177e4 741 do {
a88cde13
AK
742 if (fp < (unsigned long)stack ||
743 fp > (unsigned long)stack+THREAD_SIZE)
1da177e4 744 return 0;
65ea5b03
PA
745 ip = *(u64 *)(fp+8);
746 if (!in_sched_functions(ip))
747 return ip;
1da177e4
LT
748 fp = *(u64 *)fp;
749 } while (count++ < 16);
750 return 0;
751}
752
753long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
754{
755 int ret = 0;
756 int doit = task == current;
757 int cpu;
758
759 switch (code) {
760 case ARCH_SET_GS:
84929801 761 if (addr >= TASK_SIZE_OF(task))
1da177e4
LT
762 return -EPERM;
763 cpu = get_cpu();
764 /* handle small bases via the GDT because that's faster to
765 switch. */
766 if (addr <= 0xffffffff) {
767 set_32bit_tls(task, GS_TLS, addr);
768 if (doit) {
769 load_TLS(&task->thread, cpu);
770 load_gs_index(GS_TLS_SEL);
771 }
772 task->thread.gsindex = GS_TLS_SEL;
773 task->thread.gs = 0;
774 } else {
775 task->thread.gsindex = 0;
776 task->thread.gs = addr;
777 if (doit) {
a88cde13
AK
778 load_gs_index(0);
779 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
1da177e4
LT
780 }
781 }
782 put_cpu();
783 break;
784 case ARCH_SET_FS:
785 /* Not strictly needed for fs, but do it for symmetry
786 with gs */
84929801 787 if (addr >= TASK_SIZE_OF(task))
6612538c 788 return -EPERM;
1da177e4 789 cpu = get_cpu();
6612538c 790 /* handle small bases via the GDT because that's faster to
1da177e4 791 switch. */
6612538c 792 if (addr <= 0xffffffff) {
1da177e4 793 set_32bit_tls(task, FS_TLS, addr);
6612538c
HS
794 if (doit) {
795 load_TLS(&task->thread, cpu);
a88cde13 796 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
1da177e4
LT
797 }
798 task->thread.fsindex = FS_TLS_SEL;
799 task->thread.fs = 0;
6612538c 800 } else {
1da177e4
LT
801 task->thread.fsindex = 0;
802 task->thread.fs = addr;
803 if (doit) {
804 /* set the selector to 0 to not confuse
805 __switch_to */
a88cde13
AK
806 asm volatile("movl %0,%%fs" :: "r" (0));
807 ret = checking_wrmsrl(MSR_FS_BASE, addr);
1da177e4
LT
808 }
809 }
810 put_cpu();
811 break;
6612538c
HS
812 case ARCH_GET_FS: {
813 unsigned long base;
1da177e4
LT
814 if (task->thread.fsindex == FS_TLS_SEL)
815 base = read_32bit_tls(task, FS_TLS);
a88cde13 816 else if (doit)
1da177e4 817 rdmsrl(MSR_FS_BASE, base);
a88cde13 818 else
1da177e4 819 base = task->thread.fs;
6612538c
HS
820 ret = put_user(base, (unsigned long __user *)addr);
821 break;
1da177e4 822 }
6612538c 823 case ARCH_GET_GS: {
1da177e4 824 unsigned long base;
97c2803c 825 unsigned gsindex;
1da177e4
LT
826 if (task->thread.gsindex == GS_TLS_SEL)
827 base = read_32bit_tls(task, GS_TLS);
97c2803c 828 else if (doit) {
6612538c 829 asm("movl %%gs,%0" : "=r" (gsindex));
97c2803c
JB
830 if (gsindex)
831 rdmsrl(MSR_KERNEL_GS_BASE, base);
832 else
833 base = task->thread.gs;
834 }
a88cde13 835 else
1da177e4 836 base = task->thread.gs;
6612538c 837 ret = put_user(base, (unsigned long __user *)addr);
1da177e4
LT
838 break;
839 }
840
841 default:
842 ret = -EINVAL;
843 break;
6612538c 844 }
1da177e4 845
6612538c
HS
846 return ret;
847}
1da177e4
LT
848
849long sys_arch_prctl(int code, unsigned long addr)
850{
851 return do_arch_prctl(current, code, addr);
1da177e4
LT
852}
853
854unsigned long arch_align_stack(unsigned long sp)
855{
c16b63e0 856 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
1da177e4
LT
857 sp -= get_random_int() % 8192;
858 return sp & ~0xf;
859}
c1d171a0
JK
860
861unsigned long arch_randomize_brk(struct mm_struct *mm)
862{
863 unsigned long range_end = mm->brk + 0x02000000;
864 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
865}
This page took 0.534484 seconds and 5 git commands to generate.