x86_64: make /proc/interrupts work with dyn irq_desc
[deliverable/linux.git] arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
#include <linux/io.h>

#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>
#include <asm/syscalls.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

#ifndef CONFIG_SMP
static inline void play_dead(void)
{
        BUG();
}
#endif

/*
 * The idle thread. There's no useful work to be done, so just try to
 * conserve power and have a low exit latency (i.e. sit in a loop
 * waiting for somebody to say that they'd like to reschedule).
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick(1);
                while (!need_resched()) {

                        rmb();

                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        pm_idle();
                        start_critical_timings();
                        /*
                         * In many cases the interrupt that ended idle
                         * has already called exit_idle. But some idle
                         * loops can be woken up without interrupt.
                         */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
                regs->sp, regs->flags);
        printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        if (!all)
                return;

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
                es, cr0);
        printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
                cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk(KERN_INFO "CPU %d:", smp_processor_id());
        __show_regs(regs, 1);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
#ifdef CONFIG_X86_DS
        /* Free any DS contexts that have not been properly released. */
        if (unlikely(t->ds_ctx)) {
                /* we clear debugctl to make sure DS is not used. */
                update_debugctlmsr(0);
                ds_free(t->ds_ctx);
        }
#endif /* CONFIG_X86_DS */
}

void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        savesegment(gs, p->thread.gsindex);
        savesegment(fs, p->thread.fsindex);
        savesegment(es, p->thread.es);
        savesegment(ds, p->thread.ds);

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        loadsegment(fs, 0);
        loadsegment(es, 0);
        loadsegment(ds, 0);
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}

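/*
 * Illustrative sketch (not part of the original file): get_tsc_mode() and
 * set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl interface, so a
 * userspace task could opt out of direct RDTSC use like this:
 */
#if 0
        #include <sys/prctl.h>

        int mode;

        /* after this, executing RDTSC raises SIGSEGV in this task */
        prctl(PR_SET_TSC, PR_TSC_SIGSEGV);

        /* reads back PR_TSC_SIGSEGV */
        prctl(PR_GET_TSC, &mode);
#endif
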
/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;

#ifdef CONFIG_X86_DS
        {
                unsigned long ds_prev = 0, ds_next = 0;

                if (prev->ds_ctx)
                        ds_prev = (unsigned long)prev->ds_ctx->ds;
                if (next->ds_ctx)
                        ds_next = (unsigned long)next->ds_ctx->ds;

                if (ds_next != ds_prev) {
                        /*
                         * We clear debugctl to make sure DS
                         * is not in use when we change it:
                         */
                        debugctl = 0;
                        update_debugctlmsr(0);
                        wrmsrl(MSR_IA32_DS_AREA, ds_next);
                }
        }
#endif /* CONFIG_X86_DS */

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                        max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef CONFIG_X86_PTRACE_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif /* CONFIG_X86_PTRACE_BTS */
}

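/*
 * Illustrative sketch (not part of the original file): the TIF_IO_BITMAP
 * handling above exists to support the ioperm() system call. A privileged
 * userspace driver could request port access like this:
 */
#if 0
        #include <sys/io.h>

        /* grant this task access to I/O ports 0x378-0x37a (parallel port) */
        if (ioperm(0x378, 3, 1) < 0)
                perror("ioperm");
#endif
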
/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
        unsigned fsindex, gsindex;

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        savesegment(es, prev->es);
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        savesegment(ds, prev->ds);
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        /*
         * We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
         * (e.g. xen_load_tls())
         */
        savesegment(fs, fsindex);
        savesegment(gs, gsindex);

        load_TLS(next, cpu);

        /*
         * Leave lazy mode, flushing any hypercalls made here.
         * This must be done before restoring TLS segments so
         * the GDT and LDT are properly updated, and must be
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
        arch_leave_lazy_cpu_mode();

        /*
         * Switch FS and GS.
         *
         * A segment register != 0 always requires a reload. Also
         * reload when it has changed. When the previous process used a
         * 64bit base, always reload to avoid an information leak.
         */
        if (unlikely(fsindex | next->fsindex | prev->fs)) {
                loadsegment(fs, next->fsindex);
                /*
                 * Check if the user used a selector != 0; if yes
                 * clear 64bit base, since overloaded base is always
                 * mapped to the Null selector
                 */
                if (fsindex)
                        prev->fs = 0;
        }
        /* when next process has a 64bit base use it */
        if (next->fs)
                wrmsrl(MSR_FS_BASE, next->fs);
        prev->fsindex = fsindex;

        if (unlikely(gsindex | next->gsindex | prev->gs)) {
                load_gs_index(next->gsindex);
                if (gsindex)
                        prev->gs = 0;
        }
        if (next->gs)
                wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
        prev->gsindex = gsindex;

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) +
                  THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /*
         * If the task has used the FPU in the last 5 timeslices, just do
         * a full restore of the math state immediately to avoid the trap;
         * the chances of needing FPU soon are obviously high now.
         *
         * tsk_used_math() checks prevent calling math_state_restore(),
         * which can sleep in the case of !tsk_used_math().
         */
        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: This overwrites the user's setup. We should track the two
           cases with two bits, but 64bit processes have always behaved
           this way, so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp >= (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                loadsegment(fs, FS_TLS_SEL);
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                loadsegment(fs, 0);
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        savesegment(gs, gsindex);
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

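/*
 * Illustrative sketch (not part of the original file): from userspace the
 * ARCH_SET_FS/ARCH_GET_FS cases above are reached through the arch_prctl()
 * system call, e.g. by a threading library setting up TLS. The tls_block
 * buffer below is hypothetical.
 */
#if 0
        #include <asm/prctl.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        unsigned long base;

        /* point %fs at a hypothetical TLS block */
        syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);

        /* read it back; the kernel writes the base through the addr arg */
        syscall(SYS_arch_prctl, ARCH_GET_FS, (unsigned long)&base);
#endif
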
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}
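
/*
 * Worked example (illustrative, not part of the original file): the helper
 * above shifts the starting stack down by up to 8191 bytes, then rounds
 * down to a 16-byte boundary. For a hypothetical sp of 0x7fffffffe000 and
 * a random draw of 5000 (0x1388):
 *
 *      0x7fffffffe000 - 0x1388 = 0x7fffffffcc78
 *      0x7fffffffcc78 & ~0xf   = 0x7fffffffcc70
 */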

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}