[PATCH] namespaces: utsname: use init_utsname when appropriate
arch/x86_64/kernel/process.c
/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

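/*
 * Idle notifiers: other kernel code can register here to be called
 * (atomically, from the idle path) when this CPU enters or leaves the
 * idle loop via enter_idle()/exit_idle() below.
 */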
static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (read_pda(isidle) == 0)
		return;
	write_pda(isidle, 0);
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
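/*
 * Note on TS_POLLING: while it is set, other CPUs can wake this one just
 * by setting TIF_NEED_RESCHED, with no IPI.  default_idle() clears it
 * because it is about to halt and therefore needs the IPI; the second
 * need_resched() check with interrupts disabled closes the race against a
 * wakeup arriving between the check and the halt.
 */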
static void default_idle(void)
{
	local_irq_enable();

	current_thread_info()->status &= ~TS_POLLING;
	smp_mb__after_clear_bit();
	while (!need_resched()) {
		local_irq_disable();
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();

	asm volatile(
		"2:"
		"testl %0,%1;"
		"rep; nop;"
		"je 2b;"
		: :
		"i" (_TIF_NEED_RESCHED),
		"m" (current_thread_info()->flags));
}

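/*
 * cpu_idle_wait() is typically used when the idle routine (pm_idle) is
 * changed at run time: it flags every online CPU and then sleeps until
 * each of them has been seen going around its idle loop once more (which
 * clears cpu_idle_state), so no CPU can still be executing the old idle
 * routine.
 */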
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
					!per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	while (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (need_resched())
			break;
		__mwait(0, 0);
	}
}

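/*
 * Pick the idle routine for this CPU: prefer mwait_idle() when the CPU
 * advertises MONITOR/MWAIT, unless the boot setup has already chosen an
 * idle routine (pm_idle already set).
 */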
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs support mwait
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

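/*
 * "idle=" boot parameter.  For example, booting with "idle=poll" selects
 * the busy-polling idle loop above.  Any "idle=" option also sets
 * boot_option_idle_override, which other code can consult to avoid
 * overriding the user's choice.
 */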
static int __init idle_setup(char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

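/*
 * flush_thread() runs on exec: it completes a pending 32/64-bit ABI
 * switch (_TIF_ABI_PENDING), clears the debug registers, the TLS slots
 * and any lazily-held FPU state, so the new program starts with clean
 * thread state.
 */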
void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	if (t->flags & _TIF_ABI_PENDING) {
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
		if (t->flags & _TIF_IA32)
			current_thread_info()->status |= TS_COMPAT;
	}
	t->flags &= ~_TIF_DEBUG;

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

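/*
 * Helpers for 32-bit TLS bases: install or read back a TLS descriptor in
 * the task's tls_array.  arch_prctl() uses these for fs/gs bases that fit
 * below 4GB, because switching a GDT selector is cheaper than rewriting
 * the base MSRs.
 */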
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

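/*
 * copy_thread() builds the kernel-side state of a new child: it copies
 * the parent's pt_regs onto the child's kernel stack (with rax = 0 so the
 * child sees fork() return 0), records the current segment registers,
 * duplicates the parent's I/O permission bitmap if it has one, and
 * installs a new TLS/fs base when CLONE_SETTLS was requested.
 */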
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)

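/*
 * __switch_to_xtra() handles the rarely-needed parts of a context switch:
 * reloading the hardware debug registers when the incoming task uses them
 * (TIF_DEBUG), and copying or clearing the TSS I/O permission bitmap when
 * either task has one (TIF_IO_BITMAP).
 */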
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread,
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);

	/* This must be here to ensure both math_state_restore() and
	   kernel_fpu_begin() work consistently.
	   And the AMD workaround requires it to be after DS reload. */
	unlazy_fpu(prev_p);
	write_pda(kernelstack,
	    (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

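/*
 * sys_clone(): if the caller did not supply a new stack pointer, the
 * child continues on the parent's current user stack pointer (regs->rsp).
 */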
asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		       NULL, NULL);
}

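/*
 * get_wchan(): for a sleeping task, walk the saved frame-pointer chain on
 * its kernel stack (at most 16 frames) and return the first return
 * address that is not inside the scheduler, i.e. the place the task is
 * waiting in.  Used for /proc/<pid>/wchan.
 */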
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

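/*
 * do_arch_prctl() is the back end of the arch_prctl(2) system call.
 * ARCH_SET_FS/ARCH_SET_GS install a new fs/gs base for "task": bases that
 * fit in 32 bits go into a GDT TLS slot (cheaper to reload on context
 * switch), larger bases are written to the FS_BASE / KERNEL_GS_BASE MSRs.
 * ARCH_GET_FS/ARCH_GET_GS store the current base to the user pointer in
 * "addr".  Illustrative user-space call (not part of this file):
 *
 *	syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 */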
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = task_pt_regs(tsk);

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

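/*
 * arch_align_stack(): unless address-space randomization is disabled for
 * this process, subtract a random amount of up to 8KB from the initial
 * user stack pointer, then round it down to a 16-byte boundary.
 */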
unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}