x86: add the capability to print fuzzy backtraces
[deliverable/linux.git] arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);
/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		ktime_t t0, t1;
		u64 t0n, t1n;

		t0 = ktime_get();
		t0n = ktime_to_ns(t0);
		safe_halt();	/* enables interrupts racelessly */
		local_irq_disable();
		t1 = ktime_get();
		t1n = ktime_to_ns(t1);
		sched_clock_idle_wakeup_event(t1n - t0n);
	}
	local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_stop_sched_tick();
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

static void do_nothing(void *unused)
{
}

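/*
 * Make every online CPU go through the idle-selection path again;
 * callers use this after changing pm_idle so that no CPU keeps
 * running the old idle routine.
 */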
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map, tmp = current->cpus_allowed;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
		/*
		 * We waited 1 sec. If a CPU still did not call idle,
		 * it may be because it is in idle and not waking up
		 * because it has nothing to do.
		 * Give all the remaining CPUs a kick.
		 */
		smp_call_function_mask(map, do_nothing, 0, 0);
	} while (!cpus_empty(map));

	set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(ax, cx);
	}
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk(KERN_INFO "using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
	printk_address(regs->ip, regs->bp);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
		regs->flags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
		regs->ax, regs->bx, regs->cx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
		regs->dx, regs->si, regs->di);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
		regs->bp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
		regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
		regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
		fs,fsindex,gs,gsindex,shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
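	/*
	 * The saved frame pointer is handed to the stack trace code so it
	 * can separate addresses it found by following frames from ones it
	 * only guessed at, i.e. the "fuzzy" backtraces this patch adds.
	 */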
	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

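/*
 * Set up the kernel stack, registers and thread state of a freshly
 * forked task; called from the generic fork code.
 */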
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
		unsigned long unused,
	struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->ax = 0;
	childregs->sp = sp;
	if (sp == ~0UL)
		childregs->sp = (unsigned long)childregs;

	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);
	p->thread.usersp = me->thread.usersp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
			IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = do_set_thread_area(p, -1,
				(struct user_desc __user *)childregs->si, 0);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

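/*
 * Slow part of the context switch, only run when the previous or the
 * next task uses debug registers, an I/O permission bitmap or branch
 * trace storage.
 */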
static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;
	unsigned long debugctl;

	prev = &prev_p->thread,
	next = &next_p->thread;

	debugctl = prev->debugctlmsr;
	if (next->ds_area_msr != prev->ds_area_msr) {
		/* we clear debugctl to make sure DS
		 * is not in use when we change it */
		debugctl = 0;
		wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
		wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
	}

	if (next->debugctlmsr != debugctl)
		wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}

	if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

	if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
		ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter>5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	load_sp0(tss, next);

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->usersp = read_pda(oldrsp);
	write_pda(oldrsp, next->usersp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter>5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->sp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
		    NULL, NULL);
}

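/*
 * Find out where a sleeping task is blocked: walk its saved frame
 * pointers (at most 16 frames) until a return address outside the
 * scheduler is found. Used for /proc/<pid>/wchan.
 */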
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp,ip;
	int count = 0;

	if (!p || p == current || p->state==TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.sp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		ip = *(u64 *)(fp+8);
		if (!in_sched_functions(ip))
			return ip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

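/*
 * Back end of arch_prctl(): get or set the FS/GS base of a task.
 * Small (32-bit) bases go through the GDT, larger ones through the
 * FS_BASE/KERNEL_GS_BASE MSRs.
 */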
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		}
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
	unsigned long range_end = mm->brk + 0x02000000;
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}