arch/x86/kernel/process_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>
#include <linux/prctl.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}

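/*
 * enter_idle()/exit_idle() bracket the time a CPU spends in its idle
 * routine: they flip the per-CPU "isidle" flag in the PDA and broadcast
 * IDLE_START/IDLE_END on the idle_notifier chain so that interested
 * subsystems can react to the CPU going idle or waking up again.
 */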
void enter_idle(void)
{
        write_pda(isidle, 1);
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        if (test_and_clear_bit_pda(0, isidle) == 0)
                return;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
        /* idle loop has pid 0 */
        if (current->pid)
                return;
        __exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
        current_thread_info()->status &= ~TS_POLLING;
        /*
         * TS_POLLING-cleared state must be visible before we
         * test NEED_RESCHED:
         */
        smp_mb();
        if (!need_resched())
                safe_halt();    /* enables interrupts racelessly */
        else
                local_irq_enable();
        current_thread_info()->status |= TS_POLLING;
}

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
        idle_task_exit();
        wbinvd();
        mb();
        /* Ack it */
        __get_cpu_var(cpu_state) = CPU_DEAD;

        local_irq_disable();
        while (1)
                halt();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                tick_nohz_stop_sched_tick();
                while (!need_resched()) {
                        void (*idle)(void);

                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = default_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        /*
                         * Idle routines should keep interrupts disabled
                         * from here on, until they go to idle.
                         * Otherwise, idle callbacks can misfire.
                         */
                        local_irq_disable();
                        enter_idle();
                        /* Don't trace irqs off for idle */
                        stop_critical_timings();
                        idle();
                        start_critical_timings();
                        /* In many cases the interrupt that ended idle
                           has already called exit_idle. But some idle
                           loops can be woken up without interrupt. */
                        __exit_idle();
                }

                tick_nohz_restart_sched_tick();
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
        unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
        unsigned long d0, d1, d2, d3, d6, d7;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                init_utsname()->release,
                (int)strcspn(init_utsname()->version, " "),
                init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
        printk_address(regs->ip, 1);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
                regs->flags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->ax, regs->bx, regs->cx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->dx, regs->si, regs->di);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->bp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("movl %%ds,%0" : "=r" (ds));
        asm("movl %%cs,%0" : "=r" (cs));
        asm("movl %%es,%0" : "=r" (es));
        asm("movl %%fs,%0" : "=r" (fsindex));
        asm("movl %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        cr0 = read_cr0();
        cr2 = read_cr2();
        cr3 = read_cr3();
        cr4 = read_cr4();

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
        printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

        get_debugreg(d0, 0);
        get_debugreg(d1, 1);
        get_debugreg(d2, 2);
        printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
        get_debugreg(d3, 3);
        get_debugreg(d6, 6);
        get_debugreg(d7, 7);
        printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
                /*
                 * Careful, clear this in the TSS too:
                 */
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                t->io_bitmap_max = 0;
                put_cpu();
        }
}

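/*
 * flush_thread() is called on exec: it resets the debug registers and
 * TLS slots of the calling task, drops its FPU state, and flips the
 * 32/64-bit ABI flags if the binary about to run requested a different
 * personality.
 */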
void flush_thread(void)
{
        struct task_struct *tsk = current;

        if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
                clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
                if (test_tsk_thread_flag(tsk, TIF_IA32)) {
                        clear_tsk_thread_flag(tsk, TIF_IA32);
                } else {
                        set_tsk_thread_flag(tsk, TIF_IA32);
                        current_thread_info()->status |= TS_COMPAT;
                }
        }
        clear_tsk_thread_flag(tsk, TIF_DEBUG);

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state..
         */
        tsk->fpu_counter = 0;
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

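/*
 * Helpers for the 32-bit TLS entries in the GDT: set_32bit_tls() installs
 * a 32-bit user descriptor with the given base at one slot of the task's
 * tls_array, and read_32bit_tls() reads the base address back out of it.
 */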
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct desc_struct *desc = t->thread.tls_array;
        desc += tls;
        fill_ldt(desc, &ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        return get_desc_base(&t->thread.tls_array[tls]);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

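/*
 * copy_thread() sets up the kernel stack and saved register state of a
 * freshly forked task: the child's pt_regs sit at the top of its stack
 * and start out as a copy of the parent's, with rax forced to 0 so the
 * child sees a zero return value from fork/clone.  The parent's segment
 * selectors, I/O permission bitmap and (for CLONE_SETTLS) TLS settings
 * are duplicated as well.
 */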
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
                unsigned long unused,
                struct task_struct *p, struct pt_regs *regs)
{
        int err;
        struct pt_regs *childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->ax = 0;
        childregs->sp = sp;
        if (sp == ~0UL)
                childregs->sp = (unsigned long)childregs;

        p->thread.sp = (unsigned long) childregs;
        p->thread.sp0 = (unsigned long) (childregs+1);
        p->thread.usersp = me->thread.usersp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
                set_tsk_thread_flag(p, TIF_IO_BITMAP);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

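/*
 * start_thread() points the current task at a freshly exec'd 64-bit
 * image: the data segment selectors and the GS base are cleared, the
 * saved user registers are set to the new entry point and stack, and any
 * lazily saved FPU/extended state from the old image is freed.
 */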
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
        load_gs_index(0);
        regs->ip = new_ip;
        regs->sp = new_sp;
        write_pda(oldrsp, new_sp);
        regs->cs = __USER_CS;
        regs->ss = __USER_DS;
        regs->flags = 0x200;
        set_fs(USER_DS);
        /*
         * Free the old FP and other extended state
         */
        free_thread_xstate(current);
}
EXPORT_SYMBOL_GPL(start_thread);

static void hard_disable_TSC(void)
{
        write_cr4(read_cr4() | X86_CR4_TSD);
}

void disable_TSC(void)
{
        preempt_disable();
        if (!test_and_set_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_disable_TSC();
        preempt_enable();
}

static void hard_enable_TSC(void)
{
        write_cr4(read_cr4() & ~X86_CR4_TSD);
}

static void enable_TSC(void)
{
        preempt_disable();
        if (test_and_clear_thread_flag(TIF_NOTSC))
                /*
                 * Must flip the CPU state synchronously with
                 * TIF_NOTSC in the current running context.
                 */
                hard_enable_TSC();
        preempt_enable();
}

int get_tsc_mode(unsigned long adr)
{
        unsigned int val;

        if (test_thread_flag(TIF_NOTSC))
                val = PR_TSC_SIGSEGV;
        else
                val = PR_TSC_ENABLE;

        return put_user(val, (unsigned int __user *)adr);
}

int set_tsc_mode(unsigned int val)
{
        if (val == PR_TSC_SIGSEGV)
                disable_TSC();
        else if (val == PR_TSC_ENABLE)
                enable_TSC();
        else
                return -EINVAL;

        return 0;
}
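
/*
 * get_tsc_mode()/set_tsc_mode() back the PR_GET_TSC/PR_SET_TSC prctl()
 * operations: selecting PR_TSC_SIGSEGV sets CR4.TSD so a user-space RDTSC
 * raises SIGSEGV, PR_TSC_ENABLE clears it again.  A rough user-space
 * sketch (illustrative only, not part of this file):
 *
 *      #include <sys/prctl.h>
 *
 *      prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);   rdtsc now faults
 *      prctl(PR_SET_TSC, PR_TSC_ENABLE, 0, 0, 0);    rdtsc allowed again
 */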

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

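/*
 * __switch_to_xtra() handles the slow-path pieces of a context switch
 * that most tasks never need: reloading the hardware debug registers,
 * switching the DS area and DEBUGCTL MSRs, toggling CR4.TSD for
 * PR_SET_TSC, and copying or clearing the I/O permission bitmap in the
 * TSS.  It is only called when the previous or the next task has one of
 * the corresponding TIF_* work flags set.
 */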
static inline void __switch_to_xtra(struct task_struct *prev_p,
                                    struct task_struct *next_p,
                                    struct tss_struct *tss)
{
        struct thread_struct *prev, *next;
        unsigned long debugctl;

        prev = &prev_p->thread;
        next = &next_p->thread;

        debugctl = prev->debugctlmsr;
        if (next->ds_area_msr != prev->ds_area_msr) {
                /* we clear debugctl to make sure DS
                 * is not in use when we change it */
                debugctl = 0;
                update_debugctlmsr(0);
                wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
        }

        if (next->debugctlmsr != debugctl)
                update_debugctlmsr(next->debugctlmsr);

        if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
                loaddebug(next, 0);
                loaddebug(next, 1);
                loaddebug(next, 2);
                loaddebug(next, 3);
                /* no 4 and 5 */
                loaddebug(next, 6);
                loaddebug(next, 7);
        }

        if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
            test_tsk_thread_flag(next_p, TIF_NOTSC)) {
                /* prev and next are different */
                if (test_tsk_thread_flag(next_p, TIF_NOTSC))
                        hard_disable_TSC();
                else
                        hard_enable_TSC();
        }

        if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
                /*
                 * Copy the relevant range of the IO bitmap.
                 * Normally this is 128 bytes or less:
                 */
                memcpy(tss->io_bitmap, next->io_bitmap_ptr,
                       max(prev->io_bitmap_max, next->io_bitmap_max));
        } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
                /*
                 * Clear any possible leftover bits:
                 */
                memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
        }

#ifdef X86_BTS
        if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);

        if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
                ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
#endif
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);

        /* we're going to use this soon, after a few expensive things */
        if (next_p->fpu_counter > 5)
                prefetch(next->xstate);

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        load_sp0(tss, next);

        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        asm volatile("mov %%es,%0" : "=m" (prev->es));
        if (unlikely(next->es | prev->es))
                loadsegment(es, next->es);

        asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
        if (unlikely(next->ds | prev->ds))
                loadsegment(ds, next->ds);

        load_TLS(next, cpu);

        /*
         * Switch FS and GS.
         */
        {
                unsigned fsindex;
                asm volatile("movl %%fs,%0" : "=r" (fsindex));
                /* segment register != 0 always requires a reload.
                   also reload when it has changed.
                   when prev process used 64bit base always reload
                   to avoid an information leak. */
                if (unlikely(fsindex | next->fsindex | prev->fs)) {
                        loadsegment(fs, next->fsindex);
                        /* check if the user used a selector != 0
                         * if yes clear 64bit base, since overloaded base
                         * is always mapped to the Null selector
                         */
                        if (fsindex)
                                prev->fs = 0;
                }
                /* when next process has a 64bit base use it */
                if (next->fs)
                        wrmsrl(MSR_FS_BASE, next->fs);
                prev->fsindex = fsindex;
        }
        {
                unsigned gsindex;
                asm volatile("movl %%gs,%0" : "=r" (gsindex));
                if (unlikely(gsindex | next->gsindex | prev->gs)) {
                        load_gs_index(next->gsindex);
                        if (gsindex)
                                prev->gs = 0;
                }
                if (next->gs)
                        wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
                prev->gsindex = gsindex;
        }

        /* Must be after DS reload */
        unlazy_fpu(prev_p);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->usersp = read_pda(oldrsp);
        write_pda(oldrsp, next->usersp);
        write_pda(pcurrent, next_p);

        write_pda(kernelstack,
                  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
        write_pda(stack_canary, next_p->stack_canary);
        /*
         * Build time only check to make sure the stack_canary is at
         * offset 40 in the pda; this is a gcc ABI requirement
         */
        BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
         */
        if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);

        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
         * chances of needing FPU soon are obviously high now
         *
         * tsk_used_math() checks prevent calling math_state_restore(),
         * which can sleep in the case of !tsk_used_math()
         */
        if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
                math_state_restore();
        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs *regs)
{
        long error;
        char *filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, regs);
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->sp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
                       NULL, NULL);
}

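/*
 * get_wchan() reports where a sleeping task is blocked: it walks the
 * saved frame-pointer chain on the task's kernel stack (bounded to 16
 * frames and to the stack page itself) and returns the first return
 * address that is not inside the scheduler.
 */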
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, ip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.sp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                ip = *(u64 *)(fp+8);
                if (!in_sched_functions(ip))
                        return ip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

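/*
 * do_arch_prctl() implements the arch_prctl(2) operations: ARCH_SET_FS
 * and ARCH_SET_GS install a new FS/GS base for a task (small bases go
 * through a GDT entry, large ones through the FS_BASE/KERNEL_GS_BASE
 * MSRs), while ARCH_GET_FS and ARCH_GET_GS copy the current base back to
 * the user-supplied address.  A rough user-space sketch (illustrative
 * only; tls_block is a placeholder pointer, not part of this file):
 *
 *      #include <asm/prctl.h>
 *      #include <sys/syscall.h>
 *
 *      unsigned long base;
 *      syscall(SYS_arch_prctl, ARCH_SET_FS, tls_block);
 *      syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
 */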
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = checking_wrmsrl(MSR_FS_BASE, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                } else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

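/*
 * Stack and heap randomization helpers: arch_align_stack() subtracts a
 * small random amount (up to 8KB) from the initial stack pointer unless
 * randomization is disabled, and arch_randomize_brk() picks a randomized
 * start for the heap within 32MB above the end of the binary.
 */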
unsigned long arch_align_stack(unsigned long sp)
{
        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

unsigned long arch_randomize_brk(struct mm_struct *mm)
{
        unsigned long range_end = mm->brk + 0x02000000;
        return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}