x86: unify desc_struct
deliverable/linux.git: arch/x86/kernel/process_64.c
1 /*
2 * Copyright (C) 1995 Linus Torvalds
3 *
4 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000
6 *
7 * X86-64 port
8 * Andi Kleen.
9 *
10 * CPU hotplug support - ashok.raj@intel.com
11 */
12
13 /*
14 * This file handles the architecture-dependent parts of process handling.
15 */
16
17 #include <stdarg.h>
18
19 #include <linux/cpu.h>
20 #include <linux/errno.h>
21 #include <linux/sched.h>
22 #include <linux/fs.h>
23 #include <linux/kernel.h>
24 #include <linux/mm.h>
25 #include <linux/elfcore.h>
26 #include <linux/smp.h>
27 #include <linux/slab.h>
28 #include <linux/user.h>
29 #include <linux/a.out.h>
30 #include <linux/interrupt.h>
31 #include <linux/utsname.h>
32 #include <linux/delay.h>
33 #include <linux/module.h>
34 #include <linux/ptrace.h>
35 #include <linux/random.h>
36 #include <linux/notifier.h>
37 #include <linux/kprobes.h>
38 #include <linux/kdebug.h>
39 #include <linux/tick.h>
40
41 #include <asm/uaccess.h>
42 #include <asm/pgtable.h>
43 #include <asm/system.h>
44 #include <asm/io.h>
45 #include <asm/processor.h>
46 #include <asm/i387.h>
47 #include <asm/mmu_context.h>
48 #include <asm/pda.h>
49 #include <asm/prctl.h>
50 #include <asm/desc.h>
51 #include <asm/proto.h>
52 #include <asm/ia32.h>
53 #include <asm/idle.h>
54
55 asmlinkage extern void ret_from_fork(void);
56
57 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
58
59 unsigned long boot_option_idle_override = 0;
60 EXPORT_SYMBOL(boot_option_idle_override);
61
62 /*
63 * Power management idle function, if any.
64 */
65 void (*pm_idle)(void);
66 EXPORT_SYMBOL(pm_idle);
67 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
68
69 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
70
71 void idle_notifier_register(struct notifier_block *n)
72 {
73 atomic_notifier_chain_register(&idle_notifier, n);
74 }
75
76 void enter_idle(void)
77 {
78 write_pda(isidle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80 }
81
82 static void __exit_idle(void)
83 {
84 if (test_and_clear_bit_pda(0, isidle) == 0)
85 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87 }
88
89 /* Called from interrupts to signify idle end */
90 void exit_idle(void)
91 {
92 /* idle loop has pid 0 */
93 if (current->pid)
94 return;
95 __exit_idle();
96 }
97
98 /*
99 * We use this if we don't have any better
100 * idle routine.
101 */
102 void default_idle(void)
103 {
104 current_thread_info()->status &= ~TS_POLLING;
105 /*
106 * TS_POLLING-cleared state must be visible before we
107 * test NEED_RESCHED:
108 */
109 smp_mb();
110 local_irq_disable();
111 if (!need_resched()) {
112 ktime_t t0, t1;
113 u64 t0n, t1n;
114
115 t0 = ktime_get();
116 t0n = ktime_to_ns(t0);
117 safe_halt(); /* enables interrupts racelessly */
118 local_irq_disable();
119 t1 = ktime_get();
120 t1n = ktime_to_ns(t1);
121 sched_clock_idle_wakeup_event(t1n - t0n);
122 }
123 local_irq_enable();
124 current_thread_info()->status |= TS_POLLING;
125 }
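/*
 * Note on the ordering above: TS_POLLING is cleared first so that a remote
 * wakeup falls back to sending a reschedule IPI, and the smp_mb() makes that
 * store visible before need_resched() is tested.  safe_halt() is the classic
 * "sti; hlt" pair, and the interrupt shadow of sti means an IPI arriving in
 * that window still terminates the hlt.  The two ktime samples bracket the
 * halt so sched_clock_idle_wakeup_event() can be told how long the CPU slept.
 */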
126
127 /*
128 * On SMP it's slightly faster (but much more power-consuming!)
129 * to poll the ->need_resched flag instead of waiting for the
130 * cross-CPU IPI to arrive. Use this option with caution.
131 */
132 static void poll_idle(void)
133 {
134 local_irq_enable();
135 cpu_relax();
136 }
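/*
 * Because cpu_idle() sets TS_POLLING once and poll_idle() never clears it,
 * a remote CPU that sets TIF_NEED_RESCHED can skip the reschedule IPI
 * entirely; the need_resched() test in cpu_idle()'s inner loop picks the
 * flag up on the next iteration without any interrupt.
 */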
137
138 #ifdef CONFIG_HOTPLUG_CPU
139 DECLARE_PER_CPU(int, cpu_state);
140
141 #include <asm/nmi.h>
142 /* We halt the CPU with physical CPU hotplug */
143 static inline void play_dead(void)
144 {
145 idle_task_exit();
146 wbinvd();
147 mb();
148 /* Ack it */
149 __get_cpu_var(cpu_state) = CPU_DEAD;
150
151 local_irq_disable();
152 while (1)
153 halt();
154 }
155 #else
156 static inline void play_dead(void)
157 {
158 BUG();
159 }
160 #endif /* CONFIG_HOTPLUG_CPU */
161
162 /*
163 * The idle thread. There's no useful work to be
164 * done, so just try to conserve power and have a
165 * low exit latency (ie sit in a loop waiting for
166 * somebody to say that they'd like to reschedule)
167 */
168 void cpu_idle(void)
169 {
170 current_thread_info()->status |= TS_POLLING;
171 /* endless idle loop with no priority at all */
172 while (1) {
173 while (!need_resched()) {
174 void (*idle)(void);
175
176 if (__get_cpu_var(cpu_idle_state))
177 __get_cpu_var(cpu_idle_state) = 0;
178
179 tick_nohz_stop_sched_tick();
180
181 rmb();
182 idle = pm_idle;
183 if (!idle)
184 idle = default_idle;
185 if (cpu_is_offline(smp_processor_id()))
186 play_dead();
187 /*
188 * Idle routines should keep interrupts disabled
189 * from here on, until they go to idle.
190 * Otherwise, idle callbacks can misfire.
191 */
192 local_irq_disable();
193 enter_idle();
194 idle();
195 /* In many cases the interrupt that ended idle
196 has already called exit_idle. But some idle
197 loops can be woken up without an interrupt. */
198 __exit_idle();
199 }
200
201 tick_nohz_restart_sched_tick();
202 preempt_enable_no_resched();
203 schedule();
204 preempt_disable();
205 }
206 }
207
208 static void do_nothing(void *unused)
209 {
210 }
211
212 void cpu_idle_wait(void)
213 {
214 unsigned int cpu, this_cpu = get_cpu();
215 cpumask_t map, tmp = current->cpus_allowed;
216
217 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
218 put_cpu();
219
220 cpus_clear(map);
221 for_each_online_cpu(cpu) {
222 per_cpu(cpu_idle_state, cpu) = 1;
223 cpu_set(cpu, map);
224 }
225
226 __get_cpu_var(cpu_idle_state) = 0;
227
228 wmb();
229 do {
230 ssleep(1);
231 for_each_online_cpu(cpu) {
232 if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
233 cpu_clear(cpu, map);
234 }
235 cpus_and(map, map, cpu_online_map);
236 /*
237 * We waited 1 sec; if a CPU still has not passed through the idle
238 * loop, it may be because it is already idle and not waking up,
239 * since it has nothing to do.
240 * Give all the remaining CPUs a kick.
241 */
242 smp_call_function_mask(map, do_nothing, 0, 0);
243 } while (!cpus_empty(map));
244
245 set_cpus_allowed(current, tmp);
246 }
247 EXPORT_SYMBOL_GPL(cpu_idle_wait);
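/*
 * Summary of the handshake above: every online CPU has its cpu_idle_state
 * flag set, then we sleep in one-second steps until each CPU has passed
 * through the top of cpu_idle() (which clears the flag).  CPUs that are
 * already halted are nudged with an empty smp_call_function_mask() IPI so
 * that they make that pass.  The net effect is that a newly installed
 * pm_idle routine is guaranteed to be in use on all CPUs once
 * cpu_idle_wait() returns.
 */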
248
249 /*
250 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
251 * which can obviate the IPI used to trigger a check of need_resched.
252 * We execute MONITOR against need_resched and enter optimized wait state
253 * through MWAIT. Whenever someone changes need_resched, we would be woken
254 * up from MWAIT (without an IPI).
255 *
256 * New with Core Duo processors, MWAIT can take some hints based on CPU
257 * capability.
258 */
259 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
260 {
261 if (!need_resched()) {
262 __monitor((void *)&current_thread_info()->flags, 0, 0);
263 smp_mb();
264 if (!need_resched())
265 __mwait(ax, cx);
266 }
267 }
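/*
 * For illustration (this is the convention used by callers such as the ACPI
 * idle driver, not something this file enforces): the EAX hint selects the
 * target C-state, with bits 7:4 usually holding the C-state index minus one
 * and bits 3:0 a sub-state, while ECX bit 0 asks that masked interrupts
 * still break out of MWAIT.  A hypothetical C2 request would look roughly
 * like:
 *
 *	mwait_idle_with_hints(0x10, 0x01);
 */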
268
269 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
270 static void mwait_idle(void)
271 {
272 if (!need_resched()) {
273 __monitor((void *)&current_thread_info()->flags, 0, 0);
274 smp_mb();
275 if (!need_resched())
276 __sti_mwait(0, 0);
277 else
278 local_irq_enable();
279 } else {
280 local_irq_enable();
281 }
282 }
283
284 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
285 {
286 static int printed;
287 if (cpu_has(c, X86_FEATURE_MWAIT)) {
288 /*
289 * Skip if setup has overridden idle.
290 * One CPU supports mwait => all CPUs support mwait.
291 */
292 if (!pm_idle) {
293 if (!printed) {
294 printk(KERN_INFO "using mwait in idle threads.\n");
295 printed = 1;
296 }
297 pm_idle = mwait_idle;
298 }
299 }
300 }
301
302 static int __init idle_setup(char *str)
303 {
304 if (!strcmp(str, "poll")) {
305 printk("using polling idle threads.\n");
306 pm_idle = poll_idle;
307 } else if (!strcmp(str, "mwait"))
308 force_mwait = 1;
309 else
310 return -1;
311
312 boot_option_idle_override = 1;
313 return 0;
314 }
315 early_param("idle", idle_setup);
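/*
 * Usage sketch for the parameter registered above, on the kernel command
 * line:
 *
 *	idle=poll	- busy-poll need_resched; lowest wakeup latency,
 *			  highest power consumption
 *	idle=mwait	- force MWAIT-based idle via force_mwait
 *
 * Both forms also set boot_option_idle_override so that later idle-selection
 * code can tell the user forced a choice.
 */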
316
317 /* Also prints some state that isn't saved in the pt_regs */
318 void __show_regs(struct pt_regs * regs)
319 {
320 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
321 unsigned long d0, d1, d2, d3, d6, d7;
322 unsigned int fsindex, gsindex;
323 unsigned int ds, cs, es;
324
325 printk("\n");
326 print_modules();
327 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
328 current->pid, current->comm, print_tainted(),
329 init_utsname()->release,
330 (int)strcspn(init_utsname()->version, " "),
331 init_utsname()->version);
332 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
333 printk_address(regs->ip);
334 printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
335 regs->flags);
336 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
337 regs->ax, regs->bx, regs->cx);
338 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
339 regs->dx, regs->si, regs->di);
340 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
341 regs->bp, regs->r8, regs->r9);
342 printk("R10: %016lx R11: %016lx R12: %016lx\n",
343 regs->r10, regs->r11, regs->r12);
344 printk("R13: %016lx R14: %016lx R15: %016lx\n",
345 regs->r13, regs->r14, regs->r15);
346
347 asm("movl %%ds,%0" : "=r" (ds));
348 asm("movl %%cs,%0" : "=r" (cs));
349 asm("movl %%es,%0" : "=r" (es));
350 asm("movl %%fs,%0" : "=r" (fsindex));
351 asm("movl %%gs,%0" : "=r" (gsindex));
352
353 rdmsrl(MSR_FS_BASE, fs);
354 rdmsrl(MSR_GS_BASE, gs);
355 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
356
357 cr0 = read_cr0();
358 cr2 = read_cr2();
359 cr3 = read_cr3();
360 cr4 = read_cr4();
361
362 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
363 fs, fsindex, gs, gsindex, shadowgs);
364 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
365 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
366
367 get_debugreg(d0, 0);
368 get_debugreg(d1, 1);
369 get_debugreg(d2, 2);
370 printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
371 get_debugreg(d3, 3);
372 get_debugreg(d6, 6);
373 get_debugreg(d7, 7);
374 printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
375 }
376
377 void show_regs(struct pt_regs *regs)
378 {
379 printk("CPU %d:", smp_processor_id());
380 __show_regs(regs);
381 show_trace(NULL, regs, (void *)(regs + 1));
382 }
383
384 /*
385 * Free current thread data structures, etc.
386 */
387 void exit_thread(void)
388 {
389 struct task_struct *me = current;
390 struct thread_struct *t = &me->thread;
391
392 if (me->thread.io_bitmap_ptr) {
393 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
394
395 kfree(t->io_bitmap_ptr);
396 t->io_bitmap_ptr = NULL;
397 clear_thread_flag(TIF_IO_BITMAP);
398 /*
399 * Careful, clear this in the TSS too:
400 */
401 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
402 t->io_bitmap_max = 0;
403 put_cpu();
404 }
405 }
406
407 void flush_thread(void)
408 {
409 struct task_struct *tsk = current;
410
411 if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
412 clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
413 if (test_tsk_thread_flag(tsk, TIF_IA32)) {
414 clear_tsk_thread_flag(tsk, TIF_IA32);
415 } else {
416 set_tsk_thread_flag(tsk, TIF_IA32);
417 current_thread_info()->status |= TS_COMPAT;
418 }
419 }
420 clear_tsk_thread_flag(tsk, TIF_DEBUG);
421
422 tsk->thread.debugreg0 = 0;
423 tsk->thread.debugreg1 = 0;
424 tsk->thread.debugreg2 = 0;
425 tsk->thread.debugreg3 = 0;
426 tsk->thread.debugreg6 = 0;
427 tsk->thread.debugreg7 = 0;
428 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
429 /*
430 * Forget coprocessor state.
431 */
432 clear_fpu(tsk);
433 clear_used_math();
434 }
435
436 void release_thread(struct task_struct *dead_task)
437 {
438 if (dead_task->mm) {
439 if (dead_task->mm->context.size) {
440 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
441 dead_task->comm,
442 dead_task->mm->context.ldt,
443 dead_task->mm->context.size);
444 BUG();
445 }
446 }
447 }
448
449 static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
450 {
451 struct user_desc ud = {
452 .base_addr = addr,
453 .limit = 0xfffff,
454 .seg_32bit = 1,
455 .limit_in_pages = 1,
456 .useable = 1,
457 };
458 struct desc_struct *desc = (void *)t->thread.tls_array;
459 desc += tls;
460 desc->a = LDT_entry_a(&ud);
461 desc->b = LDT_entry_b(&ud);
462 }
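/*
 * For illustration, assuming the usual LDT_entry_a()/LDT_entry_b() packing:
 * with a base_addr of 0x1000 and the limit/flags filled in above, the two
 * 32-bit halves of the descriptor come out roughly as
 *
 *	a = ((base & 0xffff) << 16) | (limit & 0xffff)  = 0x1000ffff
 *	b = (base & 0xff000000) | ((base >> 16) & 0xff) | limit[19:16]
 *	    | G | D/B | AVL | P | DPL=3 | data-segment type bits
 *
 * i.e. a flat 4 GB, 32-bit, ring-3 data segment in which only the base
 * differs per thread, which is exactly what the unified desc_struct's a/b
 * fields describe.
 */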
463
464 static inline u32 read_32bit_tls(struct task_struct *t, int tls)
465 {
466 return get_desc_base(&t->thread.tls_array[tls]);
467 }
468
469 /*
470 * This gets called before we allocate a new thread and copy
471 * the current task into it.
472 */
473 void prepare_to_copy(struct task_struct *tsk)
474 {
475 unlazy_fpu(tsk);
476 }
477
478 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
479 unsigned long unused,
480 struct task_struct * p, struct pt_regs * regs)
481 {
482 int err;
483 struct pt_regs * childregs;
484 struct task_struct *me = current;
485
486 childregs = ((struct pt_regs *)
487 (THREAD_SIZE + task_stack_page(p))) - 1;
488 *childregs = *regs;
489
490 childregs->ax = 0;
491 childregs->sp = sp;
492 if (sp == ~0UL)
493 childregs->sp = (unsigned long)childregs;
494
495 p->thread.sp = (unsigned long) childregs;
496 p->thread.sp0 = (unsigned long) (childregs+1);
497 p->thread.usersp = me->thread.usersp;
498
499 set_tsk_thread_flag(p, TIF_FORK);
500
501 p->thread.fs = me->thread.fs;
502 p->thread.gs = me->thread.gs;
503
504 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
505 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
506 asm("mov %%es,%0" : "=m" (p->thread.es));
507 asm("mov %%ds,%0" : "=m" (p->thread.ds));
508
509 if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
510 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
511 if (!p->thread.io_bitmap_ptr) {
512 p->thread.io_bitmap_max = 0;
513 return -ENOMEM;
514 }
515 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
516 IO_BITMAP_BYTES);
517 set_tsk_thread_flag(p, TIF_IO_BITMAP);
518 }
519
520 /*
521 * Set a new TLS for the child thread?
522 */
523 if (clone_flags & CLONE_SETTLS) {
524 #ifdef CONFIG_IA32_EMULATION
525 if (test_thread_flag(TIF_IA32))
526 err = do_set_thread_area(p, -1,
527 (struct user_desc __user *)childregs->si, 0);
528 else
529 #endif
530 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
531 if (err)
532 goto out;
533 }
534 err = 0;
535 out:
536 if (err && p->thread.io_bitmap_ptr) {
537 kfree(p->thread.io_bitmap_ptr);
538 p->thread.io_bitmap_max = 0;
539 }
540 return err;
541 }
542
543 /*
544 * This special macro can be used to load a debugging register
545 */
546 #define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
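/*
 * The ## token paste means loaddebug(next, 0) expands to
 * set_debugreg(next->debugreg0, 0), and likewise for each register number
 * used in __switch_to_xtra() below.
 */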
547
548 /*
549 * Capture the user space registers if the task is not running (in user space)
550 */
551 int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
552 {
553 struct pt_regs *pp, ptregs;
554
555 pp = task_pt_regs(tsk);
556
557 ptregs = *pp;
558 ptregs.cs &= 0xffff;
559 ptregs.ss &= 0xffff;
560
561 elf_core_copy_regs(regs, &ptregs);
562
563 return 1;
564 }
565
566 static inline void __switch_to_xtra(struct task_struct *prev_p,
567 struct task_struct *next_p,
568 struct tss_struct *tss)
569 {
570 struct thread_struct *prev, *next;
571 unsigned long debugctl;
572
573 prev = &prev_p->thread;
574 next = &next_p->thread;
575
576 debugctl = prev->debugctlmsr;
577 if (next->ds_area_msr != prev->ds_area_msr) {
578 /* we clear debugctl to make sure DS
579 * is not in use when we change it */
580 debugctl = 0;
581 wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
582 wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
583 }
584
585 if (next->debugctlmsr != debugctl)
586 wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
587
588 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
589 loaddebug(next, 0);
590 loaddebug(next, 1);
591 loaddebug(next, 2);
592 loaddebug(next, 3);
593 /* no 4 and 5 */
594 loaddebug(next, 6);
595 loaddebug(next, 7);
596 }
597
598 if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
599 /*
600 * Copy the relevant range of the IO bitmap.
601 * Normally this is 128 bytes or less:
602 */
603 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
604 max(prev->io_bitmap_max, next->io_bitmap_max));
605 } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
606 /*
607 * Clear any possible leftover bits:
608 */
609 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
610 }
611
612 /*
613 * Last branch recording: reconfiguration of trace hardware and
614 * disentangling of trace data per task.
615 */
616 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
617 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
618
619 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
620 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
621 }
622
623 /*
624 * switch_to(x,y) should switch tasks from x to y.
625 *
626 * This could still be optimized:
627 * - fold all the options into a flag word and test it with a single test.
628 * - could test fs/gs bitsliced
629 *
630 * Kprobes not supported here. Set the probe on schedule instead.
631 */
632 struct task_struct *
633 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
634 {
635 struct thread_struct *prev = &prev_p->thread,
636 *next = &next_p->thread;
637 int cpu = smp_processor_id();
638 struct tss_struct *tss = &per_cpu(init_tss, cpu);
639
640 /* we're going to use this soon, after a few expensive things */
641 if (next_p->fpu_counter > 5)
642 prefetch(&next->i387.fxsave);
643
644 /*
645 * Reload esp0, LDT and the page table pointer:
646 */
647 tss->sp0 = next->sp0;
648
649 /*
650 * Switch DS and ES.
651 * This won't pick up thread selector changes, but I guess that is ok.
652 */
653 asm volatile("mov %%es,%0" : "=m" (prev->es));
654 if (unlikely(next->es | prev->es))
655 loadsegment(es, next->es);
656
657 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
658 if (unlikely(next->ds | prev->ds))
659 loadsegment(ds, next->ds);
660
661 load_TLS(next, cpu);
662
663 /*
664 * Switch FS and GS.
665 */
666 {
667 unsigned fsindex;
668 asm volatile("movl %%fs,%0" : "=r" (fsindex));
669 /* A segment register value != 0 always requires a reload;
670 also reload when it has changed.
671 When the previous process used a 64-bit base, always reload
672 to avoid an information leak. */
673 if (unlikely(fsindex | next->fsindex | prev->fs)) {
674 loadsegment(fs, next->fsindex);
675 /* Check whether the user used a selector != 0;
676 * if so, clear the 64-bit base, since an overloaded base
677 * is always mapped to the null selector.
678 */
679 if (fsindex)
680 prev->fs = 0;
681 }
682 /* when next process has a 64bit base use it */
683 if (next->fs)
684 wrmsrl(MSR_FS_BASE, next->fs);
685 prev->fsindex = fsindex;
686 }
687 {
688 unsigned gsindex;
689 asm volatile("movl %%gs,%0" : "=r" (gsindex));
690 if (unlikely(gsindex | next->gsindex | prev->gs)) {
691 load_gs_index(next->gsindex);
692 if (gsindex)
693 prev->gs = 0;
694 }
695 if (next->gs)
696 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
697 prev->gsindex = gsindex;
698 }
699
700 /* Must be after DS reload */
701 unlazy_fpu(prev_p);
702
703 /*
704 * Switch the PDA and FPU contexts.
705 */
706 prev->usersp = read_pda(oldrsp);
707 write_pda(oldrsp, next->usersp);
708 write_pda(pcurrent, next_p);
709
710 write_pda(kernelstack,
711 (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
712 #ifdef CONFIG_CC_STACKPROTECTOR
713 write_pda(stack_canary, next_p->stack_canary);
714 /*
715 * Build time only check to make sure the stack_canary is at
716 * offset 40 in the pda; this is a gcc ABI requirement
717 */
718 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
719 #endif
720
721 /*
722 * Now maybe reload the debug registers and handle I/O bitmaps
723 */
724 if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
725 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
726 __switch_to_xtra(prev_p, next_p, tss);
727
728 /* If the task has used the FPU in the last 5 timeslices, just do a full
729 * restore of the math state immediately to avoid the trap; the
730 * chances of needing FPU soon are obviously high now
731 */
732 if (next_p->fpu_counter > 5)
733 math_state_restore();
734 return prev_p;
735 }
736
737 /*
738 * sys_execve() executes a new program.
739 */
740 asmlinkage
741 long sys_execve(char __user *name, char __user * __user *argv,
742 char __user * __user *envp, struct pt_regs regs)
743 {
744 long error;
745 char * filename;
746
747 filename = getname(name);
748 error = PTR_ERR(filename);
749 if (IS_ERR(filename))
750 return error;
751 error = do_execve(filename, argv, envp, &regs);
752 putname(filename);
753 return error;
754 }
755
756 void set_personality_64bit(void)
757 {
758 /* inherit personality from parent */
759
760 /* Make sure to be in 64bit mode */
761 clear_thread_flag(TIF_IA32);
762
763 /* TBD: overwrites user setup. Should have two bits.
764 But 64-bit processes have always behaved this way,
765 so it's not too bad. The main problem is just that
766 32-bit children are affected again. */
767 current->personality &= ~READ_IMPLIES_EXEC;
768 }
769
770 asmlinkage long sys_fork(struct pt_regs *regs)
771 {
772 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
773 }
774
775 asmlinkage long
776 sys_clone(unsigned long clone_flags, unsigned long newsp,
777 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
778 {
779 if (!newsp)
780 newsp = regs->sp;
781 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
782 }
783
784 /*
785 * This is trivial, and on the face of it looks like it
786 * could equally well be done in user mode.
787 *
788 * Not so, for quite unobvious reasons - register pressure.
789 * In user mode vfork() cannot have a stack frame, and if
790 * done by calling the "clone()" system call directly, you
791 * do not have enough call-clobbered registers to hold all
792 * the information you need.
793 */
794 asmlinkage long sys_vfork(struct pt_regs *regs)
795 {
796 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
797 NULL, NULL);
798 }
799
800 unsigned long get_wchan(struct task_struct *p)
801 {
802 unsigned long stack;
803 u64 fp, ip;
804 int count = 0;
805
806 if (!p || p == current || p->state == TASK_RUNNING)
807 return 0;
808 stack = (unsigned long)task_stack_page(p);
809 if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
810 return 0;
811 fp = *(u64 *)(p->thread.sp);
812 do {
813 if (fp < (unsigned long)stack ||
814 fp > (unsigned long)stack+THREAD_SIZE)
815 return 0;
816 ip = *(u64 *)(fp+8);
817 if (!in_sched_functions(ip))
818 return ip;
819 fp = *(u64 *)fp;
820 } while (count++ < 16);
821 return 0;
822 }
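/*
 * The walk above relies on frame pointers, i.e. each stack frame looking
 * like
 *
 *	fp      -> saved caller %rbp   (next fp in the chain)
 *	fp + 8  -> return address      (candidate wchan value)
 *
 * At most 16 frames are followed, and the first return address that is not
 * inside the scheduler is reported as the blocked task's wchan.
 */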
823
824 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
825 {
826 int ret = 0;
827 int doit = task == current;
828 int cpu;
829
830 switch (code) {
831 case ARCH_SET_GS:
832 if (addr >= TASK_SIZE_OF(task))
833 return -EPERM;
834 cpu = get_cpu();
835 /* handle small bases via the GDT because that's faster to
836 switch. */
837 if (addr <= 0xffffffff) {
838 set_32bit_tls(task, GS_TLS, addr);
839 if (doit) {
840 load_TLS(&task->thread, cpu);
841 load_gs_index(GS_TLS_SEL);
842 }
843 task->thread.gsindex = GS_TLS_SEL;
844 task->thread.gs = 0;
845 } else {
846 task->thread.gsindex = 0;
847 task->thread.gs = addr;
848 if (doit) {
849 load_gs_index(0);
850 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
851 }
852 }
853 put_cpu();
854 break;
855 case ARCH_SET_FS:
856 /* Not strictly needed for fs, but do it for symmetry
857 with gs */
858 if (addr >= TASK_SIZE_OF(task))
859 return -EPERM;
860 cpu = get_cpu();
861 /* handle small bases via the GDT because that's faster to
862 switch. */
863 if (addr <= 0xffffffff) {
864 set_32bit_tls(task, FS_TLS, addr);
865 if (doit) {
866 load_TLS(&task->thread, cpu);
867 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
868 }
869 task->thread.fsindex = FS_TLS_SEL;
870 task->thread.fs = 0;
871 } else {
872 task->thread.fsindex = 0;
873 task->thread.fs = addr;
874 if (doit) {
875 /* set the selector to 0 to not confuse
876 __switch_to */
877 asm volatile("movl %0,%%fs" :: "r" (0));
878 ret = checking_wrmsrl(MSR_FS_BASE, addr);
879 }
880 }
881 put_cpu();
882 break;
883 case ARCH_GET_FS: {
884 unsigned long base;
885 if (task->thread.fsindex == FS_TLS_SEL)
886 base = read_32bit_tls(task, FS_TLS);
887 else if (doit)
888 rdmsrl(MSR_FS_BASE, base);
889 else
890 base = task->thread.fs;
891 ret = put_user(base, (unsigned long __user *)addr);
892 break;
893 }
894 case ARCH_GET_GS: {
895 unsigned long base;
896 unsigned gsindex;
897 if (task->thread.gsindex == GS_TLS_SEL)
898 base = read_32bit_tls(task, GS_TLS);
899 else if (doit) {
900 asm("movl %%gs,%0" : "=r" (gsindex));
901 if (gsindex)
902 rdmsrl(MSR_KERNEL_GS_BASE, base);
903 else
904 base = task->thread.gs;
905 }
906 else
907 base = task->thread.gs;
908 ret = put_user(base, (unsigned long __user *)addr);
909 break;
910 }
911
912 default:
913 ret = -EINVAL;
914 break;
915 }
916
917 return ret;
918 }
919
920 long sys_arch_prctl(int code, unsigned long addr)
921 {
922 return do_arch_prctl(current, code, addr);
923 }
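/*
 * Userspace usage sketch (hypothetical program; the call is normally made
 * through syscall(2)):
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_block);
 *	syscall(SYS_arch_prctl, ARCH_GET_FS, (unsigned long)&base);
 *
 * Note the asymmetry handled in do_arch_prctl(): for ARCH_SET_* the addr
 * argument is the new base itself, while for ARCH_GET_* it is a user pointer
 * that the current base is written back through.
 */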
924
925 unsigned long arch_align_stack(unsigned long sp)
926 {
927 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
928 sp -= get_random_int() % 8192;
929 return sp & ~0xf;
930 }
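/*
 * Worked example: with randomization enabled the stack top is lowered by a
 * random 0..8191 bytes and then rounded down to 16 bytes, so the start of
 * the stack can land in any of 8192 / 16 = 512 distinct 16-byte slots.
 */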
931
932 unsigned long arch_randomize_brk(struct mm_struct *mm)
933 {
934 unsigned long range_end = mm->brk + 0x02000000;
935 return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
936 }
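/*
 * The heap base is thus placed at a page-aligned random offset inside the
 * 32 MB (0x02000000 byte) window above the brk chosen by the ELF loader;
 * if randomize_range() returns 0 the unrandomized mm->brk is used as-is.
 */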
937