/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/sched.h>
#include <linux/preempt.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/kprobes.h>
#include <linux/elfcore.h>
#include <linux/tick.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/compat.h>
#include <linux/hardirq.h>
#include <linux/syscalls.h>
#include <linux/kernel.h>
#include <linux/tracehook.h>
#include <linux/signal.h>
#include <asm/stack.h>
#include <asm/switch_to.h>
#include <asm/homecache.h>
#include <asm/syscalls.h>
#include <asm/traps.h>
#include <asm/setup.h>
#ifdef CONFIG_HARDWALL
#include <asm/hardwall.h>
#endif
#include <arch/chip.h>
#include <arch/abi.h>
#include <arch/sim_def.h>

/*
 * Use the (x86) "idle=poll" option to prefer low latency when leaving the
 * idle loop over low power while in the idle loop, e.g. if we have
 * one thread per core and we want to get threads out of futex waits fast.
 */
static int no_idle_nap;
static int __init idle_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (!strcmp(str, "poll")) {
		pr_info("using polling idle threads.\n");
		no_idle_nap = 1;
	} else if (!strcmp(str, "halt"))
		no_idle_nap = 0;
	else
		return -1;

	return 0;
}
early_param("idle", idle_setup);
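
/*
 * For example, booting with "idle=poll" on the kernel command line
 * selects the pure busy-wait loop in cpu_idle() below, while
 * "idle=halt" keeps the default behavior of napping in the idle loop.
 */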

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	int cpu = smp_processor_id();

	current_thread_info()->status |= TS_POLLING;
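
	/*
	 * While TS_POLLING is set, the scheduler can see that this cpu
	 * is polling need_resched() and can wake it without sending a
	 * reschedule IPI; the nap path below clears the bit before
	 * actually sleeping so that an IPI is used when we are napping.
	 */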

	if (no_idle_nap) {
		while (1) {
			while (!need_resched())
				cpu_relax();
			schedule();
		}
	}

	/* endless idle loop with no priority at all */
	while (1) {
		tick_nohz_idle_enter();
		rcu_idle_enter();
		while (!need_resched()) {
			if (cpu_is_offline(cpu))
				BUG();  /* no HOTPLUG_CPU */

			local_irq_disable();
			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
			current_thread_info()->status &= ~TS_POLLING;
			/*
			 * TS_POLLING-cleared state must be visible before we
			 * test NEED_RESCHED:
			 */
			smp_mb();

			if (!need_resched())
				_cpu_idle();
			else
				local_irq_enable();
			current_thread_info()->status |= TS_POLLING;
		}
		rcu_idle_exit();
		tick_nohz_idle_exit();
		schedule_preempt_disabled();
	}
}

/*
 * Release a thread_info structure
 */
void arch_release_thread_info(struct thread_info *info)
{
	struct single_step_state *step_state = info->step_state;

#ifdef CONFIG_HARDWALL
	/*
	 * We free a thread_info from the context of the task that has
	 * been scheduled next, so the original task is already dead.
	 * Calling deactivate here just frees up the data structures.
	 * If the task we're freeing held the last reference to a
	 * hardwall fd, it would have been released prior to this point
	 * anyway via exit_files(), and the hardwall_task.info pointers
	 * would be NULL by now.
	 */
	hardwall_deactivate_all(info->task);
#endif

	if (step_state) {
		/*
		 * FIXME: we don't munmap step_state->buffer
		 * because the mm_struct for this process (info->task->mm)
		 * has already been zeroed in exit_mm().  Keeping a
		 * reference to it here seems like a bad move, so this
		 * means we can't munmap() the buffer, and therefore if we
		 * ptrace multiple threads in a process, we will slowly
		 * leak user memory.  (Note that as soon as the last
		 * thread in a process dies, we will reclaim all user
		 * memory including single-step buffers in the usual way.)
		 * We should either assign a kernel VA to this buffer
		 * somehow, or we should associate the buffer(s) with the
		 * mm itself so we can clean them up that way.
		 */
		kfree(step_state);
	}
}

static void save_arch_state(struct thread_struct *t);

int copy_thread(unsigned long clone_flags, unsigned long sp,
		unsigned long arg, struct task_struct *p)
{
	struct pt_regs *childregs = task_pt_regs(p);
	unsigned long ksp;
	unsigned long *callee_regs;

	/*
	 * Set up the stack and stack pointer appropriately for the
	 * new child to find itself woken up in __switch_to().
	 * The callee-saved registers must be on the stack to be read;
	 * the new task will then jump to assembly support to handle
	 * calling schedule_tail(), etc., and (for userspace tasks)
	 * returning to the context set up in the pt_regs.
	 */
	ksp = (unsigned long) childregs;
	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
	callee_regs = (unsigned long *)ksp;
	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
	p->thread.ksp = ksp;

	/* Record the pid of the task that created this one. */
	p->thread.creator_pid = current->pid;

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		memset(&callee_regs[2], 0,
		       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
		callee_regs[0] = sp;   /* r30 = function */
		callee_regs[1] = arg;  /* r31 = arg */
		childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
		p->thread.pc = (unsigned long) ret_from_kernel_thread;
		return 0;
	}

	/*
	 * Start new thread in ret_from_fork so it schedules properly
	 * and then return from interrupt like the parent.
	 */
	p->thread.pc = (unsigned long) ret_from_fork;

	/*
	 * Do not clone step state from the parent; each thread
	 * must make its own lazily.
	 */
	task_thread_info(p)->step_state = NULL;

	/*
	 * Copy the registers onto the kernel stack so the
	 * return-from-interrupt code will reload it into registers.
	 */
	*childregs = *current_pt_regs();
	childregs->regs[0] = 0;         /* return value is zero */
	if (sp)
		childregs->sp = sp;  /* override with new user stack pointer */
	memcpy(callee_regs, &childregs->regs[CALLEE_SAVED_FIRST_REG],
	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));

	/* Save user stack top pointer so we can ID the stack vm area later. */
	p->thread.usp0 = childregs->sp;

	/*
	 * If CLONE_SETTLS is set, set "tp" in the new task to "r4",
	 * which is passed in as arg #5 to sys_clone().
	 */
	if (clone_flags & CLONE_SETTLS)
		childregs->tp = childregs->regs[4];

#if CHIP_HAS_TILE_DMA()
	/*
	 * No DMA in the new thread.  We model this on the fact that
	 * fork() clears the pending signals, alarms, and aio for the child.
	 */
	memset(&p->thread.tile_dma_state, 0, sizeof(struct tile_dma_state));
	memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
#endif

#if CHIP_HAS_SN_PROC()
	/* Likewise, the new thread is not running static processor code. */
	p->thread.sn_proc_running = 0;
	memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
#endif

#if CHIP_HAS_PROC_STATUS_SPR()
	/* New thread has its miscellaneous processor state bits clear. */
	p->thread.proc_status = 0;
#endif

#ifdef CONFIG_HARDWALL
	/* New thread does not own any networks. */
	memset(&p->thread.hardwall[0], 0,
	       sizeof(struct hardwall_task) * HARDWALL_TYPES);
#endif

	/*
	 * Start the new thread with the current architecture state
	 * (user interrupt masks, etc.).
	 */
	save_arch_state(&p->thread);

	return 0;
}

/*
 * Return "current" if it looks plausible, or else a pointer to a dummy.
 * This can be helpful if we are just trying to emit a clean panic.
 */
struct task_struct *validate_current(void)
{
	static struct task_struct corrupt = { .comm = "<corrupt>" };
	struct task_struct *tsk = current;
	if (unlikely((unsigned long)tsk < PAGE_OFFSET ||
		     (high_memory && (void *)tsk > high_memory) ||
		     ((unsigned long)tsk & (__alignof__(*tsk) - 1)) != 0)) {
		pr_err("Corrupt 'current' %p (sp %#lx)\n", tsk, stack_pointer);
		tsk = &corrupt;
	}
	return tsk;
}
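
/*
 * sim_notify_fork() below pokes SPR_SIM_CONTROL twice to tell an
 * attached Tilera simulator about the parent/child pid pair; we assume
 * (per the sim_def.h naming) that the writes are harmless when no
 * simulator is attached.
 */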
/* Take and return the pointer to the previous task, for schedule_tail(). */
struct task_struct *sim_notify_fork(struct task_struct *prev)
{
	struct task_struct *tsk = current;
	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK_PARENT |
		     (tsk->thread.creator_pid << _SIM_CONTROL_OPERATOR_BITS));
	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_FORK |
		     (tsk->pid << _SIM_CONTROL_OPERATOR_BITS));
	return prev;
}

int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *ptregs = task_pt_regs(tsk);
	elf_core_copy_regs(regs, ptregs);
	return 1;
}

#if CHIP_HAS_TILE_DMA()

/* Allow user processes to access the DMA SPRs */
void grant_dma_mpls(void)
{
#if CONFIG_KERNEL_PL == 2
	__insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1);
	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1);
#else
	__insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1);
	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1);
#endif
}

/* Forbid user processes from accessing the DMA SPRs */
void restrict_dma_mpls(void)
{
#if CONFIG_KERNEL_PL == 2
	__insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1);
	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1);
#else
	__insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1);
	__insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1);
#endif
}
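
/*
 * Our reading of the SET_<n> pattern above: under a CONFIG_KERNEL_PL
 * of 2 the kernel grants the DMA interrupts to protection level 1 and
 * reclaims them at level 2; under the default PL-1 kernel it grants to
 * level 0 and reclaims at level 1.  In other words, "grant" always
 * targets one level below the kernel and "restrict" the kernel's own.
 */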

/* Pause the DMA engine, then save off its state registers. */
static void save_tile_dma_state(struct tile_dma_state *dma)
{
	unsigned long state = __insn_mfspr(SPR_DMA_USER_STATUS);
	unsigned long post_suspend_state;

	/* If we're running, suspend the engine. */
	if ((state & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK)
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK);

	/*
	 * Wait for the engine to idle, then save regs.  Note that we
	 * want to record the "running" bit from before suspension,
	 * and the "done" bit from after, so that we can properly
	 * distinguish a case where the user suspended the engine from
	 * the case where the kernel suspended as part of the context
	 * switch.
	 */
	do {
		post_suspend_state = __insn_mfspr(SPR_DMA_USER_STATUS);
	} while (post_suspend_state & SPR_DMA_STATUS__BUSY_MASK);

	dma->src = __insn_mfspr(SPR_DMA_SRC_ADDR);
	dma->src_chunk = __insn_mfspr(SPR_DMA_SRC_CHUNK_ADDR);
	dma->dest = __insn_mfspr(SPR_DMA_DST_ADDR);
	dma->dest_chunk = __insn_mfspr(SPR_DMA_DST_CHUNK_ADDR);
	dma->strides = __insn_mfspr(SPR_DMA_STRIDE);
	dma->chunk_size = __insn_mfspr(SPR_DMA_CHUNK_SIZE);
	dma->byte = __insn_mfspr(SPR_DMA_BYTE);
	dma->status = (state & SPR_DMA_STATUS__RUNNING_MASK) |
		(post_suspend_state & SPR_DMA_STATUS__DONE_MASK);
}
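
/*
 * The saved dma->status thus encodes two separate facts: the RUNNING
 * bit sampled before the suspend ("was the user running DMA?") and the
 * DONE bit sampled after it ("had the last transaction completed?").
 * restore_tile_dma_state() consumes both bits below.
 */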

/* Restart a DMA that was running before we were context-switched out. */
static void restore_tile_dma_state(struct thread_struct *t)
{
	const struct tile_dma_state *dma = &t->tile_dma_state;

	/*
	 * The only way to restore the done bit is to run a zero
	 * length transaction.
	 */
	if ((dma->status & SPR_DMA_STATUS__DONE_MASK) &&
	    !(__insn_mfspr(SPR_DMA_USER_STATUS) & SPR_DMA_STATUS__DONE_MASK)) {
		__insn_mtspr(SPR_DMA_BYTE, 0);
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
		while (__insn_mfspr(SPR_DMA_USER_STATUS) &
		       SPR_DMA_STATUS__BUSY_MASK)
			;
	}

	__insn_mtspr(SPR_DMA_SRC_ADDR, dma->src);
	__insn_mtspr(SPR_DMA_SRC_CHUNK_ADDR, dma->src_chunk);
	__insn_mtspr(SPR_DMA_DST_ADDR, dma->dest);
	__insn_mtspr(SPR_DMA_DST_CHUNK_ADDR, dma->dest_chunk);
	__insn_mtspr(SPR_DMA_STRIDE, dma->strides);
	__insn_mtspr(SPR_DMA_CHUNK_SIZE, dma->chunk_size);
	__insn_mtspr(SPR_DMA_BYTE, dma->byte);

	/*
	 * Restart the engine if we were running and not done.
	 * Clear a pending async DMA fault that we were waiting on return
	 * to user space to execute, since we expect the DMA engine
	 * to regenerate those faults for us now.  Note that we don't
	 * try to clear the TIF_ASYNC_TLB flag, since it's relatively
	 * harmless if set, and it covers both DMA and the SN processor.
	 */
	if ((dma->status & DMA_STATUS_MASK) == SPR_DMA_STATUS__RUNNING_MASK) {
		t->dma_async_tlb.fault_num = 0;
		__insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
	}
}

#endif /* CHIP_HAS_TILE_DMA() */

static void save_arch_state(struct thread_struct *t)
{
#if CHIP_HAS_SPLIT_INTR_MASK()
	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0_0) |
		((u64)__insn_mfspr(SPR_INTERRUPT_MASK_0_1) << 32);
#else
	t->interrupt_mask = __insn_mfspr(SPR_INTERRUPT_MASK_0);
#endif
	t->ex_context[0] = __insn_mfspr(SPR_EX_CONTEXT_0_0);
	t->ex_context[1] = __insn_mfspr(SPR_EX_CONTEXT_0_1);
	t->system_save[0] = __insn_mfspr(SPR_SYSTEM_SAVE_0_0);
	t->system_save[1] = __insn_mfspr(SPR_SYSTEM_SAVE_0_1);
	t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
	t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
	t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
#if CHIP_HAS_PROC_STATUS_SPR()
	t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
	t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
#endif
#if CHIP_HAS_TILE_RTF_HWM()
	t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
#endif
#if CHIP_HAS_DSTREAM_PF()
	t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
#endif
}
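
/*
 * restore_arch_state() must mirror save_arch_state() exactly: every
 * SPR captured above is written back below, under the same
 * CHIP_HAS_*() conditionals, so that a context switch round-trips the
 * full per-task SPR state.
 */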

static void restore_arch_state(const struct thread_struct *t)
{
#if CHIP_HAS_SPLIT_INTR_MASK()
	__insn_mtspr(SPR_INTERRUPT_MASK_0_0, (u32) t->interrupt_mask);
	__insn_mtspr(SPR_INTERRUPT_MASK_0_1, t->interrupt_mask >> 32);
#else
	__insn_mtspr(SPR_INTERRUPT_MASK_0, t->interrupt_mask);
#endif
	__insn_mtspr(SPR_EX_CONTEXT_0_0, t->ex_context[0]);
	__insn_mtspr(SPR_EX_CONTEXT_0_1, t->ex_context[1]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_0, t->system_save[0]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_1, t->system_save[1]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
	__insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
	__insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
#if CHIP_HAS_PROC_STATUS_SPR()
	__insn_mtspr(SPR_PROC_STATUS, t->proc_status);
#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
	__insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
#endif
#if CHIP_HAS_TILE_RTF_HWM()
	__insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
#endif
#if CHIP_HAS_DSTREAM_PF()
	__insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
#endif
}

void _prepare_arch_switch(struct task_struct *next)
{
#if CHIP_HAS_SN_PROC()
	int snctl;
#endif
#if CHIP_HAS_TILE_DMA()
	struct tile_dma_state *dma = &current->thread.tile_dma_state;
	if (dma->enabled)
		save_tile_dma_state(dma);
#endif
#if CHIP_HAS_SN_PROC()
	/*
	 * Suspend the static network processor if it was running.
	 * We do not suspend the fabric itself, just like we don't
	 * try to suspend the UDN.
	 */
	snctl = __insn_mfspr(SPR_SNCTL);
	current->thread.sn_proc_running =
		(snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
	if (current->thread.sn_proc_running)
		__insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
#endif
}
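
/*
 * Sketch of the switch sequence: the scheduler runs
 * _prepare_arch_switch() above (presumably via the
 * prepare_arch_switch() hook) to pause the per-task engines, then
 * _switch_to() below saves and restores the remaining SPR state and
 * hands off to the assembly __switch_to() helper to swap stacks.
 */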

struct task_struct *__sched _switch_to(struct task_struct *prev,
				       struct task_struct *next)
{
	/* DMA state is already saved; save off other arch state. */
	save_arch_state(&prev->thread);

#if CHIP_HAS_TILE_DMA()
	/*
	 * Restore DMA in new task if desired.
	 * Note that it is only safe to restart here since interrupts
	 * are disabled, so we can't take any DMATLB miss or access
	 * interrupts before we have finished switching stacks.
	 */
	if (next->thread.tile_dma_state.enabled) {
		restore_tile_dma_state(&next->thread);
		grant_dma_mpls();
	} else {
		restrict_dma_mpls();
	}
#endif

	/* Restore other arch state. */
	restore_arch_state(&next->thread);

#if CHIP_HAS_SN_PROC()
	/*
	 * Restart static network processor in the new process
	 * if it was running before.
	 */
	if (next->thread.sn_proc_running) {
		int snctl = __insn_mfspr(SPR_SNCTL);
		__insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
	}
#endif

#ifdef CONFIG_HARDWALL
	/* Enable or disable access to the network registers appropriately. */
	hardwall_switch_tasks(prev, next);
#endif

	/*
	 * Switch kernel SP, PC, and callee-saved registers.
	 * In the context of the new task, return the old task pointer
	 * (i.e. the task that actually called __switch_to).
	 * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp.
	 */
	return __switch_to(prev, next, next_current_ksp0(next));
}

/*
 * This routine is called on return from interrupt if any of the
 * TIF_WORK_MASK flags are set in thread_info->flags.  It is
 * entered with interrupts disabled so we don't miss an event
 * that modified the thread_info flags.  If any flag is set, we
 * handle it and return, and the calling assembly code will
 * re-disable interrupts, reload the thread flags, and call back
 * if more flags need to be handled.
 *
 * We return whether we need to check the thread_info flags again
 * or not.  Note that we don't clear TIF_SINGLESTEP here, so it's
 * important that it be tested last, and then claim that we don't
 * need to recheck the flags.
 */
int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
{
	/* If we enter in kernel mode, do nothing and exit the caller loop. */
	if (!user_mode(regs))
		return 0;

	/* Enable interrupts; they are disabled again on return to caller. */
	local_irq_enable();

	if (thread_info_flags & _TIF_NEED_RESCHED) {
		schedule();
		return 1;
	}
#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
	if (thread_info_flags & _TIF_ASYNC_TLB) {
		do_async_page_fault(regs);
		return 1;
	}
#endif
	if (thread_info_flags & _TIF_SIGPENDING) {
		do_signal(regs);
		return 1;
	}
	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
		clear_thread_flag(TIF_NOTIFY_RESUME);
		tracehook_notify_resume(regs);
		return 1;
	}
	if (thread_info_flags & _TIF_SINGLESTEP) {
		single_step_once(regs);
		return 0;
	}
	panic("work_pending: bad flags %#x\n", thread_info_flags);
}
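
/*
 * get_wchan() below reports the pc at which a sleeping task is
 * blocked, skipping pcs that fall inside scheduler functions; this is
 * the value surfaced through /proc/<pid>/wchan.
 */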

unsigned long get_wchan(struct task_struct *p)
{
	struct KBacktraceIterator kbt;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;

	for (KBacktraceIterator_init(&kbt, p, NULL);
	     !KBacktraceIterator_end(&kbt);
	     KBacktraceIterator_next(&kbt)) {
		if (!in_sched_functions(kbt.it.pc))
			return kbt.it.pc;
	}

	return 0;
}

/* Flush thread state. */
void flush_thread(void)
{
	/* Nothing */
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	/* Nothing */
}

void show_regs(struct pt_regs *regs)
{
	struct task_struct *tsk = validate_current();
	int i;

	pr_err("\n");
	pr_err(" Pid: %d, comm: %20s, CPU: %d\n",
	       tsk->pid, tsk->comm, smp_processor_id());
#ifdef __tilegx__
	for (i = 0; i < 51; i += 3)
		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
		       i, regs->regs[i], i+1, regs->regs[i+1],
		       i+2, regs->regs[i+2]);
	pr_err(" r51: "REGFMT" r52: "REGFMT" tp : "REGFMT"\n",
	       regs->regs[51], regs->regs[52], regs->tp);
	pr_err(" sp : "REGFMT" lr : "REGFMT"\n", regs->sp, regs->lr);
#else
	for (i = 0; i < 52; i += 4)
		pr_err(" r%-2d: "REGFMT" r%-2d: "REGFMT
		       " r%-2d: "REGFMT" r%-2d: "REGFMT"\n",
		       i, regs->regs[i], i+1, regs->regs[i+1],
		       i+2, regs->regs[i+2], i+3, regs->regs[i+3]);
	pr_err(" r52: "REGFMT" tp : "REGFMT" sp : "REGFMT" lr : "REGFMT"\n",
	       regs->regs[52], regs->tp, regs->sp, regs->lr);
#endif
	pr_err(" pc : "REGFMT" ex1: %ld faultnum: %ld\n",
	       regs->pc, regs->ex1, regs->faultnum);

	dump_stack_regs(regs);
}