/* sched.c - SPU scheduler.
 *
 * Copyright (C) IBM 2005
 * Author: Mark Nutter <mnutter@us.ibm.com>
 *
 * 2006-03-31	NUMA domains added.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#undef DEBUG

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/completion.h>
#include <linux/vmalloc.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/numa.h>
#include <linux/mutex.h>
#include <linux/notifier.h>

#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/spu.h>
#include <asm/spu_csa.h>
#include <asm/spu_priv1.h>
#include "spufs.h"
#define SPU_TIMESLICE	(HZ)
struct spu_prio_array {
	DECLARE_BITMAP(bitmap, MAX_PRIO);
	struct list_head runq[MAX_PRIO];
	spinlock_t runq_lock;
	struct list_head active_list[MAX_NUMNODES];
	struct mutex active_mutex[MAX_NUMNODES];
};

static struct spu_prio_array *spu_prio;
static struct workqueue_struct *spu_sched_wq;
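/*
 * Locking rules (as used throughout this file): the priority bitmap and
 * the per-priority runq lists are protected by runq_lock, while each
 * active_list[node] is protected by the matching active_mutex[node].
 */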
static inline int node_allowed(int node)
{
	cpumask_t mask;

	if (!nr_cpus_node(node))
		return 0;
	mask = node_to_cpumask(node);
	if (!cpus_intersects(mask, current->cpus_allowed))
		return 0;
	return 1;
}
void spu_start_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR) {
		/*
		 * Make sure the exiting bit is cleared.
		 */
		clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
		mb();
		queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE);
	}
}
void spu_stop_tick(struct spu_context *ctx)
{
	if (ctx->policy == SCHED_RR) {
		/*
		 * While the work can rearm itself normally, setting this
		 * flag makes sure it does not rearm itself anymore.
		 */
		set_bit(SPU_SCHED_EXITING, &ctx->sched_flags);
		mb();
		cancel_delayed_work(&ctx->sched_work);
	}
}
/**
 * spu_add_to_active_list - add spu to active list
 * @spu:	spu to add to the active list
 */
static void spu_add_to_active_list(struct spu *spu)
{
	mutex_lock(&spu_prio->active_mutex[spu->node]);
	list_add_tail(&spu->list, &spu_prio->active_list[spu->node]);
	mutex_unlock(&spu_prio->active_mutex[spu->node]);
}
/**
 * spu_remove_from_active_list - remove spu from active list
 * @spu:	spu to remove from the active list
 */
static void spu_remove_from_active_list(struct spu *spu)
{
	int node = spu->node;

	mutex_lock(&spu_prio->active_mutex[node]);
	list_del_init(&spu->list);
	mutex_unlock(&spu_prio->active_mutex[node]);
}
static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);

static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
{
	blocking_notifier_call_chain(&spu_switch_notifier,
			    ctx ? ctx->object_id : 0, spu);
}

int spu_switch_event_register(struct notifier_block * n)
{
	return blocking_notifier_chain_register(&spu_switch_notifier, n);
}

int spu_switch_event_unregister(struct notifier_block * n)
{
	return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
}
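/*
 * Example usage (illustrative only; my_spu_switch is a hypothetical
 * consumer such as a profiler): the chain is invoked with the context's
 * object_id as the event value and the struct spu pointer as the data
 * argument.
 *
 *	static int my_spu_switch(struct notifier_block *nb,
 *				 unsigned long object_id, void *data)
 *	{
 *		struct spu *spu = data;
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_spu_switch_nb = {
 *		.notifier_call = my_spu_switch,
 *	};
 *
 *	spu_switch_event_register(&my_spu_switch_nb);
 */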
/**
 * spu_bind_context - bind spu context to physical spu
 * @spu:	physical spu to bind to
 * @ctx:	context to bind
 */
static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
		 spu->number, spu->node);
	spu->ctx = ctx;
	spu->flags = 0;
	ctx->spu = spu;
	ctx->ops = &spu_hw_ops;
	spu->pid = current->pid;
	spu_associate_mm(spu, ctx->owner);
	spu->ibox_callback = spufs_ibox_callback;
	spu->wbox_callback = spufs_wbox_callback;
	spu->stop_callback = spufs_stop_callback;
	spu->mfc_callback = spufs_mfc_callback;
	spu->dma_callback = spufs_dma_callback;
	mb();
	spu_unmap_mappings(ctx);
	spu_restore(&ctx->csa, spu);
	spu->timestamp = jiffies;
	spu_cpu_affinity_set(spu, raw_smp_processor_id());
	spu_switch_notify(spu, ctx);
	spu_add_to_active_list(spu);
	ctx->state = SPU_STATE_RUNNABLE;
}
/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
		 spu->pid, spu->number, spu->node);

	spu_remove_from_active_list(spu);
	spu_switch_notify(spu, NULL);
	spu_unmap_mappings(ctx);
	spu_save(&ctx->csa, spu);
	spu->timestamp = jiffies;
	ctx->state = SPU_STATE_SAVED;
	spu->ibox_callback = NULL;
	spu->wbox_callback = NULL;
	spu->stop_callback = NULL;
	spu->mfc_callback = NULL;
	spu->dma_callback = NULL;
	spu_associate_mm(spu, NULL);
	spu->pid = 0;
	ctx->ops = &spu_backing_ops;
	ctx->spu = NULL;
	spu->flags = 0;
	spu->ctx = NULL;
}
/**
 * __spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 */
static void __spu_add_to_rq(struct spu_context *ctx)
{
	int prio = ctx->prio;

	list_add_tail(&ctx->rq, &spu_prio->runq[prio]);
	set_bit(prio, spu_prio->bitmap);
}

static void __spu_del_from_rq(struct spu_context *ctx)
{
	int prio = ctx->prio;

	if (!list_empty(&ctx->rq))
		list_del_init(&ctx->rq);
	if (list_empty(&spu_prio->runq[prio]))
		clear_bit(prio, spu_prio->bitmap);
}
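/*
 * spu_prio_wait - sleep until a spu may become available
 *
 * Puts the calling context on the runqueue and sleeps interruptibly.
 * Both the runq_lock and the context's state_mutex are dropped across
 * the sleep so that other contexts can make progress; the context is
 * taken back off the runqueue before returning.
 */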
static void spu_prio_wait(struct spu_context *ctx)
{
	DEFINE_WAIT(wait);

	spin_lock(&spu_prio->runq_lock);
	prepare_to_wait_exclusive(&ctx->stop_wq, &wait, TASK_INTERRUPTIBLE);
	if (!signal_pending(current)) {
		__spu_add_to_rq(ctx);
		spin_unlock(&spu_prio->runq_lock);
		mutex_unlock(&ctx->state_mutex);
		schedule();
		mutex_lock(&ctx->state_mutex);
		spin_lock(&spu_prio->runq_lock);
		__spu_del_from_rq(ctx);
	}
	spin_unlock(&spu_prio->runq_lock);
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->stop_wq, &wait);
}
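/*
 * spu_get_idle - try to allocate an idle spu, preferring the local node
 *
 * Walks the NUMA nodes starting with the one the caller is running on
 * and returns the first idle spu found on a node the current task is
 * allowed to run on, or NULL if none is available.
 */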
static struct spu *spu_get_idle(struct spu_context *ctx)
{
	struct spu *spu = NULL;
	int node = cpu_to_node(raw_smp_processor_id());
	int n;

	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;
		spu = spu_alloc_node(node);
		if (spu)
			break;
	}
	return spu;
}
/**
 * find_victim - find a lower priority context to preempt
 * @ctx:	candidate context for running
 *
 * Returns the freed physical spu to run the new context on.
 */
static struct spu *find_victim(struct spu_context *ctx)
{
	struct spu_context *victim = NULL;
	struct spu *spu;
	int node, n;

	/*
	 * Look for a possible preemption candidate on the local node first.
	 * If there is no candidate look at the other nodes.  This isn't
	 * exactly fair, but so far the whole spu scheduler tries to keep
	 * a strong node affinity.  We might want to fine-tune this in
	 * the future.
	 */
 restart:
	node = cpu_to_node(raw_smp_processor_id());
	for (n = 0; n < MAX_NUMNODES; n++, node++) {
		node = (node < MAX_NUMNODES) ? node : 0;
		if (!node_allowed(node))
			continue;

		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry(spu, &spu_prio->active_list[node], list) {
			struct spu_context *tmp = spu->ctx;

			if (tmp->rt_priority < ctx->rt_priority &&
			    (!victim || tmp->rt_priority < victim->rt_priority))
				victim = tmp;
		}
		mutex_unlock(&spu_prio->active_mutex[node]);

		if (victim) {
			/*
			 * This nests ctx->state_mutex, but we always lock
			 * higher priority contexts before lower priority
			 * ones, so this is safe until we introduce
			 * priority inheritance schemes.
			 */
			if (!mutex_trylock(&victim->state_mutex)) {
				victim = NULL;
				goto restart;
			}

			spu = victim->spu;
			if (!spu) {
				/*
				 * This race can happen because we've dropped
				 * the active list mutex.  Not a problem, just
				 * restart the search.
				 */
				mutex_unlock(&victim->state_mutex);
				victim = NULL;
				goto restart;
			}
			spu_unbind_context(spu, victim);
			mutex_unlock(&victim->state_mutex);
			/*
			 * We need to break out of the wait loop in spu_run
			 * manually to ensure this context gets put on the
			 * runqueue again ASAP.
			 */
			wake_up(&victim->stop_wq);
			return spu;
		}
	}

	return NULL;
}
/**
 * spu_activate - find a free spu for a context and execute it
 * @ctx:	spu context to schedule
 * @flags:	flags (currently ignored)
 *
 * Tries to find a free spu to run @ctx.  If no free spu is available
 * add the context to the runqueue so it gets woken up once an spu
 * is available.
 */
int spu_activate(struct spu_context *ctx, unsigned long flags)
{
	if (ctx->spu)
		return 0;

	do {
		struct spu *spu;

		spu = spu_get_idle(ctx);
		/*
		 * If this is a realtime thread we try to get it running by
		 * preempting a lower priority thread.
		 */
		if (!spu && ctx->rt_priority)
			spu = find_victim(ctx);
		if (spu) {
			spu_bind_context(spu, ctx);
			return 0;
		}

		spu_prio_wait(ctx);
	} while (!signal_pending(current));

	return -ERESTARTSYS;
}
/**
 * grab_runnable_context - try to find a runnable context
 *
 * Remove the highest priority context on the runqueue and return it
 * to the caller.  Returns %NULL if no runnable context was found.
 */
static struct spu_context *grab_runnable_context(int prio)
{
	struct spu_context *ctx = NULL;
	int best;

	spin_lock(&spu_prio->runq_lock);
	best = sched_find_first_bit(spu_prio->bitmap);
	if (best < prio) {
		struct list_head *rq = &spu_prio->runq[best];

		BUG_ON(list_empty(rq));

		ctx = list_entry(rq->next, struct spu_context, rq);
		__spu_del_from_rq(ctx);
	}
	spin_unlock(&spu_prio->runq_lock);
	return ctx;
}
static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
{
	struct spu *spu = ctx->spu;
	struct spu_context *new = NULL;

	if (spu) {
		new = grab_runnable_context(max_prio);
		if (new || force) {
			spu_unbind_context(spu, ctx);
			spu_free(spu);
			if (new)
				wake_up(&new->stop_wq);
		}
	}

	return new != NULL;
}
/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Unbind @ctx from the physical spu it is running on and schedule
 * the highest priority context to run on the freed physical spu.
 */
void spu_deactivate(struct spu_context *ctx)
{
	__spu_deactivate(ctx, 1, MAX_PRIO);
}
/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * Check if there is a higher priority context waiting and if yes
 * unbind @ctx from the physical spu and schedule the highest
 * priority context to run on the freed physical spu instead.
 */
void spu_yield(struct spu_context *ctx)
{
	if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
		mutex_lock(&ctx->state_mutex);
		__spu_deactivate(ctx, 0, MAX_PRIO);
		mutex_unlock(&ctx->state_mutex);
	}
}
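/*
 * spu_sched_tick - timeslice handler for SCHED_RR contexts
 *
 * Runs from the delayed work queued by spu_start_tick().  If a context
 * of equal or higher priority is waiting on the runqueue, the running
 * context is preempted; otherwise the tick rearms itself.
 */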
void spu_sched_tick(struct work_struct *work)
{
	struct spu_context *ctx =
		container_of(work, struct spu_context, sched_work.work);
	int preempted;

	/*
	 * If this context is being stopped avoid rescheduling from the
	 * scheduler tick because we would block on the state_mutex.
	 * The caller will yield the spu later on anyway.
	 */
	if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags))
		return;

	mutex_lock(&ctx->state_mutex);
	preempted = __spu_deactivate(ctx, 0, ctx->prio + 1);
	mutex_unlock(&ctx->state_mutex);

	if (preempted) {
		/*
		 * We need to break out of the wait loop in spu_run manually
		 * to ensure this context gets put on the runqueue again
		 * ASAP.
		 */
		wake_up(&ctx->stop_wq);
	} else
		spu_start_tick(ctx);
}
int __init spu_sched_init(void)
{
	int i;

	spu_sched_wq = create_singlethread_workqueue("spusched");
	if (!spu_sched_wq)
		return 1;

	spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL);
	if (!spu_prio) {
		printk(KERN_WARNING "%s: Unable to allocate priority queue.\n",
		       __FUNCTION__);
		destroy_workqueue(spu_sched_wq);
		return 1;
	}
	for (i = 0; i < MAX_PRIO; i++) {
		INIT_LIST_HEAD(&spu_prio->runq[i]);
		__clear_bit(i, spu_prio->bitmap);
	}
	/*
	 * Bit MAX_PRIO acts as a sentinel so that sched_find_first_bit()
	 * always finds a set bit, even when the runqueue is empty.
	 */
	__set_bit(MAX_PRIO, spu_prio->bitmap);
	for (i = 0; i < MAX_NUMNODES; i++) {
		mutex_init(&spu_prio->active_mutex[i]);
		INIT_LIST_HEAD(&spu_prio->active_list[i]);
	}
	spin_lock_init(&spu_prio->runq_lock);
	return 0;
}
void __exit spu_sched_exit(void)
{
	struct spu *spu, *tmp;
	int node;

	for (node = 0; node < MAX_NUMNODES; node++) {
		mutex_lock(&spu_prio->active_mutex[node]);
		list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
					 list) {
			list_del_init(&spu->list);
			spu_free(spu);
		}
		mutex_unlock(&spu_prio->active_mutex[node]);
	}
	kfree(spu_prio);
	destroy_workqueue(spu_sched_wq);
}