/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>

/*
 * Represents all CPUs present in the system.
 * In systems capable of hotplug, this map can grow dynamically as new
 * CPUs are detected via a platform-specific method such as ACPI, for
 * example.
 */
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);
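
/*
 * Illustrative sketch (not part of this file): code that must visit every
 * CPU the platform knows about, online or not, can walk this map with the
 * for_each_present_cpu() helper.  init_per_cpu_state() here is a purely
 * hypothetical per-cpu setup routine.
 *
 *	int cpu;
 *
 *	for_each_present_cpu(cpu)
 *		init_per_cpu_state(cpu);
 */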

#ifndef CONFIG_SMP

/*
 * Represents all CPUs that are currently online.
 */
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_online_map);

cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_possible_map);

#else /* CONFIG_SMP */

/* Serializes the updates to cpu_online_map, cpu_present_map */
static DEFINE_MUTEX(cpu_add_remove_lock);

static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
} cpu_hotplug;

void __init cpu_hotplug_init(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_init(&cpu_hotplug.lock);
	cpu_hotplug.refcount = 0;
}

cpumask_t cpu_active_map;

#ifdef CONFIG_HOTPLUG_CPU

void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);

#endif	/* CONFIG_HOTPLUG_CPU */
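
/*
 * Usage sketch (illustrative, not part of this file): a reader that wants
 * cpu_online_map to stay stable while it walks it brackets the walk with
 * get_online_cpus()/put_online_cpus().  do_per_cpu_work() is a hypothetical
 * helper standing in for whatever the caller does per CPU.
 *
 *	int cpu;
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_per_cpu_work(cpu);
 *	put_online_cpus();
 *
 * Both calls may sleep, so this only works in process context.
 */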

/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_map and cpu_present_map.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock.
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() is not an API that is called all that often.
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}

/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int ret;
	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU

EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
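
/*
 * Usage sketch (illustrative, not part of this file): a subsystem that
 * wants to hear about CPUs coming and going registers a notifier_block on
 * this chain.  foo_cpu_callback() and foo_cpu_notifier are hypothetical
 * names; the callback runs for the events raised in this file
 * (CPU_UP_PREPARE, CPU_ONLINE, CPU_DOWN_PREPARE, CPU_DEAD, ...).
 *
 *	static int __cpuinit foo_cpu_callback(struct notifier_block *nfb,
 *					      unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action) {
 *		case CPU_ONLINE:
 *		case CPU_ONLINE_FROZEN:
 *			// set up per-cpu resources for 'cpu'
 *			break;
 *		case CPU_DEAD:
 *		case CPU_DEAD_FROZEN:
 *			// tear the per-cpu resources back down
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block __cpuinitdata foo_cpu_notifier = {
 *		.notifier_call = foo_cpu_callback,
 *	};
 *
 *	register_cpu_notifier(&foo_cpu_notifier);
 */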

static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

struct take_cpu_down_param {
	unsigned long mod;
	void *hcpu;
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	int err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);
	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	struct task_struct *p;
	cpumask_t old_allowed, tmp;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &nr_calls);
	if (err == NOTIFY_BAD) {
		nr_calls--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__func__, cpu);
		err = -EINVAL;
		goto out_release;
	}

	/* Ensure that we are not runnable on dying cpu */
	old_allowed = current->cpus_allowed;
	cpus_setall(tmp);
	cpu_clear(cpu, tmp);
	set_cpus_allowed_ptr(current, &tmp);

	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);

	if (IS_ERR(p) || cpu_online(cpu)) {
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		if (IS_ERR(p)) {
			err = PTR_ERR(p);
			goto out_allowed;
		}
		goto out_thread;
	}

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_thread:
	err = kthread_stop(p);
out_allowed:
	set_cpus_allowed_ptr(current, &old_allowed);
out_release:
	cpu_hotplug_done();
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
	return err;
}

int __ref cpu_down(unsigned int cpu)
{
	int err = 0;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	cpu_clear(cpu, cpu_active_map);

	/*
	 * Make sure all cpus did the reschedule and are not
	 * using a stale version of cpu_active_map.
	 * This is not strictly necessary because stop_machine(),
	 * which we run down the line, already provides the required
	 * synchronization.  But it's really a side effect and we do not
	 * want to depend on the innards of stop_machine here.
	 */
	synchronize_sched();

	err = _cpu_down(cpu, 0);

	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
					-1, &nr_calls);
	if (ret == NOTIFY_BAD) {
		nr_calls--;
		printk("%s: attempt to bring up CPU %u failed\n",
				__func__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));

	/* Now call notifier in preparation. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__raw_notifier_call_chain(&cpu_chain,
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
	cpu_hotplug_done();

	return ret;
}

int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;
	if (!cpu_isset(cpu, cpu_possible_map)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}
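
/*
 * Usage sketch (illustrative, not part of this file): the sysfs "online"
 * attribute handlers are the usual callers of these entry points; roughly,
 * they do something like the following, where target_cpu is whatever CPU
 * the user asked to toggle.  Both calls return 0 on success or a negative
 * errno (e.g. -EBUSY while hotplug is disabled, -EINVAL for a bad cpu).
 *
 *	int err;
 *
 *	err = cpu_down(target_cpu);	// take it offline
 *	if (err)
 *		return err;
 *	...
 *	err = cpu_up(target_cpu);	// and bring it back
 */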

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_t frozen_cpus;

int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error = 0;

	cpu_maps_update_begin();
	first_cpu = first_cpu(cpu_online_map);
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with userspace trying to use CPU hotplug at the same time.
	 */
	cpus_clear(frozen_cpus);
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error) {
			cpu_set(cpu, frozen_cpus);
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	return error;
}

void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpus_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");
	for_each_cpu_mask_nr(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}
	cpus_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
#endif /* CONFIG_PM_SLEEP_SMP */
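
/*
 * Pairing sketch (illustrative, not part of this file): the suspend and
 * hibernate cores are the intended callers.  Roughly, they take the
 * secondary CPUs down before entering the sleep state and always bring
 * them back on the resume/error path; enter_the_sleep_state() below is a
 * hypothetical stand-in for that step.
 *
 *	error = disable_nonboot_cpus();
 *	if (!error)
 *		error = enter_the_sleep_state();
 *	enable_nonboot_cpus();
 */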

#endif /* CONFIG_SMP */

/* 64 bits of zeros, for initializers. */
#if BITS_PER_LONG == 32
#define Z64 0, 0
#else
#define Z64 0
#endif

/* Initializer macros. */
#define CMI0(n) { .bits = { 1UL << (n) } }
#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }

#define CMI8(n, ...)						\
	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)

#if BITS_PER_LONG == 32
#define CMI64(n, ...)						\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),	\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),	\
	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__), \
	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
#else
#define CMI64(n, ...)						\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),	\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),	\
	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
#endif

#define CMI256(n, ...)						\
	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__), \
	CMI64((n)+128, Z64, Z64, __VA_ARGS__),			\
	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
#define Z256 Z64, Z64, Z64, Z64

#define CMI1024(n, ...)					\
	CMI256((n), __VA_ARGS__),			\
	CMI256((n)+256, Z256, __VA_ARGS__),		\
	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
#define Z1024 Z256, Z256, Z256, Z256
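
/*
 * Worked expansion (illustrative): each initializer produces a cpumask_t
 * with exactly one bit set.  For example,
 *
 *	CMI0(3)      ->  { .bits = { 1UL << 3 } }
 *	CMI(33, 0)   ->  { .bits = { 0, 1UL << 1 } }   (with 32-bit longs)
 *
 * so entry N of the table below is the mask containing only CPU N, with
 * the leading zero words spelled out via the Z64/Z256/Z1024 helpers.
 */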

/* We want this statically initialized, just to be safe.  We try not
 * to waste too much space, either. */
static const cpumask_t cpumask_map[]
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
__initdata
#endif
= {
	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
#if NR_CPUS > 4
	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
#endif
#if NR_CPUS > 8
	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
#endif
#if NR_CPUS > 16
	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
#endif
#if NR_CPUS > 32
#if BITS_PER_LONG == 32
	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
#else
	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
#endif /* BITS_PER_LONG == 64 */
#endif
#if NR_CPUS > 64
	CMI64(64, Z64),
#endif
#if NR_CPUS > 128
	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
#endif
#if NR_CPUS > 256
	CMI256(256, Z256),
#endif
#if NR_CPUS > 512
	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
#endif
#if NR_CPUS > 1024
	CMI1024(1024, Z1024),
#endif
#if NR_CPUS > 2048
	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
#endif
#if NR_CPUS > 4096
#error NR_CPUS too big. Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
#endif
};

const cpumask_t *cpumask_of_cpu_map = cpumask_map;

EXPORT_SYMBOL_GPL(cpumask_of_cpu_map);
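
/*
 * Usage sketch (illustrative, not part of this file): cpumask_of_cpu_map[N]
 * is a ready-made single-CPU mask, so callers that need "just CPU N" can
 * take its address instead of building a mask on the stack, e.g.
 *
 *	set_cpus_allowed_ptr(some_task, &cpumask_of_cpu_map[cpu]);
 *
 * where some_task is a hypothetical task pointer used only for the example.
 */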