workqueue: prepare for WQ_UNBOUND implementation
1 /*
2 * linux/kernel/workqueue.c
3 *
4 * Generic mechanism for defining kernel helper threads for running
5 * arbitrary tasks in process context.
6 *
7 * Started by Ingo Molnar, Copyright (C) 2002
8 *
9 * Derived from the taskqueue/keventd code by:
10 *
11 * David Woodhouse <dwmw2@infradead.org>
12 * Andrew Morton
13 * Kai Petzke <wpp@marie.physik.tu-berlin.de>
14 * Theodore Ts'o <tytso@mit.edu>
15 *
16 * Made to use alloc_percpu by Christoph Lameter.
17 */
18
19 #include <linux/module.h>
20 #include <linux/kernel.h>
21 #include <linux/sched.h>
22 #include <linux/init.h>
23 #include <linux/signal.h>
24 #include <linux/completion.h>
25 #include <linux/workqueue.h>
26 #include <linux/slab.h>
27 #include <linux/cpu.h>
28 #include <linux/notifier.h>
29 #include <linux/kthread.h>
30 #include <linux/hardirq.h>
31 #include <linux/mempolicy.h>
32 #include <linux/freezer.h>
33 #include <linux/kallsyms.h>
34 #include <linux/debug_locks.h>
35 #include <linux/lockdep.h>
36 #include <linux/idr.h>
37
38 #include "workqueue_sched.h"
39
40 enum {
41 /* global_cwq flags */
42 GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
43 GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */
44 GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
45 GCWQ_FREEZING = 1 << 3, /* freeze in progress */
46 GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */
47
48 /* worker flags */
49 WORKER_STARTED = 1 << 0, /* started */
50 WORKER_DIE = 1 << 1, /* die die die */
51 WORKER_IDLE = 1 << 2, /* is idle */
52 WORKER_PREP = 1 << 3, /* preparing to run works */
53 WORKER_ROGUE = 1 << 4, /* not bound to any cpu */
54 WORKER_REBIND = 1 << 5, /* mom is home, come back */
55 WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
56
57 WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND |
58 WORKER_CPU_INTENSIVE,
59
60 /* gcwq->trustee_state */
61 TRUSTEE_START = 0, /* start */
62 TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */
63 TRUSTEE_BUTCHER = 2, /* butcher workers */
64 TRUSTEE_RELEASE = 3, /* release workers */
65 TRUSTEE_DONE = 4, /* trustee is done */
66
67 BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
68 BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
69 BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
70
71 MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
72 IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
73
74 MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */
75 MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
76 CREATE_COOLDOWN = HZ, /* time to breathe after fail */
77 TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */
78
79 /*
80 * Rescue workers are used only in emergencies and shared by
81 * all cpus. Give -20.
82 */
83 RESCUER_NICE_LEVEL = -20,
84 };
85
86 /*
87 * Structure fields follow one of the following exclusion rules.
88 *
89 * I: Set during initialization and read-only afterwards.
90 *
91 * P: Preemption protected. Disabling preemption is enough and should
92 * only be modified and accessed from the local cpu.
93 *
94 * L: gcwq->lock protected. Access with gcwq->lock held.
95 *
96 * X: During normal operation, modification requires gcwq->lock and
97 * should be done only from local cpu. Either disabling preemption
98 * on local cpu or grabbing gcwq->lock is enough for read access.
99 * While trustee is in charge, it's identical to L.
100 *
101 * F: wq->flush_mutex protected.
102 *
103 * W: workqueue_lock protected.
104 */
105
106 struct global_cwq;
107
108 /*
109 * The poor guys doing the actual heavy lifting. All on-duty workers
110 * are either serving the manager role, on idle list or on busy hash.
111 */
112 struct worker {
113 /* on idle list while idle, on busy hash table while busy */
114 union {
115 struct list_head entry; /* L: while idle */
116 struct hlist_node hentry; /* L: while busy */
117 };
118
119 struct work_struct *current_work; /* L: work being processed */
120 struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
121 struct list_head scheduled; /* L: scheduled works */
122 struct task_struct *task; /* I: worker task */
123 struct global_cwq *gcwq; /* I: the associated gcwq */
124 /* 64 bytes boundary on 64bit, 32 on 32bit */
125 unsigned long last_active; /* L: last active timestamp */
126 unsigned int flags; /* X: flags */
127 int id; /* I: worker id */
128 struct work_struct rebind_work; /* L: rebind worker to cpu */
129 };
130
131 /*
132 * Global per-cpu workqueue. There's one and only one for each cpu
133 * and all works are queued and processed here regardless of their
134 * target workqueues.
135 */
136 struct global_cwq {
137 spinlock_t lock; /* the gcwq lock */
138 struct list_head worklist; /* L: list of pending works */
139 unsigned int cpu; /* I: the associated cpu */
140 unsigned int flags; /* L: GCWQ_* flags */
141
142 int nr_workers; /* L: total number of workers */
143 int nr_idle; /* L: currently idle ones */
144
145 /* workers are chained either in the idle_list or busy_hash */
146 struct list_head idle_list; /* X: list of idle workers */
147 struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
148 /* L: hash of busy workers */
149
150 struct timer_list idle_timer; /* L: worker idle timeout */
151 struct timer_list mayday_timer; /* L: SOS timer for workers */
152
153 struct ida worker_ida; /* L: for worker IDs */
154
155 struct task_struct *trustee; /* L: for gcwq shutdown */
156 unsigned int trustee_state; /* L: trustee state */
157 wait_queue_head_t trustee_wait; /* trustee wait */
158 struct worker *first_idle; /* L: first idle worker */
159 } ____cacheline_aligned_in_smp;
160
161 /*
162 * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of
163 * work_struct->data are used for flags and thus cwqs need to be
164 * aligned to two to the power of the number of flag bits.
165 */
166 struct cpu_workqueue_struct {
167 struct global_cwq *gcwq; /* I: the associated gcwq */
168 struct workqueue_struct *wq; /* I: the owning workqueue */
169 int work_color; /* L: current color */
170 int flush_color; /* L: flushing color */
171 int nr_in_flight[WORK_NR_COLORS];
172 /* L: nr of in_flight works */
173 int nr_active; /* L: nr of active works */
174 int max_active; /* L: max active works */
175 struct list_head delayed_works; /* L: delayed works */
176 };
177
178 /*
179 * Structure used to wait for workqueue flush.
180 */
181 struct wq_flusher {
182 struct list_head list; /* F: list of flushers */
183 int flush_color; /* F: flush color waiting for */
184 struct completion done; /* flush completion */
185 };
186
187 /*
188 * The externally visible workqueue abstraction is an array of
189 * per-CPU workqueues:
190 */
191 struct workqueue_struct {
192 unsigned int flags; /* I: WQ_* flags */
193 union {
194 struct cpu_workqueue_struct __percpu *pcpu;
195 struct cpu_workqueue_struct *single;
196 unsigned long v;
197 } cpu_wq; /* I: cwq's */
198 struct list_head list; /* W: list of all workqueues */
199
200 struct mutex flush_mutex; /* protects wq flushing */
201 int work_color; /* F: current work color */
202 int flush_color; /* F: current flush color */
203 atomic_t nr_cwqs_to_flush; /* flush in progress */
204 struct wq_flusher *first_flusher; /* F: first flusher */
205 struct list_head flusher_queue; /* F: flush waiters */
206 struct list_head flusher_overflow; /* F: flush overflow list */
207
208 unsigned long single_cpu; /* cpu for single cpu wq */
209
210 cpumask_var_t mayday_mask; /* cpus requesting rescue */
211 struct worker *rescuer; /* I: rescue worker */
212
213 int saved_max_active; /* W: saved cwq max_active */
214 const char *name; /* I: workqueue name */
215 #ifdef CONFIG_LOCKDEP
216 struct lockdep_map lockdep_map;
217 #endif
218 };
219
220 struct workqueue_struct *system_wq __read_mostly;
221 struct workqueue_struct *system_long_wq __read_mostly;
222 struct workqueue_struct *system_nrt_wq __read_mostly;
223 EXPORT_SYMBOL_GPL(system_wq);
224 EXPORT_SYMBOL_GPL(system_long_wq);
225 EXPORT_SYMBOL_GPL(system_nrt_wq);
226
227 #define for_each_busy_worker(worker, i, pos, gcwq) \
228 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
229 hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
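/*
 * Illustrative sketch (added for this write-up, not part of the file):
 * walking the busy hash with for_each_busy_worker().  @worker is a
 * struct worker *, @pos a struct hlist_node *, @i an int, and
 * gcwq->lock must be held while iterating.
 *
 *	struct worker *worker;
 *	struct hlist_node *pos;
 *	int i;
 *
 *	for_each_busy_worker(worker, i, pos, gcwq)
 *		printk(KERN_DEBUG "busy worker %d\n", worker->id);
 */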
230
231 #ifdef CONFIG_DEBUG_OBJECTS_WORK
232
233 static struct debug_obj_descr work_debug_descr;
234
235 /*
236 * fixup_init is called when:
237 * - an active object is initialized
238 */
239 static int work_fixup_init(void *addr, enum debug_obj_state state)
240 {
241 struct work_struct *work = addr;
242
243 switch (state) {
244 case ODEBUG_STATE_ACTIVE:
245 cancel_work_sync(work);
246 debug_object_init(work, &work_debug_descr);
247 return 1;
248 default:
249 return 0;
250 }
251 }
252
253 /*
254 * fixup_activate is called when:
255 * - an active object is activated
256 * - an unknown object is activated (might be a statically initialized object)
257 */
258 static int work_fixup_activate(void *addr, enum debug_obj_state state)
259 {
260 struct work_struct *work = addr;
261
262 switch (state) {
263
264 case ODEBUG_STATE_NOTAVAILABLE:
265 /*
266 * This is not really a fixup. The work struct was
267 * statically initialized. We just make sure that it
268 * is tracked in the object tracker.
269 */
270 if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
271 debug_object_init(work, &work_debug_descr);
272 debug_object_activate(work, &work_debug_descr);
273 return 0;
274 }
275 WARN_ON_ONCE(1);
276 return 0;
277
278 case ODEBUG_STATE_ACTIVE:
279 WARN_ON(1);
280
281 default:
282 return 0;
283 }
284 }
285
286 /*
287 * fixup_free is called when:
288 * - an active object is freed
289 */
290 static int work_fixup_free(void *addr, enum debug_obj_state state)
291 {
292 struct work_struct *work = addr;
293
294 switch (state) {
295 case ODEBUG_STATE_ACTIVE:
296 cancel_work_sync(work);
297 debug_object_free(work, &work_debug_descr);
298 return 1;
299 default:
300 return 0;
301 }
302 }
303
304 static struct debug_obj_descr work_debug_descr = {
305 .name = "work_struct",
306 .fixup_init = work_fixup_init,
307 .fixup_activate = work_fixup_activate,
308 .fixup_free = work_fixup_free,
309 };
310
311 static inline void debug_work_activate(struct work_struct *work)
312 {
313 debug_object_activate(work, &work_debug_descr);
314 }
315
316 static inline void debug_work_deactivate(struct work_struct *work)
317 {
318 debug_object_deactivate(work, &work_debug_descr);
319 }
320
321 void __init_work(struct work_struct *work, int onstack)
322 {
323 if (onstack)
324 debug_object_init_on_stack(work, &work_debug_descr);
325 else
326 debug_object_init(work, &work_debug_descr);
327 }
328 EXPORT_SYMBOL_GPL(__init_work);
329
330 void destroy_work_on_stack(struct work_struct *work)
331 {
332 debug_object_free(work, &work_debug_descr);
333 }
334 EXPORT_SYMBOL_GPL(destroy_work_on_stack);
335
336 #else
337 static inline void debug_work_activate(struct work_struct *work) { }
338 static inline void debug_work_deactivate(struct work_struct *work) { }
339 #endif
340
341 /* Serializes the accesses to the list of workqueues. */
342 static DEFINE_SPINLOCK(workqueue_lock);
343 static LIST_HEAD(workqueues);
344 static bool workqueue_freezing; /* W: have wqs started freezing? */
345
346 /*
347 * The almighty global cpu workqueues. nr_running is the only field
348 * which is expected to be used frequently by other cpus via
349 * try_to_wake_up(). Put it in a separate cacheline.
350 */
351 static DEFINE_PER_CPU(struct global_cwq, global_cwq);
352 static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running);
353
354 static int worker_thread(void *__worker);
355
356 static struct global_cwq *get_gcwq(unsigned int cpu)
357 {
358 return &per_cpu(global_cwq, cpu);
359 }
360
361 static atomic_t *get_gcwq_nr_running(unsigned int cpu)
362 {
363 return &per_cpu(gcwq_nr_running, cpu);
364 }
365
366 static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
367 struct workqueue_struct *wq)
368 {
369 #ifndef CONFIG_SMP
370 return wq->cpu_wq.single;
371 #else
372 return per_cpu_ptr(wq->cpu_wq.pcpu, cpu);
373 #endif
374 }
375
376 static unsigned int work_color_to_flags(int color)
377 {
378 return color << WORK_STRUCT_COLOR_SHIFT;
379 }
380
381 static int get_work_color(struct work_struct *work)
382 {
383 return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
384 ((1 << WORK_STRUCT_COLOR_BITS) - 1);
385 }
386
387 static int work_next_color(int color)
388 {
389 return (color + 1) % WORK_NR_COLORS;
390 }
391
392 /*
393 * Work data points to the cwq while a work is on queue. Once
394 * execution starts, it points to the cpu the work was last on. This
395 * can be distinguished by comparing the data value against
396 * PAGE_OFFSET.
397 *
398 * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
399 * cwq, cpu or clear work->data. These functions should only be
400 * called while the work is owned - ie. while the PENDING bit is set.
401 *
402 * get_work_[g]cwq() can be used to obtain the gcwq or cwq
403 * corresponding to a work. gcwq is available once the work has been
404 * queued anywhere after initialization. cwq is available only from
405 * queueing until execution starts.
406 */
407 static inline void set_work_data(struct work_struct *work, unsigned long data,
408 unsigned long flags)
409 {
410 BUG_ON(!work_pending(work));
411 atomic_long_set(&work->data, data | flags | work_static(work));
412 }
413
414 static void set_work_cwq(struct work_struct *work,
415 struct cpu_workqueue_struct *cwq,
416 unsigned long extra_flags)
417 {
418 set_work_data(work, (unsigned long)cwq,
419 WORK_STRUCT_PENDING | extra_flags);
420 }
421
422 static void set_work_cpu(struct work_struct *work, unsigned int cpu)
423 {
424 set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
425 }
426
427 static void clear_work_data(struct work_struct *work)
428 {
429 set_work_data(work, WORK_STRUCT_NO_CPU, 0);
430 }
431
432 static inline unsigned long get_work_data(struct work_struct *work)
433 {
434 return atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK;
435 }
436
437 static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work)
438 {
439 unsigned long data = get_work_data(work);
440
441 return data >= PAGE_OFFSET ? (void *)data : NULL;
442 }
443
444 static struct global_cwq *get_work_gcwq(struct work_struct *work)
445 {
446 unsigned long data = get_work_data(work);
447 unsigned int cpu;
448
449 if (data >= PAGE_OFFSET)
450 return ((struct cpu_workqueue_struct *)data)->gcwq;
451
452 cpu = data >> WORK_STRUCT_FLAG_BITS;
453 if (cpu == WORK_CPU_NONE)
454 return NULL;
455
456 BUG_ON(cpu >= nr_cpu_ids);
457 return get_gcwq(cpu);
458 }
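/*
 * Illustrative sketch (added for this write-up) of the work->data
 * encoding described above, assuming the caller already owns the
 * PENDING bit.  While queued, the data field holds the cwq pointer
 * (>= PAGE_OFFSET); after set_work_cpu() it holds the last cpu shifted
 * past the flag bits:
 *
 *	set_work_cwq(work, cwq, 0);
 *	WARN_ON(get_work_cwq(work) != cwq);
 *	WARN_ON(get_work_gcwq(work) != cwq->gcwq);
 *
 *	set_work_cpu(work, gcwq->cpu);
 *	WARN_ON(get_work_cwq(work) != NULL);
 *	WARN_ON(get_work_gcwq(work) != get_gcwq(gcwq->cpu));
 */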
459
460 /*
461 * Policy functions. These define the policies on how the global
462 * worker pool is managed. Unless noted otherwise, these functions
463 * assume that they're being called with gcwq->lock held.
464 */
465
466 static bool __need_more_worker(struct global_cwq *gcwq)
467 {
468 return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) ||
469 gcwq->flags & GCWQ_HIGHPRI_PENDING;
470 }
471
472 /*
473 * Need to wake up a worker? Called from anything but currently
474 * running workers.
475 */
476 static bool need_more_worker(struct global_cwq *gcwq)
477 {
478 return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq);
479 }
480
481 /* Can I start working? Called from busy but !running workers. */
482 static bool may_start_working(struct global_cwq *gcwq)
483 {
484 return gcwq->nr_idle;
485 }
486
487 /* Do I need to keep working? Called from currently running workers. */
488 static bool keep_working(struct global_cwq *gcwq)
489 {
490 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
491
492 return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1;
493 }
494
495 /* Do we need a new worker? Called from manager. */
496 static bool need_to_create_worker(struct global_cwq *gcwq)
497 {
498 return need_more_worker(gcwq) && !may_start_working(gcwq);
499 }
500
501 /* Do I need to be the manager? */
502 static bool need_to_manage_workers(struct global_cwq *gcwq)
503 {
504 return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS;
505 }
506
507 /* Do we have too many workers and should some go away? */
508 static bool too_many_workers(struct global_cwq *gcwq)
509 {
510 bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS;
511 int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */
512 int nr_busy = gcwq->nr_workers - nr_idle;
513
514 return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
515 }
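/*
 * Worked example (added for this write-up): with MAX_IDLE_WORKERS_RATIO
 * of 4 and 12 busy workers, (nr_idle - 2) * 4 >= 12 first holds at
 * nr_idle == 5, so up to four idle workers are tolerated and the fifth
 * makes too_many_workers() return true, which eventually lets the idle
 * timer ask the manager to reap the excess.
 */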
516
517 /*
518 * Wake up functions.
519 */
520
521 /* Return the first worker. Safe with preemption disabled */
522 static struct worker *first_worker(struct global_cwq *gcwq)
523 {
524 if (unlikely(list_empty(&gcwq->idle_list)))
525 return NULL;
526
527 return list_first_entry(&gcwq->idle_list, struct worker, entry);
528 }
529
530 /**
531 * wake_up_worker - wake up an idle worker
532 * @gcwq: gcwq to wake worker for
533 *
534 * Wake up the first idle worker of @gcwq.
535 *
536 * CONTEXT:
537 * spin_lock_irq(gcwq->lock).
538 */
539 static void wake_up_worker(struct global_cwq *gcwq)
540 {
541 struct worker *worker = first_worker(gcwq);
542
543 if (likely(worker))
544 wake_up_process(worker->task);
545 }
546
547 /**
548 * wq_worker_waking_up - a worker is waking up
549 * @task: task waking up
550 * @cpu: CPU @task is waking up to
551 *
552 * This function is called during try_to_wake_up() when a worker is
553 * being awoken.
554 *
555 * CONTEXT:
556 * spin_lock_irq(rq->lock)
557 */
558 void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
559 {
560 struct worker *worker = kthread_data(task);
561
562 if (likely(!(worker->flags & WORKER_NOT_RUNNING)))
563 atomic_inc(get_gcwq_nr_running(cpu));
564 }
565
566 /**
567 * wq_worker_sleeping - a worker is going to sleep
568 * @task: task going to sleep
569 * @cpu: CPU in question, must be the current CPU number
570 *
571 * This function is called during schedule() when a busy worker is
572 * going to sleep. A worker on the same cpu can be woken up by
573 * returning a pointer to its task.
574 *
575 * CONTEXT:
576 * spin_lock_irq(rq->lock)
577 *
578 * RETURNS:
579 * Worker task on @cpu to wake up, %NULL if none.
580 */
581 struct task_struct *wq_worker_sleeping(struct task_struct *task,
582 unsigned int cpu)
583 {
584 struct worker *worker = kthread_data(task), *to_wakeup = NULL;
585 struct global_cwq *gcwq = get_gcwq(cpu);
586 atomic_t *nr_running = get_gcwq_nr_running(cpu);
587
588 if (unlikely(worker->flags & WORKER_NOT_RUNNING))
589 return NULL;
590
591 /* this can only happen on the local cpu */
592 BUG_ON(cpu != raw_smp_processor_id());
593
594 /*
595 * The counterpart of the following dec_and_test, implied mb,
596 * worklist not empty test sequence is in insert_work().
597 * Please read comment there.
598 *
599 * NOT_RUNNING is clear. This means that trustee is not in
600 * charge and we're running on the local cpu w/ rq lock held
601 * and preemption disabled, which in turn means that no one else
602 * could be manipulating idle_list, so dereferencing idle_list
603 * without gcwq lock is safe.
604 */
605 if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist))
606 to_wakeup = first_worker(gcwq);
607 return to_wakeup ? to_wakeup->task : NULL;
608 }
609
610 /**
611 * worker_set_flags - set worker flags and adjust nr_running accordingly
612 * @worker: self
613 * @flags: flags to set
614 * @wakeup: wakeup an idle worker if necessary
615 *
616 * Set @flags in @worker->flags and adjust nr_running accordingly. If
617 * nr_running becomes zero and @wakeup is %true, an idle worker is
618 * woken up.
619 *
620 * CONTEXT:
621 * spin_lock_irq(gcwq->lock)
622 */
623 static inline void worker_set_flags(struct worker *worker, unsigned int flags,
624 bool wakeup)
625 {
626 struct global_cwq *gcwq = worker->gcwq;
627
628 WARN_ON_ONCE(worker->task != current);
629
630 /*
631 * If transitioning into NOT_RUNNING, adjust nr_running and
632 * wake up an idle worker as necessary if requested by
633 * @wakeup.
634 */
635 if ((flags & WORKER_NOT_RUNNING) &&
636 !(worker->flags & WORKER_NOT_RUNNING)) {
637 atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu);
638
639 if (wakeup) {
640 if (atomic_dec_and_test(nr_running) &&
641 !list_empty(&gcwq->worklist))
642 wake_up_worker(gcwq);
643 } else
644 atomic_dec(nr_running);
645 }
646
647 worker->flags |= flags;
648 }
649
650 /**
651 * worker_clr_flags - clear worker flags and adjust nr_running accordingly
652 * @worker: self
653 * @flags: flags to clear
654 *
655 * Clear @flags in @worker->flags and adjust nr_running accordingly.
656 *
657 * CONTEXT:
658 * spin_lock_irq(gcwq->lock)
659 */
660 static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
661 {
662 struct global_cwq *gcwq = worker->gcwq;
663 unsigned int oflags = worker->flags;
664
665 WARN_ON_ONCE(worker->task != current);
666
667 worker->flags &= ~flags;
668
669 /* if transitioning out of NOT_RUNNING, increment nr_running */
670 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
671 if (!(worker->flags & WORKER_NOT_RUNNING))
672 atomic_inc(get_gcwq_nr_running(gcwq->cpu));
673 }
674
675 /**
676 * busy_worker_head - return the busy hash head for a work
677 * @gcwq: gcwq of interest
678 * @work: work to be hashed
679 *
680 * Return hash head of @gcwq for @work.
681 *
682 * CONTEXT:
683 * spin_lock_irq(gcwq->lock).
684 *
685 * RETURNS:
686 * Pointer to the hash head.
687 */
688 static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
689 struct work_struct *work)
690 {
691 const int base_shift = ilog2(sizeof(struct work_struct));
692 unsigned long v = (unsigned long)work;
693
694 /* simple shift and fold hash, do we need something better? */
695 v >>= base_shift;
696 v += v >> BUSY_WORKER_HASH_ORDER;
697 v &= BUSY_WORKER_HASH_MASK;
698
699 return &gcwq->busy_hash[v];
700 }
701
702 /**
703 * __find_worker_executing_work - find worker which is executing a work
704 * @gcwq: gcwq of interest
705 * @bwh: hash head as returned by busy_worker_head()
706 * @work: work to find worker for
707 *
708 * Find a worker which is executing @work on @gcwq. @bwh should be
709 * the hash head obtained by calling busy_worker_head() with the same
710 * work.
711 *
712 * CONTEXT:
713 * spin_lock_irq(gcwq->lock).
714 *
715 * RETURNS:
716 * Pointer to worker which is executing @work if found, NULL
717 * otherwise.
718 */
719 static struct worker *__find_worker_executing_work(struct global_cwq *gcwq,
720 struct hlist_head *bwh,
721 struct work_struct *work)
722 {
723 struct worker *worker;
724 struct hlist_node *tmp;
725
726 hlist_for_each_entry(worker, tmp, bwh, hentry)
727 if (worker->current_work == work)
728 return worker;
729 return NULL;
730 }
731
732 /**
733 * find_worker_executing_work - find worker which is executing a work
734 * @gcwq: gcwq of interest
735 * @work: work to find worker for
736 *
737 * Find a worker which is executing @work on @gcwq. This function is
738 * identical to __find_worker_executing_work() except that this
739 * function calculates @bwh itself.
740 *
741 * CONTEXT:
742 * spin_lock_irq(gcwq->lock).
743 *
744 * RETURNS:
745 * Pointer to worker which is executing @work if found, NULL
746 * otherwise.
747 */
748 static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
749 struct work_struct *work)
750 {
751 return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
752 work);
753 }
754
755 /**
756 * gcwq_determine_ins_pos - find insertion position
757 * @gcwq: gcwq of interest
758 * @cwq: cwq a work is being queued for
759 *
760 * A work for @cwq is about to be queued on @gcwq, determine insertion
761 * position for the work. If @cwq is for HIGHPRI wq, the work is
762 * queued at the head of the queue but in FIFO order with respect to
763 * other HIGHPRI works; otherwise, at the end of the queue. This
764 * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that
765 * there are HIGHPRI works pending.
766 *
767 * CONTEXT:
768 * spin_lock_irq(gcwq->lock).
769 *
770 * RETURNS:
771 * Pointer to insertion position.
772 */
773 static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq,
774 struct cpu_workqueue_struct *cwq)
775 {
776 struct work_struct *twork;
777
778 if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
779 return &gcwq->worklist;
780
781 list_for_each_entry(twork, &gcwq->worklist, entry) {
782 struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
783
784 if (!(tcwq->wq->flags & WQ_HIGHPRI))
785 break;
786 }
787
788 gcwq->flags |= GCWQ_HIGHPRI_PENDING;
789 return &twork->entry;
790 }
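/*
 * Illustrative example (added for this write-up): if the worklist holds
 * H1, H2, N1, N2 where H* come from a WQ_HIGHPRI workqueue and N* do
 * not, a new HIGHPRI work is inserted between H2 and N1 (FIFO among
 * HIGHPRI works but ahead of everything else) while a normal work is
 * appended after N2.
 */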
791
792 /**
793 * insert_work - insert a work into gcwq
794 * @cwq: cwq @work belongs to
795 * @work: work to insert
796 * @head: insertion point
797 * @extra_flags: extra WORK_STRUCT_* flags to set
798 *
799 * Insert @work which belongs to @cwq into @gcwq after @head.
800 * @extra_flags is or'd to work_struct flags.
801 *
802 * CONTEXT:
803 * spin_lock_irq(gcwq->lock).
804 */
805 static void insert_work(struct cpu_workqueue_struct *cwq,
806 struct work_struct *work, struct list_head *head,
807 unsigned int extra_flags)
808 {
809 struct global_cwq *gcwq = cwq->gcwq;
810
811 /* we own @work, set data and link */
812 set_work_cwq(work, cwq, extra_flags);
813
814 /*
815 * Ensure that we get the right work->data if we see the
816 * result of list_add() below, see try_to_grab_pending().
817 */
818 smp_wmb();
819
820 list_add_tail(&work->entry, head);
821
822 /*
823 * Ensure either wq_worker_sleeping() sees the above
824 * list_add_tail() or we see zero nr_running to avoid workers
825 * lying around lazily while there are works to be processed.
826 */
827 smp_mb();
828
829 if (__need_more_worker(gcwq))
830 wake_up_worker(gcwq);
831 }
832
833 /**
834 * cwq_unbind_single_cpu - unbind cwq from single cpu workqueue processing
835 * @cwq: cwq to unbind
836 *
837 * Try to unbind @cwq from single cpu workqueue processing. If
838 * @cwq->wq is frozen, unbind is delayed till the workqueue is thawed.
839 *
840 * CONTEXT:
841 * spin_lock_irq(gcwq->lock).
842 */
843 static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq)
844 {
845 struct workqueue_struct *wq = cwq->wq;
846 struct global_cwq *gcwq = cwq->gcwq;
847
848 BUG_ON(wq->single_cpu != gcwq->cpu);
849 /*
850 * Unbind from workqueue if @cwq is not frozen. If frozen,
851 * thaw_workqueues() will either restart processing on this
852 * cpu or unbind if empty. This keeps works queued while
853 * frozen fully ordered and flushable.
854 */
855 if (likely(!(gcwq->flags & GCWQ_FREEZING))) {
856 smp_wmb(); /* paired with cmpxchg() in __queue_work() */
857 wq->single_cpu = WORK_CPU_NONE;
858 }
859 }
860
861 static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
862 struct work_struct *work)
863 {
864 struct global_cwq *gcwq;
865 struct cpu_workqueue_struct *cwq;
866 struct list_head *worklist;
867 unsigned long flags;
868 bool arbitrate;
869
870 debug_work_activate(work);
871
872 /*
873 * Determine gcwq to use. SINGLE_CPU is inherently
874 * NON_REENTRANT, so test it first.
875 */
876 if (!(wq->flags & WQ_SINGLE_CPU)) {
877 struct global_cwq *last_gcwq;
878
879 /*
880 * It's multi cpu. If @wq is non-reentrant and @work
881 * was previously on a different cpu, it might still
882 * be running there, in which case the work needs to
883 * be queued on that cpu to guarantee non-reentrance.
884 */
885 gcwq = get_gcwq(cpu);
886 if (wq->flags & WQ_NON_REENTRANT &&
887 (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
888 struct worker *worker;
889
890 spin_lock_irqsave(&last_gcwq->lock, flags);
891
892 worker = find_worker_executing_work(last_gcwq, work);
893
894 if (worker && worker->current_cwq->wq == wq)
895 gcwq = last_gcwq;
896 else {
897 /* meh... not running there, queue here */
898 spin_unlock_irqrestore(&last_gcwq->lock, flags);
899 spin_lock_irqsave(&gcwq->lock, flags);
900 }
901 } else
902 spin_lock_irqsave(&gcwq->lock, flags);
903 } else {
904 unsigned int req_cpu = cpu;
905
906 /*
907 * It's a bit more complex for single cpu workqueues.
908 * We first need to determine which cpu is going to be
909 * used. If no cpu is currently serving this
910 * workqueue, arbitrate using atomic accesses to
911 * wq->single_cpu; otherwise, use the current one.
912 */
913 retry:
914 cpu = wq->single_cpu;
915 arbitrate = cpu == WORK_CPU_NONE;
916 if (arbitrate)
917 cpu = req_cpu;
918
919 gcwq = get_gcwq(cpu);
920 spin_lock_irqsave(&gcwq->lock, flags);
921
922 /*
923 * The following cmpxchg() is a full barrier paired
924 * with smp_wmb() in cwq_unbind_single_cpu() and
925 * guarantees that all changes to wq->st_* fields are
926 * visible on the new cpu after this point.
927 */
928 if (arbitrate)
929 cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu);
930
931 if (unlikely(wq->single_cpu != cpu)) {
932 spin_unlock_irqrestore(&gcwq->lock, flags);
933 goto retry;
934 }
935 }
936
937 /* gcwq determined, get cwq and queue */
938 cwq = get_cwq(gcwq->cpu, wq);
939
940 BUG_ON(!list_empty(&work->entry));
941
942 cwq->nr_in_flight[cwq->work_color]++;
943
944 if (likely(cwq->nr_active < cwq->max_active)) {
945 cwq->nr_active++;
946 worklist = gcwq_determine_ins_pos(gcwq, cwq);
947 } else
948 worklist = &cwq->delayed_works;
949
950 insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
951
952 spin_unlock_irqrestore(&gcwq->lock, flags);
953 }
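/*
 * Illustrative sketch (added for this write-up) of the SINGLE_CPU
 * arbitration above: if CPUs 0 and 1 queue concurrently while
 * wq->single_cpu == WORK_CPU_NONE, both issue
 * cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu).  Only one swap
 * succeeds, say CPU 0's; CPU 1 then sees wq->single_cpu != 1, drops its
 * gcwq lock, retries and ends up queueing on CPU 0's gcwq.  The binding
 * is released by cwq_unbind_single_cpu() once the cwq runs dry.
 */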
954
955 /**
956 * queue_work - queue work on a workqueue
957 * @wq: workqueue to use
958 * @work: work to queue
959 *
960 * Returns 0 if @work was already on a queue, non-zero otherwise.
961 *
962 * We queue the work to the CPU on which it was submitted, but if the CPU dies
963 * it can be processed by another CPU.
964 */
965 int queue_work(struct workqueue_struct *wq, struct work_struct *work)
966 {
967 int ret;
968
969 ret = queue_work_on(get_cpu(), wq, work);
970 put_cpu();
971
972 return ret;
973 }
974 EXPORT_SYMBOL_GPL(queue_work);
975
976 /**
977 * queue_work_on - queue work on specific cpu
978 * @cpu: CPU number to execute work on
979 * @wq: workqueue to use
980 * @work: work to queue
981 *
982 * Returns 0 if @work was already on a queue, non-zero otherwise.
983 *
984 * We queue the work to a specific CPU, the caller must ensure it
985 * can't go away.
986 */
987 int
988 queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
989 {
990 int ret = 0;
991
992 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
993 __queue_work(cpu, wq, work);
994 ret = 1;
995 }
996 return ret;
997 }
998 EXPORT_SYMBOL_GPL(queue_work_on);
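/*
 * Usage sketch (added for this write-up; my_handler and my_work are
 * hypothetical names): a caller defines a work item and queues it,
 * either on the local CPU via queue_work() or on a specific CPU via
 * queue_work_on().
 *
 *	static void my_handler(struct work_struct *work)
 *	{
 *		pr_info("running in process context\n");
 *	}
 *	static DECLARE_WORK(my_work, my_handler);
 *
 *	queue_work(system_wq, &my_work);
 *	queue_work_on(1, system_wq, &my_work);
 */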
999
1000 static void delayed_work_timer_fn(unsigned long __data)
1001 {
1002 struct delayed_work *dwork = (struct delayed_work *)__data;
1003 struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
1004
1005 __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
1006 }
1007
1008 /**
1009 * queue_delayed_work - queue work on a workqueue after delay
1010 * @wq: workqueue to use
1011 * @dwork: delayable work to queue
1012 * @delay: number of jiffies to wait before queueing
1013 *
1014 * Returns 0 if @work was already on a queue, non-zero otherwise.
1015 */
1016 int queue_delayed_work(struct workqueue_struct *wq,
1017 struct delayed_work *dwork, unsigned long delay)
1018 {
1019 if (delay == 0)
1020 return queue_work(wq, &dwork->work);
1021
1022 return queue_delayed_work_on(-1, wq, dwork, delay);
1023 }
1024 EXPORT_SYMBOL_GPL(queue_delayed_work);
1025
1026 /**
1027 * queue_delayed_work_on - queue work on specific CPU after delay
1028 * @cpu: CPU number to execute work on
1029 * @wq: workqueue to use
1030 * @dwork: work to queue
1031 * @delay: number of jiffies to wait before queueing
1032 *
1033 * Returns 0 if @work was already on a queue, non-zero otherwise.
1034 */
1035 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1036 struct delayed_work *dwork, unsigned long delay)
1037 {
1038 int ret = 0;
1039 struct timer_list *timer = &dwork->timer;
1040 struct work_struct *work = &dwork->work;
1041
1042 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1043 struct global_cwq *gcwq = get_work_gcwq(work);
1044 unsigned int lcpu = gcwq ? gcwq->cpu : raw_smp_processor_id();
1045
1046 BUG_ON(timer_pending(timer));
1047 BUG_ON(!list_empty(&work->entry));
1048
1049 timer_stats_timer_set_start_info(&dwork->timer);
1050 /*
1051 * This stores cwq for the moment, for the timer_fn.
1052 * Note that the work's gcwq is preserved to allow
1053 * reentrance detection for delayed works.
1054 */
1055 set_work_cwq(work, get_cwq(lcpu, wq), 0);
1056 timer->expires = jiffies + delay;
1057 timer->data = (unsigned long)dwork;
1058 timer->function = delayed_work_timer_fn;
1059
1060 if (unlikely(cpu >= 0))
1061 add_timer_on(timer, cpu);
1062 else
1063 add_timer(timer);
1064 ret = 1;
1065 }
1066 return ret;
1067 }
1068 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
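/*
 * Usage sketch (added for this write-up; my_dwork reuses the
 * hypothetical my_handler above): a delayed work bundles a timer with
 * the work item and fires roughly HZ jiffies (one second) later.
 *
 *	static DECLARE_DELAYED_WORK(my_dwork, my_handler);
 *
 *	queue_delayed_work(system_wq, &my_dwork, HZ);
 *	...
 *	cancel_delayed_work_sync(&my_dwork);
 */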
1069
1070 /**
1071 * worker_enter_idle - enter idle state
1072 * @worker: worker which is entering idle state
1073 *
1074 * @worker is entering idle state. Update stats and idle timer if
1075 * necessary.
1076 *
1077 * LOCKING:
1078 * spin_lock_irq(gcwq->lock).
1079 */
1080 static void worker_enter_idle(struct worker *worker)
1081 {
1082 struct global_cwq *gcwq = worker->gcwq;
1083
1084 BUG_ON(worker->flags & WORKER_IDLE);
1085 BUG_ON(!list_empty(&worker->entry) &&
1086 (worker->hentry.next || worker->hentry.pprev));
1087
1088 /* can't use worker_set_flags(), also called from start_worker() */
1089 worker->flags |= WORKER_IDLE;
1090 gcwq->nr_idle++;
1091 worker->last_active = jiffies;
1092
1093 /* idle_list is LIFO */
1094 list_add(&worker->entry, &gcwq->idle_list);
1095
1096 if (likely(!(worker->flags & WORKER_ROGUE))) {
1097 if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer))
1098 mod_timer(&gcwq->idle_timer,
1099 jiffies + IDLE_WORKER_TIMEOUT);
1100 } else
1101 wake_up_all(&gcwq->trustee_wait);
1102
1103 /* sanity check nr_running */
1104 WARN_ON_ONCE(gcwq->nr_workers == gcwq->nr_idle &&
1105 atomic_read(get_gcwq_nr_running(gcwq->cpu)));
1106 }
1107
1108 /**
1109 * worker_leave_idle - leave idle state
1110 * @worker: worker which is leaving idle state
1111 *
1112 * @worker is leaving idle state. Update stats.
1113 *
1114 * LOCKING:
1115 * spin_lock_irq(gcwq->lock).
1116 */
1117 static void worker_leave_idle(struct worker *worker)
1118 {
1119 struct global_cwq *gcwq = worker->gcwq;
1120
1121 BUG_ON(!(worker->flags & WORKER_IDLE));
1122 worker_clr_flags(worker, WORKER_IDLE);
1123 gcwq->nr_idle--;
1124 list_del_init(&worker->entry);
1125 }
1126
1127 /**
1128 * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq
1129 * @worker: self
1130 *
1131 * Works which are scheduled while the cpu is online must at least be
1132 * scheduled to a worker which is bound to the cpu so that if they are
1133 * flushed from cpu callbacks while cpu is going down, they are
1134 * guaranteed to execute on the cpu.
1135 *
1136 * This function is to be used by rogue workers and rescuers to bind
1137 * themselves to the target cpu and may race with cpu going down or
1138 * coming online. kthread_bind() can't be used because it may put the
1139 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
1140 * verbatim as it's best effort and blocking and gcwq may be
1141 * [dis]associated in the meantime.
1142 *
1143 * This function tries set_cpus_allowed() and locks gcwq and verifies
1144 * the binding against GCWQ_DISASSOCIATED which is set during
1145 * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters
1146 * idle state or fetches works without dropping lock, it can guarantee
1147 * the scheduling requirement described in the first paragraph.
1148 *
1149 * CONTEXT:
1150 * Might sleep. Called without any lock but returns with gcwq->lock
1151 * held.
1152 *
1153 * RETURNS:
1154 * %true if the associated gcwq is online (@worker is successfully
1155 * bound), %false if offline.
1156 */
1157 static bool worker_maybe_bind_and_lock(struct worker *worker)
1158 {
1159 struct global_cwq *gcwq = worker->gcwq;
1160 struct task_struct *task = worker->task;
1161
1162 while (true) {
1163 /*
1164 * The following call may fail, succeed or succeed
1165 * without actually migrating the task to the cpu if
1166 * it races with cpu hotunplug operation. Verify
1167 * against GCWQ_DISASSOCIATED.
1168 */
1169 set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu));
1170
1171 spin_lock_irq(&gcwq->lock);
1172 if (gcwq->flags & GCWQ_DISASSOCIATED)
1173 return false;
1174 if (task_cpu(task) == gcwq->cpu &&
1175 cpumask_equal(&current->cpus_allowed,
1176 get_cpu_mask(gcwq->cpu)))
1177 return true;
1178 spin_unlock_irq(&gcwq->lock);
1179
1180 /* CPU has come up in between, retry migration */
1181 cpu_relax();
1182 }
1183 }
1184
1185 /*
1186 * Function for worker->rebind_work used to rebind rogue busy workers
1187 * to the associated cpu which is coming back online. This is
1188 * scheduled by cpu up but can race with other cpu hotplug operations
1189 * and may be executed twice without intervening cpu down.
1190 */
1191 static void worker_rebind_fn(struct work_struct *work)
1192 {
1193 struct worker *worker = container_of(work, struct worker, rebind_work);
1194 struct global_cwq *gcwq = worker->gcwq;
1195
1196 if (worker_maybe_bind_and_lock(worker))
1197 worker_clr_flags(worker, WORKER_REBIND);
1198
1199 spin_unlock_irq(&gcwq->lock);
1200 }
1201
1202 static struct worker *alloc_worker(void)
1203 {
1204 struct worker *worker;
1205
1206 worker = kzalloc(sizeof(*worker), GFP_KERNEL);
1207 if (worker) {
1208 INIT_LIST_HEAD(&worker->entry);
1209 INIT_LIST_HEAD(&worker->scheduled);
1210 INIT_WORK(&worker->rebind_work, worker_rebind_fn);
1211 /* on creation a worker is in !idle && prep state */
1212 worker->flags = WORKER_PREP;
1213 }
1214 return worker;
1215 }
1216
1217 /**
1218 * create_worker - create a new workqueue worker
1219 * @gcwq: gcwq the new worker will belong to
1220 * @bind: whether to set affinity to @cpu or not
1221 *
1222 * Create a new worker which is bound to @gcwq. The returned worker
1223 * can be started by calling start_worker() or destroyed using
1224 * destroy_worker().
1225 *
1226 * CONTEXT:
1227 * Might sleep. Does GFP_KERNEL allocations.
1228 *
1229 * RETURNS:
1230 * Pointer to the newly created worker.
1231 */
1232 static struct worker *create_worker(struct global_cwq *gcwq, bool bind)
1233 {
1234 int id = -1;
1235 struct worker *worker = NULL;
1236
1237 spin_lock_irq(&gcwq->lock);
1238 while (ida_get_new(&gcwq->worker_ida, &id)) {
1239 spin_unlock_irq(&gcwq->lock);
1240 if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
1241 goto fail;
1242 spin_lock_irq(&gcwq->lock);
1243 }
1244 spin_unlock_irq(&gcwq->lock);
1245
1246 worker = alloc_worker();
1247 if (!worker)
1248 goto fail;
1249
1250 worker->gcwq = gcwq;
1251 worker->id = id;
1252
1253 worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
1254 gcwq->cpu, id);
1255 if (IS_ERR(worker->task))
1256 goto fail;
1257
1258 /*
1259 * A rogue worker will become a regular one if CPU comes
1260 * online later on. Make sure every worker has
1261 * PF_THREAD_BOUND set.
1262 */
1263 if (bind)
1264 kthread_bind(worker->task, gcwq->cpu);
1265 else
1266 worker->task->flags |= PF_THREAD_BOUND;
1267
1268 return worker;
1269 fail:
1270 if (id >= 0) {
1271 spin_lock_irq(&gcwq->lock);
1272 ida_remove(&gcwq->worker_ida, id);
1273 spin_unlock_irq(&gcwq->lock);
1274 }
1275 kfree(worker);
1276 return NULL;
1277 }
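/*
 * Illustrative sketch (added for this write-up): the manager path pairs
 * create_worker() with start_worker() roughly like this, dropping
 * gcwq->lock around the sleeping allocation and retaking it before the
 * new worker becomes visible:
 *
 *	spin_unlock_irq(&gcwq->lock);
 *	worker = create_worker(gcwq, true);
 *	spin_lock_irq(&gcwq->lock);
 *	if (worker)
 *		start_worker(worker);
 */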
1278
1279 /**
1280 * start_worker - start a newly created worker
1281 * @worker: worker to start
1282 *
1283 * Make the gcwq aware of @worker and start it.
1284 *
1285 * CONTEXT:
1286 * spin_lock_irq(gcwq->lock).
1287 */
1288 static void start_worker(struct worker *worker)
1289 {
1290 worker->flags |= WORKER_STARTED;
1291 worker->gcwq->nr_workers++;
1292 worker_enter_idle(worker);
1293 wake_up_process(worker->task);
1294 }
1295
1296 /**
1297 * destroy_worker - destroy a workqueue worker
1298 * @worker: worker to be destroyed
1299 *
1300 * Destroy @worker and adjust @gcwq stats accordingly.
1301 *
1302 * CONTEXT:
1303 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1304 */
1305 static void destroy_worker(struct worker *worker)
1306 {
1307 struct global_cwq *gcwq = worker->gcwq;
1308 int id = worker->id;
1309
1310 /* sanity check frenzy */
1311 BUG_ON(worker->current_work);
1312 BUG_ON(!list_empty(&worker->scheduled));
1313
1314 if (worker->flags & WORKER_STARTED)
1315 gcwq->nr_workers--;
1316 if (worker->flags & WORKER_IDLE)
1317 gcwq->nr_idle--;
1318
1319 list_del_init(&worker->entry);
1320 worker->flags |= WORKER_DIE;
1321
1322 spin_unlock_irq(&gcwq->lock);
1323
1324 kthread_stop(worker->task);
1325 kfree(worker);
1326
1327 spin_lock_irq(&gcwq->lock);
1328 ida_remove(&gcwq->worker_ida, id);
1329 }
1330
1331 static void idle_worker_timeout(unsigned long __gcwq)
1332 {
1333 struct global_cwq *gcwq = (void *)__gcwq;
1334
1335 spin_lock_irq(&gcwq->lock);
1336
1337 if (too_many_workers(gcwq)) {
1338 struct worker *worker;
1339 unsigned long expires;
1340
1341 /* idle_list is kept in LIFO order, check the last one */
1342 worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
1343 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1344
1345 if (time_before(jiffies, expires))
1346 mod_timer(&gcwq->idle_timer, expires);
1347 else {
1348 /* it's been idle for too long, wake up manager */
1349 gcwq->flags |= GCWQ_MANAGE_WORKERS;
1350 wake_up_worker(gcwq);
1351 }
1352 }
1353
1354 spin_unlock_irq(&gcwq->lock);
1355 }
1356
1357 static bool send_mayday(struct work_struct *work)
1358 {
1359 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1360 struct workqueue_struct *wq = cwq->wq;
1361
1362 if (!(wq->flags & WQ_RESCUER))
1363 return false;
1364
1365 /* mayday mayday mayday */
1366 if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask))
1367 wake_up_process(wq->rescuer->task);
1368 return true;
1369 }
1370
1371 static void gcwq_mayday_timeout(unsigned long __gcwq)
1372 {
1373 struct global_cwq *gcwq = (void *)__gcwq;
1374 struct work_struct *work;
1375
1376 spin_lock_irq(&gcwq->lock);
1377
1378 if (need_to_create_worker(gcwq)) {
1379 /*
1380 * We've been trying to create a new worker but
1381 * haven't been successful. We might be hitting an
1382 * allocation deadlock. Send distress signals to
1383 * rescuers.
1384 */
1385 list_for_each_entry(work, &gcwq->worklist, entry)
1386 send_mayday(work);
1387 }
1388
1389 spin_unlock_irq(&gcwq->lock);
1390
1391 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL);
1392 }
1393
1394 /**
1395 * maybe_create_worker - create a new worker if necessary
1396 * @gcwq: gcwq to create a new worker for
1397 *
1398 * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to
1399 * have at least one idle worker on return from this function. If
1400 * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
1401 * sent to all rescuers with works scheduled on @gcwq to resolve
1402 * possible allocation deadlock.
1403 *
1404 * On return, need_to_create_worker() is guaranteed to be false and
1405 * may_start_working() true.
1406 *
1407 * LOCKING:
1408 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1409 * multiple times. Does GFP_KERNEL allocations. Called only from
1410 * manager.
1411 *
1412 * RETURNS:
1413 * false if no action was taken and gcwq->lock stayed locked, true
1414 * otherwise.
1415 */
1416 static bool maybe_create_worker(struct global_cwq *gcwq)
1417 {
1418 if (!need_to_create_worker(gcwq))
1419 return false;
1420 restart:
1421 /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
1422 mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
1423
1424 while (true) {
1425 struct worker *worker;
1426
1427 spin_unlock_irq(&gcwq->lock);
1428
1429 worker = create_worker(gcwq, true);
1430 if (worker) {
1431 del_timer_sync(&gcwq->mayday_timer);
1432 spin_lock_irq(&gcwq->lock);
1433 start_worker(worker);
1434 BUG_ON(need_to_create_worker(gcwq));
1435 return true;
1436 }
1437
1438 if (!need_to_create_worker(gcwq))
1439 break;
1440
1441 spin_unlock_irq(&gcwq->lock);
1442 __set_current_state(TASK_INTERRUPTIBLE);
1443 schedule_timeout(CREATE_COOLDOWN);
1444 spin_lock_irq(&gcwq->lock);
1445 if (!need_to_create_worker(gcwq))
1446 break;
1447 }
1448
1449 spin_unlock_irq(&gcwq->lock);
1450 del_timer_sync(&gcwq->mayday_timer);
1451 spin_lock_irq(&gcwq->lock);
1452 if (need_to_create_worker(gcwq))
1453 goto restart;
1454 return true;
1455 }
1456
1457 /**
1458 * maybe_destroy_workers - destroy workers which have been idle for a while
1459 * @gcwq: gcwq to destroy workers for
1460 *
1461 * Destroy @gcwq workers which have been idle for longer than
1462 * IDLE_WORKER_TIMEOUT.
1463 *
1464 * LOCKING:
1465 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1466 * multiple times. Called only from manager.
1467 *
1468 * RETURNS:
1469 * false if no action was taken and gcwq->lock stayed locked, true
1470 * otherwise.
1471 */
1472 static bool maybe_destroy_workers(struct global_cwq *gcwq)
1473 {
1474 bool ret = false;
1475
1476 while (too_many_workers(gcwq)) {
1477 struct worker *worker;
1478 unsigned long expires;
1479
1480 worker = list_entry(gcwq->idle_list.prev, struct worker, entry);
1481 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1482
1483 if (time_before(jiffies, expires)) {
1484 mod_timer(&gcwq->idle_timer, expires);
1485 break;
1486 }
1487
1488 destroy_worker(worker);
1489 ret = true;
1490 }
1491
1492 return ret;
1493 }
1494
1495 /**
1496 * manage_workers - manage worker pool
1497 * @worker: self
1498 *
1499 * Assume the manager role and manage gcwq worker pool @worker belongs
1500 * to. At any given time, there can be only zero or one manager per
1501 * gcwq. The exclusion is handled automatically by this function.
1502 *
1503 * The caller can safely start processing works on false return. On
1504 * true return, it's guaranteed that need_to_create_worker() is false
1505 * and may_start_working() is true.
1506 *
1507 * CONTEXT:
1508 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1509 * multiple times. Does GFP_KERNEL allocations.
1510 *
1511 * RETURNS:
1512 * false if no action was taken and gcwq->lock stayed locked, true if
1513 * some action was taken.
1514 */
1515 static bool manage_workers(struct worker *worker)
1516 {
1517 struct global_cwq *gcwq = worker->gcwq;
1518 bool ret = false;
1519
1520 if (gcwq->flags & GCWQ_MANAGING_WORKERS)
1521 return ret;
1522
1523 gcwq->flags &= ~GCWQ_MANAGE_WORKERS;
1524 gcwq->flags |= GCWQ_MANAGING_WORKERS;
1525
1526 /*
1527 * Destroy and then create so that may_start_working() is true
1528 * on return.
1529 */
1530 ret |= maybe_destroy_workers(gcwq);
1531 ret |= maybe_create_worker(gcwq);
1532
1533 gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
1534
1535 /*
1536 * The trustee might be waiting to take over the manager
1537 * position, tell it we're done.
1538 */
1539 if (unlikely(gcwq->trustee))
1540 wake_up_all(&gcwq->trustee_wait);
1541
1542 return ret;
1543 }
1544
1545 /**
1546 * move_linked_works - move linked works to a list
1547 * @work: start of series of works to be scheduled
1548 * @head: target list to append @work to
1549 * @nextp: out parameter for nested worklist walking
1550 *
1551 * Schedule linked works starting from @work to @head. Work series to
1552 * be scheduled starts at @work and includes any consecutive work with
1553 * WORK_STRUCT_LINKED set in its predecessor.
1554 *
1555 * If @nextp is not NULL, it's updated to point to the next work of
1556 * the last scheduled work. This allows move_linked_works() to be
1557 * nested inside outer list_for_each_entry_safe().
1558 *
1559 * CONTEXT:
1560 * spin_lock_irq(gcwq->lock).
1561 */
1562 static void move_linked_works(struct work_struct *work, struct list_head *head,
1563 struct work_struct **nextp)
1564 {
1565 struct work_struct *n;
1566
1567 /*
1568 * Linked worklist will always end before the end of the list,
1569 * use NULL for list head.
1570 */
1571 list_for_each_entry_safe_from(work, n, NULL, entry) {
1572 list_move_tail(&work->entry, head);
1573 if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1574 break;
1575 }
1576
1577 /*
1578 * If we're already inside safe list traversal and have moved
1579 * multiple works to the scheduled queue, the next position
1580 * needs to be updated.
1581 */
1582 if (nextp)
1583 *nextp = n;
1584 }
1585
1586 static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
1587 {
1588 struct work_struct *work = list_first_entry(&cwq->delayed_works,
1589 struct work_struct, entry);
1590 struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq);
1591
1592 move_linked_works(work, pos, NULL);
1593 cwq->nr_active++;
1594 }
1595
1596 /**
1597 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
1598 * @cwq: cwq of interest
1599 * @color: color of work which left the queue
1600 *
1601 * A work either has completed or is removed from pending queue,
1602 * decrement nr_in_flight of its cwq and handle workqueue flushing.
1603 *
1604 * CONTEXT:
1605 * spin_lock_irq(gcwq->lock).
1606 */
1607 static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
1608 {
1609 /* ignore uncolored works */
1610 if (color == WORK_NO_COLOR)
1611 return;
1612
1613 cwq->nr_in_flight[color]--;
1614 cwq->nr_active--;
1615
1616 if (!list_empty(&cwq->delayed_works)) {
1617 /* one down, submit a delayed one */
1618 if (cwq->nr_active < cwq->max_active)
1619 cwq_activate_first_delayed(cwq);
1620 } else if (!cwq->nr_active && cwq->wq->flags & WQ_SINGLE_CPU) {
1621 /* this was the last work, unbind from single cpu */
1622 cwq_unbind_single_cpu(cwq);
1623 }
1624
1625 /* is flush in progress and are we at the flushing tip? */
1626 if (likely(cwq->flush_color != color))
1627 return;
1628
1629 /* are there still in-flight works? */
1630 if (cwq->nr_in_flight[color])
1631 return;
1632
1633 /* this cwq is done, clear flush_color */
1634 cwq->flush_color = -1;
1635
1636 /*
1637 * If this was the last cwq, wake up the first flusher. It
1638 * will handle the rest.
1639 */
1640 if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
1641 complete(&cwq->wq->first_flusher->done);
1642 }
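/*
 * Worked example (added for this write-up): flush_workqueue() advances
 * wq->work_color, so works queued before the flush carry the previous
 * color.  When the last such work finishes, nr_in_flight[color] drops
 * to zero here, this cwq clears its flush_color, and the final
 * atomic_dec_and_test() on wq->nr_cwqs_to_flush completes
 * first_flusher->done, letting the flusher proceed.
 */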
1643
1644 /**
1645 * process_one_work - process single work
1646 * @worker: self
1647 * @work: work to process
1648 *
1649 * Process @work. This function contains all the logic necessary to
1650 * process a single work including synchronization against and
1651 * interaction with other workers on the same cpu, queueing and
1652 * flushing. As long as context requirement is met, any worker can
1653 * call this function to process a work.
1654 *
1655 * CONTEXT:
1656 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
1657 */
1658 static void process_one_work(struct worker *worker, struct work_struct *work)
1659 {
1660 struct cpu_workqueue_struct *cwq = get_work_cwq(work);
1661 struct global_cwq *gcwq = cwq->gcwq;
1662 struct hlist_head *bwh = busy_worker_head(gcwq, work);
1663 bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
1664 work_func_t f = work->func;
1665 int work_color;
1666 struct worker *collision;
1667 #ifdef CONFIG_LOCKDEP
1668 /*
1669 * It is permissible to free the struct work_struct from
1670 * inside the function that is called from it, this we need to
1671 * take into account for lockdep too. To avoid bogus "held
1672 * lock freed" warnings as well as problems when looking into
1673 * work->lockdep_map, make a copy and use that here.
1674 */
1675 struct lockdep_map lockdep_map = work->lockdep_map;
1676 #endif
1677 /*
1678 * A single work shouldn't be executed concurrently by
1679 * multiple workers on a single cpu. Check whether anyone is
1680 * already processing the work. If so, defer the work to the
1681 * currently executing one.
1682 */
1683 collision = __find_worker_executing_work(gcwq, bwh, work);
1684 if (unlikely(collision)) {
1685 move_linked_works(work, &collision->scheduled, NULL);
1686 return;
1687 }
1688
1689 /* claim and process */
1690 debug_work_deactivate(work);
1691 hlist_add_head(&worker->hentry, bwh);
1692 worker->current_work = work;
1693 worker->current_cwq = cwq;
1694 work_color = get_work_color(work);
1695
1696 /* record the current cpu number in the work data and dequeue */
1697 set_work_cpu(work, gcwq->cpu);
1698 list_del_init(&work->entry);
1699
1700 /*
1701 * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
1702 * wake up another worker; otherwise, clear HIGHPRI_PENDING.
1703 */
1704 if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) {
1705 struct work_struct *nwork = list_first_entry(&gcwq->worklist,
1706 struct work_struct, entry);
1707
1708 if (!list_empty(&gcwq->worklist) &&
1709 get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
1710 wake_up_worker(gcwq);
1711 else
1712 gcwq->flags &= ~GCWQ_HIGHPRI_PENDING;
1713 }
1714
1715 /*
1716 * CPU intensive works don't participate in concurrency
1717 * management. They're the scheduler's responsibility.
1718 */
1719 if (unlikely(cpu_intensive))
1720 worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
1721
1722 spin_unlock_irq(&gcwq->lock);
1723
1724 work_clear_pending(work);
1725 lock_map_acquire(&cwq->wq->lockdep_map);
1726 lock_map_acquire(&lockdep_map);
1727 f(work);
1728 lock_map_release(&lockdep_map);
1729 lock_map_release(&cwq->wq->lockdep_map);
1730
1731 if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
1732 printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
1733 "%s/0x%08x/%d\n",
1734 current->comm, preempt_count(), task_pid_nr(current));
1735 printk(KERN_ERR " last function: ");
1736 print_symbol("%s\n", (unsigned long)f);
1737 debug_show_held_locks(current);
1738 dump_stack();
1739 }
1740
1741 spin_lock_irq(&gcwq->lock);
1742
1743 /* clear cpu intensive status */
1744 if (unlikely(cpu_intensive))
1745 worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
1746
1747 /* we're done with it, release */
1748 hlist_del_init(&worker->hentry);
1749 worker->current_work = NULL;
1750 worker->current_cwq = NULL;
1751 cwq_dec_nr_in_flight(cwq, work_color);
1752 }
1753
1754 /**
1755 * process_scheduled_works - process scheduled works
1756 * @worker: self
1757 *
1758 * Process all scheduled works. Please note that the scheduled list
1759 * may change while processing a work, so this function repeatedly
1760 * fetches a work from the top and executes it.
1761 *
1762 * CONTEXT:
1763 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
1764 * multiple times.
1765 */
1766 static void process_scheduled_works(struct worker *worker)
1767 {
1768 while (!list_empty(&worker->scheduled)) {
1769 struct work_struct *work = list_first_entry(&worker->scheduled,
1770 struct work_struct, entry);
1771 process_one_work(worker, work);
1772 }
1773 }
1774
1775 /**
1776 * worker_thread - the worker thread function
1777 * @__worker: self
1778 *
1779 * The gcwq worker thread function. There's a single dynamic pool of
1780 * these per cpu. These workers process all works regardless of
1781 * their specific target workqueue. The only exception is works which
1782 * belong to workqueues with a rescuer which will be explained in
1783 * rescuer_thread().
1784 */
1785 static int worker_thread(void *__worker)
1786 {
1787 struct worker *worker = __worker;
1788 struct global_cwq *gcwq = worker->gcwq;
1789
1790 /* tell the scheduler that this is a workqueue worker */
1791 worker->task->flags |= PF_WQ_WORKER;
1792 woke_up:
1793 spin_lock_irq(&gcwq->lock);
1794
1795 /* DIE can be set only while we're idle, checking here is enough */
1796 if (worker->flags & WORKER_DIE) {
1797 spin_unlock_irq(&gcwq->lock);
1798 worker->task->flags &= ~PF_WQ_WORKER;
1799 return 0;
1800 }
1801
1802 worker_leave_idle(worker);
1803 recheck:
1804 /* no more worker necessary? */
1805 if (!need_more_worker(gcwq))
1806 goto sleep;
1807
1808 /* do we need to manage? */
1809 if (unlikely(!may_start_working(gcwq)) && manage_workers(worker))
1810 goto recheck;
1811
1812 /*
1813 * ->scheduled list can only be filled while a worker is
1814 * preparing to process a work or actually processing it.
1815 * Make sure nobody diddled with it while I was sleeping.
1816 */
1817 BUG_ON(!list_empty(&worker->scheduled));
1818
1819 /*
1820 * When control reaches this point, we're guaranteed to have
1821 * at least one idle worker or that someone else has already
1822 * assumed the manager role.
1823 */
1824 worker_clr_flags(worker, WORKER_PREP);
1825
1826 do {
1827 struct work_struct *work =
1828 list_first_entry(&gcwq->worklist,
1829 struct work_struct, entry);
1830
1831 if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
1832 /* optimization path, not strictly necessary */
1833 process_one_work(worker, work);
1834 if (unlikely(!list_empty(&worker->scheduled)))
1835 process_scheduled_works(worker);
1836 } else {
1837 move_linked_works(work, &worker->scheduled, NULL);
1838 process_scheduled_works(worker);
1839 }
1840 } while (keep_working(gcwq));
1841
1842 worker_set_flags(worker, WORKER_PREP, false);
1843 sleep:
1844 if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker))
1845 goto recheck;
1846
1847 /*
1848 * gcwq->lock is held and there's no work to process and no
1849 * need to manage, sleep. Workers are woken up only while
1850 * holding gcwq->lock or from local cpu, so setting the
1851 * current state before releasing gcwq->lock is enough to
1852 * prevent losing any event.
1853 */
1854 worker_enter_idle(worker);
1855 __set_current_state(TASK_INTERRUPTIBLE);
1856 spin_unlock_irq(&gcwq->lock);
1857 schedule();
1858 goto woke_up;
1859 }
1860
1861 /**
1862 * rescuer_thread - the rescuer thread function
1863 * @__wq: the associated workqueue
1864 *
1865 * Workqueue rescuer thread function. There's one rescuer for each
1866 * workqueue which has WQ_RESCUER set.
1867 *
1868 * Regular work processing on a gcwq may block trying to create a new
1869 * worker, which uses a GFP_KERNEL allocation that has a slight chance
1870 * of developing into a deadlock if some works currently on the same
1871 * queue need to be processed to satisfy that GFP_KERNEL allocation.
1872 * This is the problem the rescuer solves.
1873 *
1874 * When such a condition is possible, the gcwq summons rescuers of all
1875 * workqueues which have works queued on the gcwq and lets them process
1876 * those works so that forward progress can be guaranteed.
1877 *
1878 * This should happen rarely.
1879 */
1880 static int rescuer_thread(void *__wq)
1881 {
1882 struct workqueue_struct *wq = __wq;
1883 struct worker *rescuer = wq->rescuer;
1884 struct list_head *scheduled = &rescuer->scheduled;
1885 unsigned int cpu;
1886
1887 set_user_nice(current, RESCUER_NICE_LEVEL);
1888 repeat:
1889 set_current_state(TASK_INTERRUPTIBLE);
1890
1891 if (kthread_should_stop())
1892 return 0;
1893
1894 for_each_cpu(cpu, wq->mayday_mask) {
1895 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
1896 struct global_cwq *gcwq = cwq->gcwq;
1897 struct work_struct *work, *n;
1898
1899 __set_current_state(TASK_RUNNING);
1900 cpumask_clear_cpu(cpu, wq->mayday_mask);
1901
1902 /* migrate to the target cpu if possible */
1903 rescuer->gcwq = gcwq;
1904 worker_maybe_bind_and_lock(rescuer);
1905
1906 /*
1907 * Slurp in all works issued via this workqueue and
1908 * process'em.
1909 */
1910 BUG_ON(!list_empty(&rescuer->scheduled));
1911 list_for_each_entry_safe(work, n, &gcwq->worklist, entry)
1912 if (get_work_cwq(work) == cwq)
1913 move_linked_works(work, scheduled, &n);
1914
1915 process_scheduled_works(rescuer);
1916 spin_unlock_irq(&gcwq->lock);
1917 }
1918
1919 schedule();
1920 goto repeat;
1921 }
1922
1923 struct wq_barrier {
1924 struct work_struct work;
1925 struct completion done;
1926 };
1927
1928 static void wq_barrier_func(struct work_struct *work)
1929 {
1930 struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
1931 complete(&barr->done);
1932 }
1933
1934 /**
1935 * insert_wq_barrier - insert a barrier work
1936 * @cwq: cwq to insert barrier into
1937 * @barr: wq_barrier to insert
1938 * @target: target work to attach @barr to
1939 * @worker: worker currently executing @target, NULL if @target is not executing
1940 *
1941 * @barr is linked to @target such that @barr is completed only after
1942 * @target finishes execution. Please note that the ordering
1943 * guarantee is observed only with respect to @target and on the local
1944 * cpu.
1945 *
1946 * Currently, a queued barrier can't be canceled. This is because
1947 * try_to_grab_pending() can't determine whether the work to be
1948 * grabbed is at the head of the queue and thus can't clear the LINKED
1949 * flag of the previous work, while there must be a valid next work
1950 * after a work with the LINKED flag set.
1951 *
1952 * Note that when @worker is non-NULL, @target may be modified
1953 * underneath us, so we can't reliably determine cwq from @target.
1954 *
1955 * CONTEXT:
1956 * spin_lock_irq(gcwq->lock).
1957 */
1958 static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
1959 struct wq_barrier *barr,
1960 struct work_struct *target, struct worker *worker)
1961 {
1962 struct list_head *head;
1963 unsigned int linked = 0;
1964
1965 /*
1966 * debugobject calls are safe here even with gcwq->lock locked
1967 * as we know for sure that this will not trigger any of the
1968 * checks and call back into the fixup functions where we
1969 * might deadlock.
1970 */
1971 INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
1972 __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
1973 init_completion(&barr->done);
1974
1975 /*
1976 * If @target is currently being executed, schedule the
1977 * barrier to the worker; otherwise, put it after @target.
1978 */
1979 if (worker)
1980 head = worker->scheduled.next;
1981 else {
1982 unsigned long *bits = work_data_bits(target);
1983
1984 head = target->entry.next;
1985 /* there can already be other linked works, inherit and set */
1986 linked = *bits & WORK_STRUCT_LINKED;
1987 __set_bit(WORK_STRUCT_LINKED_BIT, bits);
1988 }
1989
1990 debug_work_activate(&barr->work);
1991 insert_work(cwq, &barr->work, head,
1992 work_color_to_flags(WORK_NO_COLOR) | linked);
1993 }
1994
1995 /**
1996 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
1997 * @wq: workqueue being flushed
1998 * @flush_color: new flush color, < 0 for no-op
1999 * @work_color: new work color, < 0 for no-op
2000 *
2001 * Prepare cwqs for workqueue flushing.
2002 *
2003 * If @flush_color is non-negative, flush_color on all cwqs should be
2004 * -1. If no cwq has in-flight works at the specified color, all
2005 * cwq->flush_color's stay at -1 and %false is returned. If any cwq
2006 * has in-flight works, its cwq->flush_color is set to
2007 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
2008 * wakeup logic is armed and %true is returned.
2009 *
2010 * The caller should have initialized @wq->first_flusher prior to
2011 * calling this function with non-negative @flush_color. If
2012 * @flush_color is negative, no flush color update is done and %false
2013 * is returned.
2014 *
2015 * If @work_color is non-negative, all cwqs should have the same
2016 * work_color which is previous to @work_color and all will be
2017 * advanced to @work_color.
2018 *
2019 * CONTEXT:
2020 * mutex_lock(wq->flush_mutex).
2021 *
2022 * RETURNS:
2023 * %true if @flush_color >= 0 and there's something to flush. %false
2024 * otherwise.
2025 */
2026 static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
2027 int flush_color, int work_color)
2028 {
2029 bool wait = false;
2030 unsigned int cpu;
2031
2032 if (flush_color >= 0) {
2033 BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
2034 atomic_set(&wq->nr_cwqs_to_flush, 1);
2035 }
2036
2037 for_each_possible_cpu(cpu) {
2038 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2039 struct global_cwq *gcwq = cwq->gcwq;
2040
2041 spin_lock_irq(&gcwq->lock);
2042
2043 if (flush_color >= 0) {
2044 BUG_ON(cwq->flush_color != -1);
2045
2046 if (cwq->nr_in_flight[flush_color]) {
2047 cwq->flush_color = flush_color;
2048 atomic_inc(&wq->nr_cwqs_to_flush);
2049 wait = true;
2050 }
2051 }
2052
2053 if (work_color >= 0) {
2054 BUG_ON(work_color != work_next_color(cwq->work_color));
2055 cwq->work_color = work_color;
2056 }
2057
2058 spin_unlock_irq(&gcwq->lock);
2059 }
2060
2061 if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
2062 complete(&wq->first_flusher->done);
2063
2064 return wait;
2065 }
2066
2067 /**
2068 * flush_workqueue - ensure that any scheduled work has run to completion.
2069 * @wq: workqueue to flush
2070 *
2071 * Forces execution of the workqueue and blocks until its completion.
2072 * This is typically used in driver shutdown handlers.
2073 *
2074 * We sleep until all works which were queued on entry have been handled,
2075 * but we are not livelocked by new incoming ones.
2076 */
2077 void flush_workqueue(struct workqueue_struct *wq)
2078 {
2079 struct wq_flusher this_flusher = {
2080 .list = LIST_HEAD_INIT(this_flusher.list),
2081 .flush_color = -1,
2082 .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
2083 };
2084 int next_color;
2085
2086 lock_map_acquire(&wq->lockdep_map);
2087 lock_map_release(&wq->lockdep_map);
2088
2089 mutex_lock(&wq->flush_mutex);
2090
2091 /*
2092 * Start-to-wait phase
2093 */
2094 next_color = work_next_color(wq->work_color);
2095
2096 if (next_color != wq->flush_color) {
2097 /*
2098 * Color space is not full. The current work_color
2099 * becomes our flush_color and work_color is advanced
2100 * by one.
2101 */
2102 BUG_ON(!list_empty(&wq->flusher_overflow));
2103 this_flusher.flush_color = wq->work_color;
2104 wq->work_color = next_color;
2105
2106 if (!wq->first_flusher) {
2107 /* no flush in progress, become the first flusher */
2108 BUG_ON(wq->flush_color != this_flusher.flush_color);
2109
2110 wq->first_flusher = &this_flusher;
2111
2112 if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
2113 wq->work_color)) {
2114 /* nothing to flush, done */
2115 wq->flush_color = next_color;
2116 wq->first_flusher = NULL;
2117 goto out_unlock;
2118 }
2119 } else {
2120 /* wait in queue */
2121 BUG_ON(wq->flush_color == this_flusher.flush_color);
2122 list_add_tail(&this_flusher.list, &wq->flusher_queue);
2123 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2124 }
2125 } else {
2126 /*
2127 * Oops, color space is full, wait on overflow queue.
2128 * The next flush completion will assign us
2129 * flush_color and transfer to flusher_queue.
2130 */
2131 list_add_tail(&this_flusher.list, &wq->flusher_overflow);
2132 }
2133
2134 mutex_unlock(&wq->flush_mutex);
2135
2136 wait_for_completion(&this_flusher.done);
2137
2138 /*
2139 * Wake-up-and-cascade phase
2140 *
2141 * First flushers are responsible for cascading flushes and
2142 * handling overflow. Non-first flushers can simply return.
2143 */
2144 if (wq->first_flusher != &this_flusher)
2145 return;
2146
2147 mutex_lock(&wq->flush_mutex);
2148
2149 /* we might have raced, check again with mutex held */
2150 if (wq->first_flusher != &this_flusher)
2151 goto out_unlock;
2152
2153 wq->first_flusher = NULL;
2154
2155 BUG_ON(!list_empty(&this_flusher.list));
2156 BUG_ON(wq->flush_color != this_flusher.flush_color);
2157
2158 while (true) {
2159 struct wq_flusher *next, *tmp;
2160
2161 /* complete all the flushers sharing the current flush color */
2162 list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
2163 if (next->flush_color != wq->flush_color)
2164 break;
2165 list_del_init(&next->list);
2166 complete(&next->done);
2167 }
2168
2169 BUG_ON(!list_empty(&wq->flusher_overflow) &&
2170 wq->flush_color != work_next_color(wq->work_color));
2171
2172 /* this flush_color is finished, advance by one */
2173 wq->flush_color = work_next_color(wq->flush_color);
2174
2175 /* one color has been freed, handle overflow queue */
2176 if (!list_empty(&wq->flusher_overflow)) {
2177 /*
2178 * Assign the same color to all overflowed
2179 * flushers, advance work_color and append to
2180 * flusher_queue. This is the start-to-wait
2181 * phase for these overflowed flushers.
2182 */
2183 list_for_each_entry(tmp, &wq->flusher_overflow, list)
2184 tmp->flush_color = wq->work_color;
2185
2186 wq->work_color = work_next_color(wq->work_color);
2187
2188 list_splice_tail_init(&wq->flusher_overflow,
2189 &wq->flusher_queue);
2190 flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
2191 }
2192
2193 if (list_empty(&wq->flusher_queue)) {
2194 BUG_ON(wq->flush_color != wq->work_color);
2195 break;
2196 }
2197
2198 /*
2199 * Need to flush more colors. Make the next flusher
2200 * the new first flusher and arm cwqs.
2201 */
2202 BUG_ON(wq->flush_color == wq->work_color);
2203 BUG_ON(wq->flush_color != next->flush_color);
2204
2205 list_del_init(&next->list);
2206 wq->first_flusher = next;
2207
2208 if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
2209 break;
2210
2211 /*
2212 * Meh... this color is already done, clear first
2213 * flusher and repeat cascading.
2214 */
2215 wq->first_flusher = NULL;
2216 }
2217
2218 out_unlock:
2219 mutex_unlock(&wq->flush_mutex);
2220 }
2221 EXPORT_SYMBOL_GPL(flush_workqueue);
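
/*
 * Editor's illustrative sketch, not part of workqueue.c: the typical
 * caller pattern for flush_workqueue() in a driver teardown path.
 * The mydrv_* name is hypothetical.
 */
static void mydrv_teardown(struct workqueue_struct *mydrv_wq)
{
	/*
	 * The caller guarantees that no new works are queued on
	 * mydrv_wq past this point; the flush then waits for
	 * everything that was already queued to finish.
	 */
	flush_workqueue(mydrv_wq);
	/* now safe to free resources the queued works referenced */
}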
2222
2223 /**
2224 * flush_work - block until a work_struct's callback has terminated
2225 * @work: the work which is to be flushed
2226 *
2227 * Returns false if @work has already terminated.
2228 *
2229 * It is expected that, prior to calling flush_work(), the caller has
2230 * arranged for the work to not be requeued, otherwise it doesn't make
2231 * sense to use this function.
2232 */
2233 int flush_work(struct work_struct *work)
2234 {
2235 struct worker *worker = NULL;
2236 struct global_cwq *gcwq;
2237 struct cpu_workqueue_struct *cwq;
2238 struct wq_barrier barr;
2239
2240 might_sleep();
2241 gcwq = get_work_gcwq(work);
2242 if (!gcwq)
2243 return 0;
2244
2245 spin_lock_irq(&gcwq->lock);
2246 if (!list_empty(&work->entry)) {
2247 /*
2248 * See the comment near try_to_grab_pending()->smp_rmb().
2249 * If it was re-queued to a different gcwq under us, we
2250 * are not going to wait.
2251 */
2252 smp_rmb();
2253 cwq = get_work_cwq(work);
2254 if (unlikely(!cwq || gcwq != cwq->gcwq))
2255 goto already_gone;
2256 } else {
2257 worker = find_worker_executing_work(gcwq, work);
2258 if (!worker)
2259 goto already_gone;
2260 cwq = worker->current_cwq;
2261 }
2262
2263 insert_wq_barrier(cwq, &barr, work, worker);
2264 spin_unlock_irq(&gcwq->lock);
2265
2266 lock_map_acquire(&cwq->wq->lockdep_map);
2267 lock_map_release(&cwq->wq->lockdep_map);
2268
2269 wait_for_completion(&barr.done);
2270 destroy_work_on_stack(&barr.work);
2271 return 1;
2272 already_gone:
2273 spin_unlock_irq(&gcwq->lock);
2274 return 0;
2275 }
2276 EXPORT_SYMBOL_GPL(flush_work);
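
/*
 * Editor's illustrative sketch, not part of workqueue.c: waiting for
 * one specific work item rather than the whole workqueue.  As noted
 * above, the caller must prevent the work from being requeued for
 * the wait to be meaningful.  example_func()/example_work are
 * hypothetical.
 */
static void example_func(struct work_struct *work)
{
	/* ... runs in process context ... */
}

static DECLARE_WORK(example_work, example_func);

static void example_quiesce(void)
{
	/* returns 0 if the work was already idle, 1 if it was waited for */
	if (!flush_work(&example_work))
		pr_debug("example_work already idle\n");
}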
2277
2278 /*
2279 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
2280 * so this work can't be re-armed in any way.
2281 */
2282 static int try_to_grab_pending(struct work_struct *work)
2283 {
2284 struct global_cwq *gcwq;
2285 int ret = -1;
2286
2287 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
2288 return 0;
2289
2290 /*
2291 * The queueing is in progress, or it is already queued. Try to
2292 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
2293 */
2294 gcwq = get_work_gcwq(work);
2295 if (!gcwq)
2296 return ret;
2297
2298 spin_lock_irq(&gcwq->lock);
2299 if (!list_empty(&work->entry)) {
2300 /*
2301 * This work is queued, but perhaps we locked the wrong gcwq.
2302 * In that case we must see the new value after rmb(), see
2303 * insert_work()->wmb().
2304 */
2305 smp_rmb();
2306 if (gcwq == get_work_gcwq(work)) {
2307 debug_work_deactivate(work);
2308 list_del_init(&work->entry);
2309 cwq_dec_nr_in_flight(get_work_cwq(work),
2310 get_work_color(work));
2311 ret = 1;
2312 }
2313 }
2314 spin_unlock_irq(&gcwq->lock);
2315
2316 return ret;
2317 }
2318
2319 static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work)
2320 {
2321 struct wq_barrier barr;
2322 struct worker *worker;
2323
2324 spin_lock_irq(&gcwq->lock);
2325
2326 worker = find_worker_executing_work(gcwq, work);
2327 if (unlikely(worker))
2328 insert_wq_barrier(worker->current_cwq, &barr, work, worker);
2329
2330 spin_unlock_irq(&gcwq->lock);
2331
2332 if (unlikely(worker)) {
2333 wait_for_completion(&barr.done);
2334 destroy_work_on_stack(&barr.work);
2335 }
2336 }
2337
2338 static void wait_on_work(struct work_struct *work)
2339 {
2340 int cpu;
2341
2342 might_sleep();
2343
2344 lock_map_acquire(&work->lockdep_map);
2345 lock_map_release(&work->lockdep_map);
2346
2347 for_each_possible_cpu(cpu)
2348 wait_on_cpu_work(get_gcwq(cpu), work);
2349 }
2350
2351 static int __cancel_work_timer(struct work_struct *work,
2352 struct timer_list* timer)
2353 {
2354 int ret;
2355
2356 do {
2357 ret = (timer && likely(del_timer(timer)));
2358 if (!ret)
2359 ret = try_to_grab_pending(work);
2360 wait_on_work(work);
2361 } while (unlikely(ret < 0));
2362
2363 clear_work_data(work);
2364 return ret;
2365 }
2366
2367 /**
2368 * cancel_work_sync - block until a work_struct's callback has terminated
2369 * @work: the work which is to be flushed
2370 *
2371 * Returns true if @work was pending.
2372 *
2373 * cancel_work_sync() will cancel the work if it is queued. If the work's
2374 * callback appears to be running, cancel_work_sync() will block until it
2375 * has completed.
2376 *
2377 * It is possible to use this function if the work re-queues itself. It can
2378 * cancel the work even if it migrates to another workqueue, however in that
2379 * case it only guarantees that work->func() has completed on the last queued
2380 * workqueue.
2381 *
2382 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
2383 * pending, otherwise it goes into a busy-wait loop until the timer expires.
2384 *
2385 * The caller must ensure that workqueue_struct on which this work was last
2386 * queued can't be destroyed before this function returns.
2387 */
2388 int cancel_work_sync(struct work_struct *work)
2389 {
2390 return __cancel_work_timer(work, NULL);
2391 }
2392 EXPORT_SYMBOL_GPL(cancel_work_sync);
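
/*
 * Editor's illustrative sketch, not part of workqueue.c: a module
 * exit path using cancel_work_sync().  On return the handler is
 * neither queued nor running, so its data can be torn down.  Uses
 * the hypothetical example_work from the earlier sketch.
 */
static void example_module_exit(void)
{
	cancel_work_sync(&example_work);
	/* example_func() can no longer run; release its resources here */
}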
2393
2394 /**
2395 * cancel_delayed_work_sync - reliably kill off a delayed work.
2396 * @dwork: the delayed work struct
2397 *
2398 * Returns true if @dwork was pending.
2399 *
2400 * It is possible to use this function if @dwork rearms itself via queue_work()
2401 * or queue_delayed_work(). See also the comment for cancel_work_sync().
2402 */
2403 int cancel_delayed_work_sync(struct delayed_work *dwork)
2404 {
2405 return __cancel_work_timer(&dwork->work, &dwork->timer);
2406 }
2407 EXPORT_SYMBOL(cancel_delayed_work_sync);
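
/*
 * Editor's illustrative sketch, not part of workqueue.c: stopping a
 * self-rearming poller.  cancel_delayed_work_sync() handles the
 * pending timer as well as a queued or running work.  The example_*
 * identifiers are hypothetical.
 */
static void example_poll(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_dwork, example_poll);

static void example_poll(struct work_struct *work)
{
	/* ... sample hardware ... */
	schedule_delayed_work(&example_dwork, HZ);	/* rearm */
}

static void example_stop_polling(void)
{
	cancel_delayed_work_sync(&example_dwork);
}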
2408
2409 /**
2410 * schedule_work - put work task in global workqueue
2411 * @work: job to be done
2412 *
2413 * Returns zero if @work was already on the kernel-global workqueue and
2414 * non-zero otherwise.
2415 *
2416 * This puts a job in the kernel-global workqueue if it was not already
2417 * queued and leaves it in the same position on the kernel-global
2418 * workqueue otherwise.
2419 */
2420 int schedule_work(struct work_struct *work)
2421 {
2422 return queue_work(system_wq, work);
2423 }
2424 EXPORT_SYMBOL(schedule_work);
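
/*
 * Editor's illustrative sketch, not part of workqueue.c: the classic
 * use of schedule_work() - deferring work from an interrupt handler
 * to process context.  Assumes the usual <linux/interrupt.h>
 * definitions; example_irq() and example_work are hypothetical.
 */
static irqreturn_t example_irq(int irq, void *dev_id)
{
	/* can't sleep here, so punt the heavy lifting to keventd */
	schedule_work(&example_work);
	return IRQ_HANDLED;
}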
2425
2426 /**
2427 * schedule_work_on - put work task on a specific cpu
2428 * @cpu: cpu to put the work task on
2429 * @work: job to be done
2430 *
2431 * This puts a job on a specific cpu.
2432 */
2433 int schedule_work_on(int cpu, struct work_struct *work)
2434 {
2435 return queue_work_on(cpu, system_wq, work);
2436 }
2437 EXPORT_SYMBOL(schedule_work_on);
2438
2439 /**
2440 * schedule_delayed_work - put work task in global workqueue after delay
2441 * @dwork: job to be done
2442 * @delay: number of jiffies to wait or 0 for immediate execution
2443 *
2444 * After waiting for a given time this puts a job in the kernel-global
2445 * workqueue.
2446 */
2447 int schedule_delayed_work(struct delayed_work *dwork,
2448 unsigned long delay)
2449 {
2450 return queue_delayed_work(system_wq, dwork, delay);
2451 }
2452 EXPORT_SYMBOL(schedule_delayed_work);
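
/*
 * Editor's illustrative sketch, not part of workqueue.c: using the
 * return value of schedule_delayed_work() as a cheap debounce - if
 * the work is already pending, the existing timer is left alone and
 * 0 is returned.  Reuses the hypothetical example_dwork from above.
 */
static void example_on_event(void)
{
	if (!schedule_delayed_work(&example_dwork, msecs_to_jiffies(100)))
		pr_debug("deferred run already scheduled\n");
}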
2453
2454 /**
2455 * flush_delayed_work - block until a delayed_work's callback has terminated
2456 * @dwork: the delayed work which is to be flushed
2457 *
2458 * Any timeout is cancelled, and any pending work is run immediately.
2459 */
2460 void flush_delayed_work(struct delayed_work *dwork)
2461 {
2462 if (del_timer_sync(&dwork->timer)) {
2463 __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq,
2464 &dwork->work);
2465 put_cpu();
2466 }
2467 flush_work(&dwork->work);
2468 }
2469 EXPORT_SYMBOL(flush_delayed_work);
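
/*
 * Editor's illustrative sketch, not part of workqueue.c: forcing a
 * pending delayed work to run immediately and waiting for it, e.g.
 * so deferred state is written back before suspend.  Reuses the
 * hypothetical example_dwork; the suspend hook itself is made up.
 */
static int example_suspend(void)
{
	flush_delayed_work(&example_dwork);
	return 0;
}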
2470
2471 /**
2472 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
2473 * @cpu: cpu to use
2474 * @dwork: job to be done
2475 * @delay: number of jiffies to wait
2476 *
2477 * After waiting for a given time this puts a job in the kernel-global
2478 * workqueue on the specified CPU.
2479 */
2480 int schedule_delayed_work_on(int cpu,
2481 struct delayed_work *dwork, unsigned long delay)
2482 {
2483 return queue_delayed_work_on(cpu, system_wq, dwork, delay);
2484 }
2485 EXPORT_SYMBOL(schedule_delayed_work_on);
2486
2487 /**
2488 * schedule_on_each_cpu - call a function on each online CPU from keventd
2489 * @func: the function to call
2490 *
2491 * Returns zero on success.
2492 * Returns a negative errno on failure.
2493 *
2494 * schedule_on_each_cpu() is very slow as it waits for every queued work to finish.
2495 */
2496 int schedule_on_each_cpu(work_func_t func)
2497 {
2498 int cpu;
2499 struct work_struct *works;
2500
2501 works = alloc_percpu(struct work_struct);
2502 if (!works)
2503 return -ENOMEM;
2504
2505 get_online_cpus();
2506
2507 for_each_online_cpu(cpu) {
2508 struct work_struct *work = per_cpu_ptr(works, cpu);
2509
2510 INIT_WORK(work, func);
2511 schedule_work_on(cpu, work);
2512 }
2513
2514 for_each_online_cpu(cpu)
2515 flush_work(per_cpu_ptr(works, cpu));
2516
2517 put_online_cpus();
2518 free_percpu(works);
2519 return 0;
2520 }
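
/*
 * Editor's illustrative sketch, not part of workqueue.c:
 * schedule_on_each_cpu() queues the function on every online cpu and
 * waits for all instances, which suits draining per-cpu caches.  The
 * example_* identifiers are hypothetical.
 */
static void example_drain_percpu_cache(struct work_struct *dummy)
{
	/* runs once on each online cpu, in process context */
}

static int example_drain_all(void)
{
	return schedule_on_each_cpu(example_drain_percpu_cache);
}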
2521
2522 /**
2523 * flush_scheduled_work - ensure that any scheduled work has run to completion.
2524 *
2525 * Forces execution of the kernel-global workqueue and blocks until its
2526 * completion.
2527 *
2528 * Think twice before calling this function! It's very easy to get into
2529 * trouble if you don't take great care. Either of the following situations
2530 * will lead to deadlock:
2531 *
2532 * One of the work items currently on the workqueue needs to acquire
2533 * a lock held by your code or its caller.
2534 *
2535 * Your code is running in the context of a work routine.
2536 *
2537 * They will be detected by lockdep when they occur, but the first might not
2538 * occur very often. It depends on what work items are on the workqueue and
2539 * what locks they need, which you have no control over.
2540 *
2541 * In most situations flushing the entire workqueue is overkill; you merely
2542 * need to know that a particular work item isn't queued and isn't running.
2543 * In such cases you should use cancel_delayed_work_sync() or
2544 * cancel_work_sync() instead.
2545 */
2546 void flush_scheduled_work(void)
2547 {
2548 flush_workqueue(system_wq);
2549 }
2550 EXPORT_SYMBOL(flush_scheduled_work);
2551
2552 /**
2553 * execute_in_process_context - reliably execute the routine with user context
2554 * @fn: the function to execute
2555 * @ew: guaranteed storage for the execute work structure (must
2556 * be available when the work executes)
2557 *
2558 * Executes the function immediately if process context is available,
2559 * otherwise schedules the function for delayed execution.
2560 *
2561 * Returns: 0 - function was executed
2562 * 1 - function was scheduled for execution
2563 */
2564 int execute_in_process_context(work_func_t fn, struct execute_work *ew)
2565 {
2566 if (!in_interrupt()) {
2567 fn(&ew->work);
2568 return 0;
2569 }
2570
2571 INIT_WORK(&ew->work, fn);
2572 schedule_work(&ew->work);
2573
2574 return 1;
2575 }
2576 EXPORT_SYMBOL_GPL(execute_in_process_context);
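
/*
 * Editor's illustrative sketch, not part of workqueue.c: the
 * execute_work storage must stay valid until the function has run,
 * so it is typically embedded in the object being released.  The
 * example_obj type and its helpers are hypothetical; kfree() assumes
 * <linux/slab.h>.
 */
struct example_obj {
	struct execute_work ew;
	/* ... payload ... */
};

static void example_obj_release(struct work_struct *work)
{
	struct example_obj *obj = container_of(work, struct example_obj,
					       ew.work);
	kfree(obj);
}

static void example_obj_free(struct example_obj *obj)
{
	/* frees immediately unless called from interrupt context */
	execute_in_process_context(example_obj_release, &obj->ew);
}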
2577
2578 int keventd_up(void)
2579 {
2580 return system_wq != NULL;
2581 }
2582
2583 static int alloc_cwqs(struct workqueue_struct *wq)
2584 {
2585 /*
2586 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
2587 * Make sure that the alignment isn't lower than that of
2588 * unsigned long long.
2589 */
2590 const size_t size = sizeof(struct cpu_workqueue_struct);
2591 const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
2592 __alignof__(unsigned long long));
2593 #ifndef CONFIG_SMP
2594 void *ptr;
2595
2596 /*
2597 * Allocate enough room to align cwq and put an extra pointer
2598 * at the end pointing back to the originally allocated
2599 * pointer which will be used for freeing.
2600 */
2601 ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
2602 if (ptr) {
2603 wq->cpu_wq.single = PTR_ALIGN(ptr, align);
2604 *(void **)(wq->cpu_wq.single + 1) = ptr;
2605 }
2606 #else
2607 /* On SMP, percpu allocator can align itself */
2608 wq->cpu_wq.pcpu = __alloc_percpu(size, align);
2609 #endif
2610 /* just in case, make sure it's actually aligned */
2611 BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align));
2612 return wq->cpu_wq.v ? 0 : -ENOMEM;
2613 }
2614
2615 static void free_cwqs(struct workqueue_struct *wq)
2616 {
2617 #ifndef CONFIG_SMP
2618 /* on UP, the pointer to free is stored right after the cwq */
2619 if (wq->cpu_wq.single)
2620 kfree(*(void **)(wq->cpu_wq.single + 1));
2621 #else
2622 free_percpu(wq->cpu_wq.pcpu);
2623 #endif
2624 }
2625
2626 static int wq_clamp_max_active(int max_active, const char *name)
2627 {
2628 if (max_active < 1 || max_active > WQ_MAX_ACTIVE)
2629 printk(KERN_WARNING "workqueue: max_active %d requested for %s "
2630 "is out of range, clamping between %d and %d\n",
2631 max_active, name, 1, WQ_MAX_ACTIVE);
2632
2633 return clamp_val(max_active, 1, WQ_MAX_ACTIVE);
2634 }
2635
2636 struct workqueue_struct *__alloc_workqueue_key(const char *name,
2637 unsigned int flags,
2638 int max_active,
2639 struct lock_class_key *key,
2640 const char *lock_name)
2641 {
2642 struct workqueue_struct *wq;
2643 unsigned int cpu;
2644
2645 max_active = max_active ?: WQ_DFL_ACTIVE;
2646 max_active = wq_clamp_max_active(max_active, name);
2647
2648 wq = kzalloc(sizeof(*wq), GFP_KERNEL);
2649 if (!wq)
2650 goto err;
2651
2652 wq->flags = flags;
2653 wq->saved_max_active = max_active;
2654 mutex_init(&wq->flush_mutex);
2655 atomic_set(&wq->nr_cwqs_to_flush, 0);
2656 INIT_LIST_HEAD(&wq->flusher_queue);
2657 INIT_LIST_HEAD(&wq->flusher_overflow);
2658 wq->single_cpu = WORK_CPU_NONE;
2659
2660 wq->name = name;
2661 lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
2662 INIT_LIST_HEAD(&wq->list);
2663
2664 if (alloc_cwqs(wq) < 0)
2665 goto err;
2666
2667 for_each_possible_cpu(cpu) {
2668 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2669 struct global_cwq *gcwq = get_gcwq(cpu);
2670
2671 BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
2672 cwq->gcwq = gcwq;
2673 cwq->wq = wq;
2674 cwq->flush_color = -1;
2675 cwq->max_active = max_active;
2676 INIT_LIST_HEAD(&cwq->delayed_works);
2677 }
2678
2679 if (flags & WQ_RESCUER) {
2680 struct worker *rescuer;
2681
2682 if (!alloc_cpumask_var(&wq->mayday_mask, GFP_KERNEL))
2683 goto err;
2684
2685 wq->rescuer = rescuer = alloc_worker();
2686 if (!rescuer)
2687 goto err;
2688
2689 rescuer->task = kthread_create(rescuer_thread, wq, "%s", name);
2690 if (IS_ERR(rescuer->task))
2691 goto err;
2692
2693 wq->rescuer = rescuer;
2694 rescuer->task->flags |= PF_THREAD_BOUND;
2695 wake_up_process(rescuer->task);
2696 }
2697
2698 /*
2699 * workqueue_lock protects global freeze state and workqueues
2700 * list. Grab it, set max_active accordingly and add the new
2701 * workqueue to workqueues list.
2702 */
2703 spin_lock(&workqueue_lock);
2704
2705 if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
2706 for_each_possible_cpu(cpu)
2707 get_cwq(cpu, wq)->max_active = 0;
2708
2709 list_add(&wq->list, &workqueues);
2710
2711 spin_unlock(&workqueue_lock);
2712
2713 return wq;
2714 err:
2715 if (wq) {
2716 free_cwqs(wq);
2717 free_cpumask_var(wq->mayday_mask);
2718 kfree(wq->rescuer);
2719 kfree(wq);
2720 }
2721 return NULL;
2722 }
2723 EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
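
/*
 * Editor's illustrative sketch, not part of workqueue.c: callers
 * normally go through the alloc_workqueue() wrapper from
 * <linux/workqueue.h>, which supplies the lockdep key and lands
 * here.  A driver on the memory-reclaim path would pass WQ_RESCUER
 * so a rescuer thread backs the queue.  mydrv_* is hypothetical.
 */
static struct workqueue_struct *mydrv_wq;

static int __init mydrv_init(void)
{
	mydrv_wq = alloc_workqueue("mydrv", WQ_RESCUER, 1);
	if (!mydrv_wq)
		return -ENOMEM;
	return 0;
}

static void __exit mydrv_exit(void)
{
	destroy_workqueue(mydrv_wq);	/* flushes pending works first */
}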
2724
2725 /**
2726 * destroy_workqueue - safely terminate a workqueue
2727 * @wq: target workqueue
2728 *
2729 * Safely destroy a workqueue. All work currently pending will be done first.
2730 */
2731 void destroy_workqueue(struct workqueue_struct *wq)
2732 {
2733 unsigned int cpu;
2734
2735 flush_workqueue(wq);
2736
2737 /*
2738 * wq list is used to freeze wq, remove from list after
2739 * flushing is complete in case freeze races us.
2740 */
2741 spin_lock(&workqueue_lock);
2742 list_del(&wq->list);
2743 spin_unlock(&workqueue_lock);
2744
2745 /* sanity check */
2746 for_each_possible_cpu(cpu) {
2747 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2748 int i;
2749
2750 for (i = 0; i < WORK_NR_COLORS; i++)
2751 BUG_ON(cwq->nr_in_flight[i]);
2752 BUG_ON(cwq->nr_active);
2753 BUG_ON(!list_empty(&cwq->delayed_works));
2754 }
2755
2756 if (wq->flags & WQ_RESCUER) {
2757 kthread_stop(wq->rescuer->task);
2758 free_cpumask_var(wq->mayday_mask);
2759 }
2760
2761 free_cwqs(wq);
2762 kfree(wq);
2763 }
2764 EXPORT_SYMBOL_GPL(destroy_workqueue);
2765
2766 /**
2767 * workqueue_set_max_active - adjust max_active of a workqueue
2768 * @wq: target workqueue
2769 * @max_active: new max_active value.
2770 *
2771 * Set max_active of @wq to @max_active.
2772 *
2773 * CONTEXT:
2774 * Don't call from IRQ context.
2775 */
2776 void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
2777 {
2778 unsigned int cpu;
2779
2780 max_active = wq_clamp_max_active(max_active, wq->name);
2781
2782 spin_lock(&workqueue_lock);
2783
2784 wq->saved_max_active = max_active;
2785
2786 for_each_possible_cpu(cpu) {
2787 struct global_cwq *gcwq = get_gcwq(cpu);
2788
2789 spin_lock_irq(&gcwq->lock);
2790
2791 if (!(wq->flags & WQ_FREEZEABLE) ||
2792 !(gcwq->flags & GCWQ_FREEZING))
2793 get_cwq(gcwq->cpu, wq)->max_active = max_active;
2794
2795 spin_unlock_irq(&gcwq->lock);
2796 }
2797
2798 spin_unlock(&workqueue_lock);
2799 }
2800 EXPORT_SYMBOL_GPL(workqueue_set_max_active);
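
/*
 * Editor's illustrative sketch, not part of workqueue.c: adjusting
 * concurrency at run time.  Works beyond the new limit are parked on
 * the delayed list; already running works are unaffected.  The
 * helper and its policy are hypothetical.
 */
static void example_throttle(struct workqueue_struct *wq, bool slow)
{
	workqueue_set_max_active(wq, slow ? 1 : WQ_DFL_ACTIVE);
}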
2801
2802 /**
2803 * workqueue_congested - test whether a workqueue is congested
2804 * @cpu: CPU in question
2805 * @wq: target workqueue
2806 *
2807 * Test whether @wq's cpu workqueue for @cpu is congested. There is
2808 * no synchronization around this function and the test result is
2809 * unreliable and only useful as advisory hints or for debugging.
2810 *
2811 * RETURNS:
2812 * %true if congested, %false otherwise.
2813 */
2814 bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
2815 {
2816 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2817
2818 return !list_empty(&cwq->delayed_works);
2819 }
2820 EXPORT_SYMBOL_GPL(workqueue_congested);
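
/*
 * Editor's illustrative sketch, not part of workqueue.c: because the
 * result is only advisory, workqueue_congested() fits opportunistic
 * back-off, never correctness decisions.  The helper is hypothetical.
 */
static int example_try_offload(unsigned int cpu, struct workqueue_struct *wq,
			       struct work_struct *work)
{
	if (workqueue_congested(cpu, wq))
		return 0;	/* caller falls back to doing the job inline */
	return queue_work_on(cpu, wq, work);
}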
2821
2822 /**
2823 * work_cpu - return the last known associated cpu for @work
2824 * @work: the work of interest
2825 *
2826 * RETURNS:
2827 * CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
2828 */
2829 unsigned int work_cpu(struct work_struct *work)
2830 {
2831 struct global_cwq *gcwq = get_work_gcwq(work);
2832
2833 return gcwq ? gcwq->cpu : WORK_CPU_NONE;
2834 }
2835 EXPORT_SYMBOL_GPL(work_cpu);
2836
2837 /**
2838 * work_busy - test whether a work is currently pending or running
2839 * @work: the work to be tested
2840 *
2841 * Test whether @work is currently pending or running. There is no
2842 * synchronization around this function and the test result is
2843 * unreliable and only useful as advisory hints or for debugging.
2844 * Especially for reentrant wqs, the pending state might hide the
2845 * running state.
2846 *
2847 * RETURNS:
2848 * OR'd bitmask of WORK_BUSY_* bits.
2849 */
2850 unsigned int work_busy(struct work_struct *work)
2851 {
2852 struct global_cwq *gcwq = get_work_gcwq(work);
2853 unsigned long flags;
2854 unsigned int ret = 0;
2855
2856 if (!gcwq)
2857 return false;
2858
2859 spin_lock_irqsave(&gcwq->lock, flags);
2860
2861 if (work_pending(work))
2862 ret |= WORK_BUSY_PENDING;
2863 if (find_worker_executing_work(gcwq, work))
2864 ret |= WORK_BUSY_RUNNING;
2865
2866 spin_unlock_irqrestore(&gcwq->lock, flags);
2867
2868 return ret;
2869 }
2870 EXPORT_SYMBOL_GPL(work_busy);
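
/*
 * Editor's illustrative sketch, not part of workqueue.c: work_busy()
 * as a debugging aid; the answer may be stale by the time it is
 * inspected.  The helper is hypothetical.
 */
static void example_dump_work_state(struct work_struct *work)
{
	unsigned int busy = work_busy(work);

	pr_info("work %p: pending=%d running=%d\n", work,
		!!(busy & WORK_BUSY_PENDING), !!(busy & WORK_BUSY_RUNNING));
}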
2871
2872 /*
2873 * CPU hotplug.
2874 *
2875 * There are two challenges in supporting CPU hotplug. Firstly, there
2876 * are a lot of assumptions on strong associations among work, cwq and
2877 * gcwq which make migrating pending and scheduled works very
2878 * difficult to implement without impacting hot paths. Secondly,
2879 * gcwqs serve a mix of short, long and very long running works, making
2880 * blocked draining impractical.
2881 *
2882 * This is solved by allowing a gcwq to be detached from CPU, running
2883 * it with unbound (rogue) workers and allowing it to be reattached
2884 * later if the cpu comes back online. A separate thread is created
2885 * to govern a gcwq in such state and is called the trustee of the
2886 * gcwq.
2887 *
2888 * Trustee states and their descriptions.
2889 *
2890 * START Command state used on startup. On CPU_DOWN_PREPARE, a
2891 * new trustee is started with this state.
2892 *
2893 * IN_CHARGE Once started, trustee will enter this state after
2894 * assuming the manager role and making all existing
2895 * workers rogue. DOWN_PREPARE waits for trustee to
2896 * enter this state. After reaching IN_CHARGE, trustee
2897 * tries to execute the pending worklist until it's empty
2898 * and the state is set to BUTCHER, or the state is set
2899 * to RELEASE.
2900 *
2901 * BUTCHER Command state which is set by the cpu callback after
2902 * the cpu has gone down. Once this state is set, the trustee
2903 * knows that there will be no new works on the worklist
2904 * and once the worklist is empty it can proceed to
2905 * killing idle workers.
2906 *
2907 * RELEASE Command state which is set by the cpu callback if the
2908 * cpu down has been canceled or it has come online
2909 * again. After recognizing this state, trustee stops
2910 * trying to drain or butcher and clears ROGUE, rebinds
2911 * all remaining workers back to the cpu and releases
2912 * manager role.
2913 *
2914 * DONE Trustee will enter this state after BUTCHER or RELEASE
2915 * is complete.
2916 *
2917 * trustee CPU draining
2918 * took over down complete
2919 * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
2920 * | | ^
2921 * | CPU is back online v return workers |
2922 * ----------------> RELEASE --------------
2923 */
2924
2925 /**
2926 * trustee_wait_event_timeout - timed event wait for trustee
2927 * @cond: condition to wait for
2928 * @timeout: timeout in jiffies
2929 *
2930 * wait_event_timeout() for trustee to use. Handles locking and
2931 * checks for RELEASE request.
2932 *
2933 * CONTEXT:
2934 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
2935 * multiple times. To be used by trustee.
2936 *
2937 * RETURNS:
2938 * Positive indicating left time if @cond is satisfied, 0 if timed
2939 * out, -1 if canceled.
2940 */
2941 #define trustee_wait_event_timeout(cond, timeout) ({ \
2942 long __ret = (timeout); \
2943 while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \
2944 __ret) { \
2945 spin_unlock_irq(&gcwq->lock); \
2946 __wait_event_timeout(gcwq->trustee_wait, (cond) || \
2947 (gcwq->trustee_state == TRUSTEE_RELEASE), \
2948 __ret); \
2949 spin_lock_irq(&gcwq->lock); \
2950 } \
2951 gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \
2952 })
2953
2954 /**
2955 * trustee_wait_event - event wait for trustee
2956 * @cond: condition to wait for
2957 *
2958 * wait_event() for trustee to use. Automatically handles locking and
2959 * checks for CANCEL request.
2960 *
2961 * CONTEXT:
2962 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
2963 * multiple times. To be used by trustee.
2964 *
2965 * RETURNS:
2966 * 0 if @cond is satisfied, -1 if canceled.
2967 */
2968 #define trustee_wait_event(cond) ({ \
2969 long __ret1; \
2970 __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
2971 __ret1 < 0 ? -1 : 0; \
2972 })
2973
2974 static int __cpuinit trustee_thread(void *__gcwq)
2975 {
2976 struct global_cwq *gcwq = __gcwq;
2977 struct worker *worker;
2978 struct work_struct *work;
2979 struct hlist_node *pos;
2980 long rc;
2981 int i;
2982
2983 BUG_ON(gcwq->cpu != smp_processor_id());
2984
2985 spin_lock_irq(&gcwq->lock);
2986 /*
2987 * Claim the manager position and make all workers rogue.
2988 * Trustee must be bound to the target cpu and can't be
2989 * cancelled.
2990 */
2991 BUG_ON(gcwq->cpu != smp_processor_id());
2992 rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS));
2993 BUG_ON(rc < 0);
2994
2995 gcwq->flags |= GCWQ_MANAGING_WORKERS;
2996
2997 list_for_each_entry(worker, &gcwq->idle_list, entry)
2998 worker->flags |= WORKER_ROGUE;
2999
3000 for_each_busy_worker(worker, i, pos, gcwq)
3001 worker->flags |= WORKER_ROGUE;
3002
3003 /*
3004 * Call schedule() so that we cross rq->lock and thus can
3005 * guarantee sched callbacks see the rogue flag. This is
3006 * necessary as scheduler callbacks may be invoked from other
3007 * cpus.
3008 */
3009 spin_unlock_irq(&gcwq->lock);
3010 schedule();
3011 spin_lock_irq(&gcwq->lock);
3012
3013 /*
3014 * Sched callbacks are disabled now. Zap nr_running. After
3015 * this, nr_running stays zero and need_more_worker() and
3016 * keep_working() are always true as long as the worklist is
3017 * not empty.
3018 */
3019 atomic_set(get_gcwq_nr_running(gcwq->cpu), 0);
3020
3021 spin_unlock_irq(&gcwq->lock);
3022 del_timer_sync(&gcwq->idle_timer);
3023 spin_lock_irq(&gcwq->lock);
3024
3025 /*
3026 * We're now in charge. Notify and proceed to drain. We need
3027 * to keep the gcwq running during the whole CPU down
3028 * procedure as other cpu hotunplug callbacks may need to
3029 * flush currently running tasks.
3030 */
3031 gcwq->trustee_state = TRUSTEE_IN_CHARGE;
3032 wake_up_all(&gcwq->trustee_wait);
3033
3034 /*
3035 * The original cpu is in the process of dying and may go away
3036 * anytime now. When that happens, we and all workers would
3037 * be migrated to other cpus. Try draining any left work. We
3038 * want to get it over with ASAP - spam rescuers, wake up as
3039 * many idlers as necessary and create new ones till the
3040 * worklist is empty. Note that if the gcwq is frozen, there
3041 * may be frozen works in freezeable cwqs. Don't declare
3042 * completion while frozen.
3043 */
3044 while (gcwq->nr_workers != gcwq->nr_idle ||
3045 gcwq->flags & GCWQ_FREEZING ||
3046 gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
3047 int nr_works = 0;
3048
3049 list_for_each_entry(work, &gcwq->worklist, entry) {
3050 send_mayday(work);
3051 nr_works++;
3052 }
3053
3054 list_for_each_entry(worker, &gcwq->idle_list, entry) {
3055 if (!nr_works--)
3056 break;
3057 wake_up_process(worker->task);
3058 }
3059
3060 if (need_to_create_worker(gcwq)) {
3061 spin_unlock_irq(&gcwq->lock);
3062 worker = create_worker(gcwq, false);
3063 spin_lock_irq(&gcwq->lock);
3064 if (worker) {
3065 worker->flags |= WORKER_ROGUE;
3066 start_worker(worker);
3067 }
3068 }
3069
3070 /* give a breather */
3071 if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
3072 break;
3073 }
3074
3075 /*
3076 * Either all works have been scheduled and cpu is down, or
3077 * cpu down has already been canceled. Wait for and butcher
3078 * all workers till we're canceled.
3079 */
3080 do {
3081 rc = trustee_wait_event(!list_empty(&gcwq->idle_list));
3082 while (!list_empty(&gcwq->idle_list))
3083 destroy_worker(list_first_entry(&gcwq->idle_list,
3084 struct worker, entry));
3085 } while (gcwq->nr_workers && rc >= 0);
3086
3087 /*
3088 * At this point, either draining has completed and no worker
3089 * is left, or cpu down has been canceled or the cpu is being
3090 * brought back up. There shouldn't be any idle one left.
3091 * Tell the remaining busy ones to rebind once they finish their
3092 * currently scheduled works by scheduling the rebind_work.
3093 */
3094 WARN_ON(!list_empty(&gcwq->idle_list));
3095
3096 for_each_busy_worker(worker, i, pos, gcwq) {
3097 struct work_struct *rebind_work = &worker->rebind_work;
3098
3099 /*
3100 * Rebind_work may race with future cpu hotplug
3101 * operations. Use a separate flag to mark that
3102 * rebinding is scheduled.
3103 */
3104 worker->flags |= WORKER_REBIND;
3105 worker->flags &= ~WORKER_ROGUE;
3106
3107 /* queue rebind_work, wq doesn't matter, use the default one */
3108 if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
3109 work_data_bits(rebind_work)))
3110 continue;
3111
3112 debug_work_activate(rebind_work);
3113 insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work,
3114 worker->scheduled.next,
3115 work_color_to_flags(WORK_NO_COLOR));
3116 }
3117
3118 /* relinquish manager role */
3119 gcwq->flags &= ~GCWQ_MANAGING_WORKERS;
3120
3121 /* notify completion */
3122 gcwq->trustee = NULL;
3123 gcwq->trustee_state = TRUSTEE_DONE;
3124 wake_up_all(&gcwq->trustee_wait);
3125 spin_unlock_irq(&gcwq->lock);
3126 return 0;
3127 }
3128
3129 /**
3130 * wait_trustee_state - wait for trustee to enter the specified state
3131 * @gcwq: gcwq the trustee of interest belongs to
3132 * @state: target state to wait for
3133 *
3134 * Wait for the trustee to reach @state. DONE is already matched.
3135 *
3136 * CONTEXT:
3137 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
3138 * multiple times. To be used by cpu_callback.
3139 */
3140 static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
3141 {
3142 if (!(gcwq->trustee_state == state ||
3143 gcwq->trustee_state == TRUSTEE_DONE)) {
3144 spin_unlock_irq(&gcwq->lock);
3145 __wait_event(gcwq->trustee_wait,
3146 gcwq->trustee_state == state ||
3147 gcwq->trustee_state == TRUSTEE_DONE);
3148 spin_lock_irq(&gcwq->lock);
3149 }
3150 }
3151
3152 static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
3153 unsigned long action,
3154 void *hcpu)
3155 {
3156 unsigned int cpu = (unsigned long)hcpu;
3157 struct global_cwq *gcwq = get_gcwq(cpu);
3158 struct task_struct *new_trustee = NULL;
3159 struct worker *uninitialized_var(new_worker);
3160 unsigned long flags;
3161
3162 action &= ~CPU_TASKS_FROZEN;
3163
3164 switch (action) {
3165 case CPU_DOWN_PREPARE:
3166 new_trustee = kthread_create(trustee_thread, gcwq,
3167 "workqueue_trustee/%d\n", cpu);
3168 if (IS_ERR(new_trustee))
3169 return notifier_from_errno(PTR_ERR(new_trustee));
3170 kthread_bind(new_trustee, cpu);
3171 /* fall through */
3172 case CPU_UP_PREPARE:
3173 BUG_ON(gcwq->first_idle);
3174 new_worker = create_worker(gcwq, false);
3175 if (!new_worker) {
3176 if (new_trustee)
3177 kthread_stop(new_trustee);
3178 return NOTIFY_BAD;
3179 }
3180 }
3181
3182 /* some are called w/ irq disabled, don't disturb irq status */
3183 spin_lock_irqsave(&gcwq->lock, flags);
3184
3185 switch (action) {
3186 case CPU_DOWN_PREPARE:
3187 /* initialize trustee and tell it to acquire the gcwq */
3188 BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
3189 gcwq->trustee = new_trustee;
3190 gcwq->trustee_state = TRUSTEE_START;
3191 wake_up_process(gcwq->trustee);
3192 wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
3193 /* fall through */
3194 case CPU_UP_PREPARE:
3195 BUG_ON(gcwq->first_idle);
3196 gcwq->first_idle = new_worker;
3197 break;
3198
3199 case CPU_DYING:
3200 /*
3201 * Before this, the trustee and all workers except for
3202 * the ones which are still executing works from
3203 * before the last CPU down must be on the cpu. After
3204 * this, they'll all be diasporas.
3205 */
3206 gcwq->flags |= GCWQ_DISASSOCIATED;
3207 break;
3208
3209 case CPU_POST_DEAD:
3210 gcwq->trustee_state = TRUSTEE_BUTCHER;
3211 /* fall through */
3212 case CPU_UP_CANCELED:
3213 destroy_worker(gcwq->first_idle);
3214 gcwq->first_idle = NULL;
3215 break;
3216
3217 case CPU_DOWN_FAILED:
3218 case CPU_ONLINE:
3219 gcwq->flags &= ~GCWQ_DISASSOCIATED;
3220 if (gcwq->trustee_state != TRUSTEE_DONE) {
3221 gcwq->trustee_state = TRUSTEE_RELEASE;
3222 wake_up_process(gcwq->trustee);
3223 wait_trustee_state(gcwq, TRUSTEE_DONE);
3224 }
3225
3226 /*
3227 * Trustee is done and there might be no worker left.
3228 * Put the first_idle in and request a real manager to
3229 * take a look.
3230 */
3231 spin_unlock_irq(&gcwq->lock);
3232 kthread_bind(gcwq->first_idle->task, cpu);
3233 spin_lock_irq(&gcwq->lock);
3234 gcwq->flags |= GCWQ_MANAGE_WORKERS;
3235 start_worker(gcwq->first_idle);
3236 gcwq->first_idle = NULL;
3237 break;
3238 }
3239
3240 spin_unlock_irqrestore(&gcwq->lock, flags);
3241
3242 return notifier_from_errno(0);
3243 }
3244
3245 #ifdef CONFIG_SMP
3246
3247 struct work_for_cpu {
3248 struct completion completion;
3249 long (*fn)(void *);
3250 void *arg;
3251 long ret;
3252 };
3253
3254 static int do_work_for_cpu(void *_wfc)
3255 {
3256 struct work_for_cpu *wfc = _wfc;
3257 wfc->ret = wfc->fn(wfc->arg);
3258 complete(&wfc->completion);
3259 return 0;
3260 }
3261
3262 /**
3263 * work_on_cpu - run a function in user context on a particular cpu
3264 * @cpu: the cpu to run on
3265 * @fn: the function to run
3266 * @arg: the function arg
3267 *
3268 * This will return the value @fn returns.
3269 * It is up to the caller to ensure that the cpu doesn't go offline.
3270 * The caller must not hold any locks which would prevent @fn from completing.
3271 */
3272 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
3273 {
3274 struct task_struct *sub_thread;
3275 struct work_for_cpu wfc = {
3276 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
3277 .fn = fn,
3278 .arg = arg,
3279 };
3280
3281 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
3282 if (IS_ERR(sub_thread))
3283 return PTR_ERR(sub_thread);
3284 kthread_bind(sub_thread, cpu);
3285 wake_up_process(sub_thread);
3286 wait_for_completion(&wfc.completion);
3287 return wfc.ret;
3288 }
3289 EXPORT_SYMBOL_GPL(work_on_cpu);
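
/*
 * Editor's illustrative sketch, not part of workqueue.c: running a
 * function on a specific cpu while holding it online.  The
 * example_* helpers are hypothetical.
 */
static long example_read_on_cpu(void *arg)
{
	/* executes bound to the requested cpu, in process context */
	return 0;
}

static long example_query_cpu(unsigned int cpu)
{
	long ret = -ENODEV;

	get_online_cpus();
	if (cpu_online(cpu))
		ret = work_on_cpu(cpu, example_read_on_cpu, NULL);
	put_online_cpus();
	return ret;
}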
3290 #endif /* CONFIG_SMP */
3291
3292 #ifdef CONFIG_FREEZER
3293
3294 /**
3295 * freeze_workqueues_begin - begin freezing workqueues
3296 *
3297 * Start freezing workqueues. After this function returns, all
3298 * freezeable workqueues will queue new works to their frozen_works
3299 * list instead of gcwq->worklist.
3300 *
3301 * CONTEXT:
3302 * Grabs and releases workqueue_lock and gcwq->lock's.
3303 */
3304 void freeze_workqueues_begin(void)
3305 {
3306 unsigned int cpu;
3307
3308 spin_lock(&workqueue_lock);
3309
3310 BUG_ON(workqueue_freezing);
3311 workqueue_freezing = true;
3312
3313 for_each_possible_cpu(cpu) {
3314 struct global_cwq *gcwq = get_gcwq(cpu);
3315 struct workqueue_struct *wq;
3316
3317 spin_lock_irq(&gcwq->lock);
3318
3319 BUG_ON(gcwq->flags & GCWQ_FREEZING);
3320 gcwq->flags |= GCWQ_FREEZING;
3321
3322 list_for_each_entry(wq, &workqueues, list) {
3323 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3324
3325 if (wq->flags & WQ_FREEZEABLE)
3326 cwq->max_active = 0;
3327 }
3328
3329 spin_unlock_irq(&gcwq->lock);
3330 }
3331
3332 spin_unlock(&workqueue_lock);
3333 }
3334
3335 /**
3336 * freeze_workqueues_busy - are freezeable workqueues still busy?
3337 *
3338 * Check whether freezing is complete. This function must be called
3339 * between freeze_workqueues_begin() and thaw_workqueues().
3340 *
3341 * CONTEXT:
3342 * Grabs and releases workqueue_lock.
3343 *
3344 * RETURNS:
3345 * %true if some freezeable workqueues are still busy. %false if
3346 * freezing is complete.
3347 */
3348 bool freeze_workqueues_busy(void)
3349 {
3350 unsigned int cpu;
3351 bool busy = false;
3352
3353 spin_lock(&workqueue_lock);
3354
3355 BUG_ON(!workqueue_freezing);
3356
3357 for_each_possible_cpu(cpu) {
3358 struct workqueue_struct *wq;
3359 /*
3360 * nr_active is monotonically decreasing. It's safe
3361 * to peek without lock.
3362 */
3363 list_for_each_entry(wq, &workqueues, list) {
3364 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3365
3366 if (!(wq->flags & WQ_FREEZEABLE))
3367 continue;
3368
3369 BUG_ON(cwq->nr_active < 0);
3370 if (cwq->nr_active) {
3371 busy = true;
3372 goto out_unlock;
3373 }
3374 }
3375 }
3376 out_unlock:
3377 spin_unlock(&workqueue_lock);
3378 return busy;
3379 }
3380
3381 /**
3382 * thaw_workqueues - thaw workqueues
3383 *
3384 * Thaw workqueues. Normal queueing is restored and all collected
3385 * frozen works are transferred to their respective gcwq worklists.
3386 *
3387 * CONTEXT:
3388 * Grabs and releases workqueue_lock and gcwq->lock's.
3389 */
3390 void thaw_workqueues(void)
3391 {
3392 unsigned int cpu;
3393
3394 spin_lock(&workqueue_lock);
3395
3396 if (!workqueue_freezing)
3397 goto out_unlock;
3398
3399 for_each_possible_cpu(cpu) {
3400 struct global_cwq *gcwq = get_gcwq(cpu);
3401 struct workqueue_struct *wq;
3402
3403 spin_lock_irq(&gcwq->lock);
3404
3405 BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
3406 gcwq->flags &= ~GCWQ_FREEZING;
3407
3408 list_for_each_entry(wq, &workqueues, list) {
3409 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
3410
3411 if (!(wq->flags & WQ_FREEZEABLE))
3412 continue;
3413
3414 /* restore max_active and repopulate worklist */
3415 cwq->max_active = wq->saved_max_active;
3416
3417 while (!list_empty(&cwq->delayed_works) &&
3418 cwq->nr_active < cwq->max_active)
3419 cwq_activate_first_delayed(cwq);
3420
3421 /* perform delayed unbind from single cpu if empty */
3422 if (wq->single_cpu == gcwq->cpu &&
3423 !cwq->nr_active && list_empty(&cwq->delayed_works))
3424 cwq_unbind_single_cpu(cwq);
3425 }
3426
3427 wake_up_worker(gcwq);
3428
3429 spin_unlock_irq(&gcwq->lock);
3430 }
3431
3432 workqueue_freezing = false;
3433 out_unlock:
3434 spin_unlock(&workqueue_lock);
3435 }
3436 #endif /* CONFIG_FREEZER */
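
/*
 * Editor's illustrative sketch, not part of workqueue.c: roughly how
 * the PM freezer core is expected to drive the three functions
 * above.  The real sequencing lives in kernel/power/ and differs in
 * detail; msleep() assumes <linux/delay.h>.
 */
#ifdef CONFIG_FREEZER
static void example_freeze_and_thaw(void)
{
	freeze_workqueues_begin();

	/* wait for freezeable workqueues to finish their active works */
	while (freeze_workqueues_busy())
		msleep(10);

	/* ... system image would be created here ... */

	thaw_workqueues();
}
#endif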
3437
3438 void __init init_workqueues(void)
3439 {
3440 unsigned int cpu;
3441 int i;
3442
3443 /*
3444 * The pointer part of work->data is either pointing to the
3445 * cwq or contains the cpu number the work ran last on. Make
3446 * sure cpu number won't overflow into kernel pointer area so
3447 * that they can be distinguished.
3448 */
3449 BUILD_BUG_ON(WORK_CPU_LAST << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET);
3450
3451 hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);
3452
3453 /* initialize gcwqs */
3454 for_each_possible_cpu(cpu) {
3455 struct global_cwq *gcwq = get_gcwq(cpu);
3456
3457 spin_lock_init(&gcwq->lock);
3458 INIT_LIST_HEAD(&gcwq->worklist);
3459 gcwq->cpu = cpu;
3460
3461 INIT_LIST_HEAD(&gcwq->idle_list);
3462 for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
3463 INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
3464
3465 init_timer_deferrable(&gcwq->idle_timer);
3466 gcwq->idle_timer.function = idle_worker_timeout;
3467 gcwq->idle_timer.data = (unsigned long)gcwq;
3468
3469 setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout,
3470 (unsigned long)gcwq);
3471
3472 ida_init(&gcwq->worker_ida);
3473
3474 gcwq->trustee_state = TRUSTEE_DONE;
3475 init_waitqueue_head(&gcwq->trustee_wait);
3476 }
3477
3478 /* create the initial worker */
3479 for_each_online_cpu(cpu) {
3480 struct global_cwq *gcwq = get_gcwq(cpu);
3481 struct worker *worker;
3482
3483 worker = create_worker(gcwq, true);
3484 BUG_ON(!worker);
3485 spin_lock_irq(&gcwq->lock);
3486 start_worker(worker);
3487 spin_unlock_irq(&gcwq->lock);
3488 }
3489
3490 system_wq = alloc_workqueue("events", 0, 0);
3491 system_long_wq = alloc_workqueue("events_long", 0, 0);
3492 system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0);
3493 BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq);
3494 }