/*
 * linux/kernel/workqueue.c
 *
 * Generic mechanism for defining kernel helper threads for running
 * arbitrary tasks in process context.
 *
 * Started by Ingo Molnar, Copyright (C) 2002
 *
 * Derived from the taskqueue/keventd code by:
 *   David Woodhouse <dwmw2@infradead.org>
 *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
 *   Theodore Ts'o <tytso@mit.edu>
 *
 * Made to use alloc_percpu by Christoph Lameter.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/kthread.h>
#include <linux/hardirq.h>
#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <linux/kallsyms.h>
#include <linux/debug_locks.h>
#include <linux/lockdep.h>
#include <linux/idr.h>
enum {
	/* global_cwq flags */
	GCWQ_FREEZING		= 1 << 3,	/* freeze in progress */

	/* worker flags */
	WORKER_STARTED		= 1 << 0,	/* started */
	WORKER_DIE		= 1 << 1,	/* die die die */
	WORKER_IDLE		= 1 << 2,	/* is idle */
	WORKER_ROGUE		= 1 << 4,	/* not bound to any cpu */

	/* gcwq->trustee_state */
	TRUSTEE_START		= 0,		/* start */
	TRUSTEE_IN_CHARGE	= 1,		/* trustee in charge of gcwq */
	TRUSTEE_BUTCHER		= 2,		/* butcher workers */
	TRUSTEE_RELEASE		= 3,		/* release workers */
	TRUSTEE_DONE		= 4,		/* trustee is done */

	BUSY_WORKER_HASH_ORDER	= 6,		/* 64 pointers */
	BUSY_WORKER_HASH_SIZE	= 1 << BUSY_WORKER_HASH_ORDER,
	BUSY_WORKER_HASH_MASK	= BUSY_WORKER_HASH_SIZE - 1,

	TRUSTEE_COOLDOWN	= HZ / 10,	/* for trustee draining */
};
/*
 * Structure fields follow one of the following exclusion rules.
 *
 * I: Set during initialization and read-only afterwards.
 *
 * L: gcwq->lock protected.  Access with gcwq->lock held.
 *
 * F: wq->flush_mutex protected.
 *
 * W: workqueue_lock protected.
 */
struct cpu_workqueue_struct;

struct worker {
	/* on idle list while idle, on busy hash table while busy */
	union {
		struct list_head	entry;	/* L: while idle */
		struct hlist_node	hentry;	/* L: while busy */
	};

	struct work_struct	*current_work;	/* L: work being processed */
	struct list_head	scheduled;	/* L: scheduled works */
	struct task_struct	*task;		/* I: worker task */
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	struct cpu_workqueue_struct *cwq;	/* I: the associated cwq */
	unsigned int		flags;		/* L: flags */
	int			id;		/* I: worker id */
};
/*
 * Global per-cpu workqueue.
 */
struct global_cwq {
	spinlock_t		lock;		/* the gcwq lock */
	unsigned int		cpu;		/* I: the associated cpu */
	unsigned int		flags;		/* L: GCWQ_* flags */

	int			nr_workers;	/* L: total number of workers */
	int			nr_idle;	/* L: currently idle ones */

	/* workers are chained either in the idle_list or busy_hash */
	struct list_head	idle_list;	/* L: list of idle workers */
	struct hlist_head	busy_hash[BUSY_WORKER_HASH_SIZE];
						/* L: hash of busy workers */

	struct ida		worker_ida;	/* L: for worker IDs */

	struct task_struct	*trustee;	/* L: for gcwq shutdown */
	unsigned int		trustee_state;	/* L: trustee state */
	wait_queue_head_t	trustee_wait;	/* trustee wait */
} ____cacheline_aligned_in_smp;
/*
 * The per-CPU workqueue (if single thread, we always use the first
 * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
 * work_struct->data are used for flags and thus cwqs need to be
 * aligned at two's power of the number of flag bits.
 */
struct cpu_workqueue_struct {
	struct global_cwq	*gcwq;		/* I: the associated gcwq */
	struct list_head	worklist;
	struct worker		*worker;
	struct workqueue_struct	*wq;		/* I: the owning workqueue */
	int			work_color;	/* L: current color */
	int			flush_color;	/* L: flushing color */
	int			nr_in_flight[WORK_NR_COLORS];
						/* L: nr of in_flight works */
	int			nr_active;	/* L: nr of active works */
	int			max_active;	/* L: max active works */
	struct list_head	delayed_works;	/* L: delayed works */
};
/*
 * Structure used to wait for workqueue flush.
 */
struct wq_flusher {
	struct list_head	list;		/* F: list of flushers */
	int			flush_color;	/* F: flush color waiting for */
	struct completion	done;		/* flush completion */
};
/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 */
struct workqueue_struct {
	unsigned int		flags;		/* I: WQ_* flags */
	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
	struct list_head	list;		/* W: list of all workqueues */

	struct mutex		flush_mutex;	/* protects wq flushing */
	int			work_color;	/* F: current work color */
	int			flush_color;	/* F: current flush color */
	atomic_t		nr_cwqs_to_flush; /* flush in progress */
	struct wq_flusher	*first_flusher;	/* F: first flusher */
	struct list_head	flusher_queue;	/* F: flush waiters */
	struct list_head	flusher_overflow; /* F: flush overflow list */

	int			saved_max_active; /* I: saved cwq max_active */
	const char		*name;		/* I: workqueue name */
#ifdef CONFIG_LOCKDEP
	struct lockdep_map	lockdep_map;
#endif
};
#define for_each_busy_worker(worker, i, pos, gcwq)			\
	for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)			\
		hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
#ifdef CONFIG_DEBUG_OBJECTS_WORK

static struct debug_obj_descr work_debug_descr;

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static int work_fixup_init(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_init(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown object is activated (might be a statically initialized object)
 */
static int work_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		/*
		 * This is not really a fixup. The work struct was
		 * statically initialized. We just make sure that it
		 * is tracked in the object tracker.
		 */
		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
			debug_object_init(work, &work_debug_descr);
			debug_object_activate(work, &work_debug_descr);
			return 0;
		}
		return 0;

	case ODEBUG_STATE_ACTIVE:
		return 0;

	default:
		return 0;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static int work_fixup_free(void *addr, enum debug_obj_state state)
{
	struct work_struct *work = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		cancel_work_sync(work);
		debug_object_free(work, &work_debug_descr);
		return 1;
	default:
		return 0;
	}
}

static struct debug_obj_descr work_debug_descr = {
	.name		= "work_struct",
	.fixup_init	= work_fixup_init,
	.fixup_activate	= work_fixup_activate,
	.fixup_free	= work_fixup_free,
};

static inline void debug_work_activate(struct work_struct *work)
{
	debug_object_activate(work, &work_debug_descr);
}

static inline void debug_work_deactivate(struct work_struct *work)
{
	debug_object_deactivate(work, &work_debug_descr);
}

void __init_work(struct work_struct *work, int onstack)
{
	if (onstack)
		debug_object_init_on_stack(work, &work_debug_descr);
	else
		debug_object_init(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(__init_work);

void destroy_work_on_stack(struct work_struct *work)
{
	debug_object_free(work, &work_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);

#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
#endif
/* Serializes the accesses to the list of workqueues. */
static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
static bool workqueue_freezing;		/* W: have wqs started freezing? */

static DEFINE_PER_CPU(struct global_cwq, global_cwq);

static int worker_thread(void *__worker);

static int singlethread_cpu __read_mostly;
static struct global_cwq *get_gcwq(unsigned int cpu)
{
	return &per_cpu(global_cwq, cpu);
}

static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
					    struct workqueue_struct *wq)
{
	return per_cpu_ptr(wq->cpu_wq, cpu);
}

static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
					       struct workqueue_struct *wq)
{
	if (unlikely(wq->flags & WQ_SINGLE_THREAD))
		cpu = singlethread_cpu;
	return get_cwq(cpu, wq);
}

static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

static int get_work_color(struct work_struct *work)
{
	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

static int work_next_color(int color)
{
	return (color + 1) % WORK_NR_COLORS;
}
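/*
 * Illustrative sketch (not part of the original file; the concrete
 * values are assumptions): with WORK_STRUCT_COLOR_BITS == 4 and
 * WORK_NR_COLORS == 15, colors cycle 0, 1, ..., 14 and then wrap:
 *
 *	work_next_color(0)  == 1
 *	work_next_color(14) == 0
 *
 * A flusher waits for every work tagged with one particular color to
 * drain, which is why this wrapping color space matters to
 * flush_workqueue() below.
 */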
/*
 * Set the workqueue on which a work item is to be run
 * - Must *only* be called if the pending flag is set
 */
static inline void set_wq_data(struct work_struct *work,
			       struct cpu_workqueue_struct *cwq,
			       unsigned long extra_flags)
{
	BUG_ON(!work_pending(work));

	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
			WORK_STRUCT_PENDING | extra_flags);
}

/*
 * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
 */
static inline void clear_wq_data(struct work_struct *work)
{
	atomic_long_set(&work->data, work_static(work));
}

static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
{
	return (void *)(atomic_long_read(&work->data) &
			WORK_STRUCT_WQ_DATA_MASK);
}
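/*
 * Illustrative sketch of the work->data encoding used above (not part
 * of the original file): the low WORK_STRUCT_FLAG_BITS carry flag bits
 * such as WORK_STRUCT_PENDING while the remaining high bits store the
 * cwq pointer:
 *
 *	data = (unsigned long)cwq | WORK_STRUCT_PENDING | extra_flags;
 *	cwq  = (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
 *
 * This only works because cwqs are aligned to 1 << WORK_STRUCT_FLAG_BITS
 * (see alloc_cwqs() below), which keeps the low bits of the pointer zero.
 */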
/**
 * busy_worker_head - return the busy hash head for a work
 * @gcwq: gcwq of interest
 * @work: work to be hashed
 *
 * Return hash head of @gcwq for @work.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 *
 * RETURNS:
 * Pointer to the hash head.
 */
static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
					   struct work_struct *work)
{
	const int base_shift = ilog2(sizeof(struct work_struct));
	unsigned long v = (unsigned long)work;

	/* simple shift and fold hash, do we need something better? */
	v >>= base_shift;
	v += v >> BUSY_WORKER_HASH_ORDER;
	v &= BUSY_WORKER_HASH_MASK;

	return &gcwq->busy_hash[v];
}
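/*
 * Worked example of the shift-and-fold hash above (illustrative only;
 * the struct size is an assumption): if sizeof(struct work_struct) is
 * 32, base_shift is 5, so for a work pointer p:
 *
 *	v  = (unsigned long)p >> 5;		// drop alignment zeros
 *	v += v >> BUSY_WORKER_HASH_ORDER;	// fold higher bits in
 *	v &= BUSY_WORKER_HASH_MASK;		// 0..BUSY_WORKER_HASH_SIZE-1
 */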
/**
 * insert_work - insert a work into cwq
 * @cwq: cwq @work belongs to
 * @work: work to insert
 * @head: insertion point
 * @extra_flags: extra WORK_STRUCT_* flags to set
 *
 * Insert @work into @cwq after @head.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void insert_work(struct cpu_workqueue_struct *cwq,
			struct work_struct *work, struct list_head *head,
			unsigned int extra_flags)
{
	/* we own @work, set data and link */
	set_wq_data(work, cwq, extra_flags);

	/*
	 * Ensure that we get the right work->data if we see the
	 * result of list_add() below, see try_to_grab_pending().
	 */
	smp_wmb();

	list_add_tail(&work->entry, head);
	wake_up_process(cwq->worker->task);
}
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
			 struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
	struct global_cwq *gcwq = cwq->gcwq;
	struct list_head *worklist;
	unsigned long flags;

	debug_work_activate(work);

	spin_lock_irqsave(&gcwq->lock, flags);
	BUG_ON(!list_empty(&work->entry));

	cwq->nr_in_flight[cwq->work_color]++;

	if (likely(cwq->nr_active < cwq->max_active)) {
		cwq->nr_active++;
		worklist = &cwq->worklist;
	} else
		worklist = &cwq->delayed_works;

	insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));

	spin_unlock_irqrestore(&gcwq->lock, flags);
}
/**
 * queue_work - queue work on a workqueue
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to the CPU on which it was submitted, but if the CPU dies
 * it can be processed by another CPU.
 */
int queue_work(struct workqueue_struct *wq, struct work_struct *work)
{
	int ret;

	ret = queue_work_on(get_cpu(), wq, work);
	put_cpu();

	return ret;
}
EXPORT_SYMBOL_GPL(queue_work);
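/*
 * Example usage (illustrative sketch, not part of the original file;
 * my_work_fn, my_work and my_wq are hypothetical names):
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_info("running in process context\n");
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	queue_work(my_wq, &my_work);	// my_wq from create_workqueue()
 */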
/**
 * queue_work_on - queue work on specific cpu
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @work: work to queue
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 *
 * We queue the work to a specific CPU, the caller must ensure it
 * can't go away.
 */
int
queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
{
	int ret = 0;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		__queue_work(cpu, wq, work);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_work_on);
static void delayed_work_timer_fn(unsigned long __data)
{
	struct delayed_work *dwork = (struct delayed_work *)__data;
	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);

	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
}
/**
 * queue_delayed_work - queue work on a workqueue after delay
 * @wq: workqueue to use
 * @dwork: delayable work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work(struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	if (delay == 0)
		return queue_work(wq, &dwork->work);

	return queue_delayed_work_on(-1, wq, dwork, delay);
}
EXPORT_SYMBOL_GPL(queue_delayed_work);
/**
 * queue_delayed_work_on - queue work on specific CPU after delay
 * @cpu: CPU number to execute work on
 * @wq: workqueue to use
 * @dwork: work to queue
 * @delay: number of jiffies to wait before queueing
 *
 * Returns 0 if @work was already on a queue, non-zero otherwise.
 */
int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
			struct delayed_work *dwork, unsigned long delay)
{
	int ret = 0;
	struct timer_list *timer = &dwork->timer;
	struct work_struct *work = &dwork->work;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		BUG_ON(timer_pending(timer));
		BUG_ON(!list_empty(&work->entry));

		timer_stats_timer_set_start_info(&dwork->timer);

		/* This stores cwq for the moment, for the timer_fn */
		set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
		timer->expires = jiffies + delay;
		timer->data = (unsigned long)dwork;
		timer->function = delayed_work_timer_fn;

		if (unlikely(cpu >= 0))
			add_timer_on(timer, cpu);
		else
			add_timer(timer);
		ret = 1;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(queue_delayed_work_on);
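/*
 * Example usage (illustrative sketch; my_dwork_fn, my_dwork and my_wq
 * are hypothetical names):
 *
 *	static void my_dwork_fn(struct work_struct *work)
 *	{
 *		struct delayed_work *dwork = to_delayed_work(work);
 *		// runs roughly one second after queueing
 *	}
 *	static DECLARE_DELAYED_WORK(my_dwork, my_dwork_fn);
 *
 *	queue_delayed_work(my_wq, &my_dwork, HZ);
 */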
/**
 * worker_enter_idle - enter idle state
 * @worker: worker which is entering idle state
 *
 * @worker is entering idle state.  Update stats and idle timer if
 * necessary.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void worker_enter_idle(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;

	BUG_ON(worker->flags & WORKER_IDLE);
	BUG_ON(!list_empty(&worker->entry) &&
	       (worker->hentry.next || worker->hentry.pprev));

	worker->flags |= WORKER_IDLE;
	gcwq->nr_idle++;

	/* idle_list is LIFO */
	list_add(&worker->entry, &gcwq->idle_list);

	if (unlikely(worker->flags & WORKER_ROGUE))
		wake_up_all(&gcwq->trustee_wait);
}
/**
 * worker_leave_idle - leave idle state
 * @worker: worker which is leaving idle state
 *
 * @worker is leaving idle state.  Update stats.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void worker_leave_idle(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;

	BUG_ON(!(worker->flags & WORKER_IDLE));
	worker->flags &= ~WORKER_IDLE;
	gcwq->nr_idle--;
	list_del_init(&worker->entry);
}
static struct worker *alloc_worker(void)
{
	struct worker *worker;

	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
	if (worker) {
		INIT_LIST_HEAD(&worker->entry);
		INIT_LIST_HEAD(&worker->scheduled);
	}
	return worker;
}
/**
 * create_worker - create a new workqueue worker
 * @cwq: cwq the new worker will belong to
 * @bind: whether to set affinity to @cpu or not
 *
 * Create a new worker which is bound to @cwq.  The returned worker
 * can be started by calling start_worker() or destroyed using
 * destroy_worker().
 *
 * CONTEXT:
 * Might sleep.  Does GFP_KERNEL allocations.
 *
 * RETURNS:
 * Pointer to the newly created worker.
 */
static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
{
	struct global_cwq *gcwq = cwq->gcwq;
	int id = -1;
	struct worker *worker = NULL;

	spin_lock_irq(&gcwq->lock);
	while (ida_get_new(&gcwq->worker_ida, &id)) {
		spin_unlock_irq(&gcwq->lock);
		if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL))
			goto fail;
		spin_lock_irq(&gcwq->lock);
	}
	spin_unlock_irq(&gcwq->lock);

	worker = alloc_worker();
	if (!worker)
		goto fail;

	worker->gcwq = gcwq;
	worker->cwq = cwq;
	worker->id = id;

	worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
				      gcwq->cpu, id);
	if (IS_ERR(worker->task))
		goto fail;

	/*
	 * A rogue worker will become a regular one if CPU comes
	 * online later on.  Make sure every worker has
	 * PF_THREAD_BOUND set.
	 */
	if (bind)
		kthread_bind(worker->task, gcwq->cpu);

	worker->task->flags |= PF_THREAD_BOUND;
	return worker;

fail:
	if (id >= 0) {
		spin_lock_irq(&gcwq->lock);
		ida_remove(&gcwq->worker_ida, id);
		spin_unlock_irq(&gcwq->lock);
	}
	kfree(worker);
	return NULL;
}
/**
 * start_worker - start a newly created worker
 * @worker: worker to start
 *
 * Make the gcwq aware of @worker and start it.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void start_worker(struct worker *worker)
{
	worker->flags |= WORKER_STARTED;
	worker->gcwq->nr_workers++;
	worker_enter_idle(worker);
	wake_up_process(worker->task);
}
/**
 * destroy_worker - destroy a workqueue worker
 * @worker: worker to be destroyed
 *
 * Destroy @worker and adjust @gcwq stats accordingly.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
 */
static void destroy_worker(struct worker *worker)
{
	struct global_cwq *gcwq = worker->gcwq;
	int id = worker->id;

	/* sanity check frenzy */
	BUG_ON(worker->current_work);
	BUG_ON(!list_empty(&worker->scheduled));

	if (worker->flags & WORKER_STARTED)
		gcwq->nr_workers--;
	if (worker->flags & WORKER_IDLE)
		gcwq->nr_idle--;

	list_del_init(&worker->entry);
	worker->flags |= WORKER_DIE;

	spin_unlock_irq(&gcwq->lock);

	kthread_stop(worker->task);
	kfree(worker);

	spin_lock_irq(&gcwq->lock);
	ida_remove(&gcwq->worker_ida, id);
}
/**
 * move_linked_works - move linked works to a list
 * @work: start of series of works to be scheduled
 * @head: target list to append @work to
 * @nextp: out parameter for nested worklist walking
 *
 * Schedule linked works starting from @work to @head.  Work series to
 * be scheduled starts at @work and includes any consecutive work with
 * WORK_STRUCT_LINKED set in its predecessor.
 *
 * If @nextp is not NULL, it's updated to point to the next work of
 * the last scheduled work.  This allows move_linked_works() to be
 * nested inside outer list_for_each_entry_safe().
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void move_linked_works(struct work_struct *work, struct list_head *head,
			      struct work_struct **nextp)
{
	struct work_struct *n;

	/*
	 * Linked worklist will always end before the end of the list,
	 * use NULL for list head.
	 */
	list_for_each_entry_safe_from(work, n, NULL, entry) {
		list_move_tail(&work->entry, head);
		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
			break;
	}

	/*
	 * If we're already inside safe list traversal and have moved
	 * multiple works to the scheduled queue, the next position
	 * needs to be updated.
	 */
	if (nextp)
		*nextp = n;
}
static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
{
	struct work_struct *work = list_first_entry(&cwq->delayed_works,
						    struct work_struct, entry);

	move_linked_works(work, &cwq->worklist, NULL);
	cwq->nr_active++;
}
/**
 * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
 * @cwq: cwq of interest
 * @color: color of work which left the queue
 *
 * A work either has completed or is removed from pending queue,
 * decrement nr_in_flight of its cwq and handle workqueue flushing.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
{
	/* ignore uncolored works */
	if (color == WORK_NO_COLOR)
		return;

	cwq->nr_in_flight[color]--;
	cwq->nr_active--;

	/* one down, submit a delayed one */
	if (!list_empty(&cwq->delayed_works) &&
	    cwq->nr_active < cwq->max_active)
		cwq_activate_first_delayed(cwq);

	/* is flush in progress and are we at the flushing tip? */
	if (likely(cwq->flush_color != color))
		return;

	/* are there still in-flight works? */
	if (cwq->nr_in_flight[color])
		return;

	/* this cwq is done, clear flush_color */
	cwq->flush_color = -1;

	/*
	 * If this was the last cwq, wake up the first flusher.  It
	 * will handle the rest.
	 */
	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
		complete(&cwq->wq->first_flusher->done);
}
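/*
 * Illustrative walk-through of the accounting above (numbers are made
 * up): suppose three works were queued at color 2 and a flusher set
 * cwq->flush_color = 2.  Each completion decrements nr_in_flight[2];
 * only the third one sees nr_in_flight[2] == 0, clears flush_color
 * back to -1 and, if this was the last cwq still flushing, completes
 * the first flusher via nr_cwqs_to_flush.
 */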
/**
 * process_one_work - process single work
 * @worker: self
 * @work: work to process
 *
 * Process @work.  This function contains all the logic necessary to
 * process a single work including synchronization against and
 * interaction with other workers on the same cpu, queueing and
 * flushing.  As long as context requirement is met, any worker can
 * call this function to process a work.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which is released and regrabbed.
 */
static void process_one_work(struct worker *worker, struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq = worker->cwq;
	struct global_cwq *gcwq = cwq->gcwq;
	struct hlist_head *bwh = busy_worker_head(gcwq, work);
	work_func_t f = work->func;
	int work_color;
#ifdef CONFIG_LOCKDEP
	/*
	 * It is permissible to free the struct work_struct from
	 * inside the function that is called from it, this we need to
	 * take into account for lockdep too.  To avoid bogus "held
	 * lock freed" warnings as well as problems when looking into
	 * work->lockdep_map, make a copy and use that here.
	 */
	struct lockdep_map lockdep_map = work->lockdep_map;
#endif
	/* claim and process */
	debug_work_deactivate(work);
	hlist_add_head(&worker->hentry, bwh);
	worker->current_work = work;
	work_color = get_work_color(work);
	list_del_init(&work->entry);

	spin_unlock_irq(&gcwq->lock);

	BUG_ON(get_wq_data(work) != cwq);
	work_clear_pending(work);
	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_acquire(&lockdep_map);
	f(work);
	lock_map_release(&lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
		       "%s/0x%08x/%d\n",
		       current->comm, preempt_count(), task_pid_nr(current));
		printk(KERN_ERR "    last function: ");
		print_symbol("%s\n", (unsigned long)f);
		debug_show_held_locks(current);
		dump_stack();
	}

	spin_lock_irq(&gcwq->lock);

	/* we're done with it, release */
	hlist_del_init(&worker->hentry);
	worker->current_work = NULL;
	cwq_dec_nr_in_flight(cwq, work_color);
}
/**
 * process_scheduled_works - process scheduled works
 * @worker: self
 *
 * Process all scheduled works.  Please note that the scheduled list
 * may change while processing a work, so this function repeatedly
 * fetches a work from the top and executes it.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.
 */
static void process_scheduled_works(struct worker *worker)
{
	while (!list_empty(&worker->scheduled)) {
		struct work_struct *work = list_first_entry(&worker->scheduled,
						struct work_struct, entry);
		process_one_work(worker, work);
	}
}
/**
 * worker_thread - the worker thread function
 * @__worker: self
 *
 * The cwq worker thread function.
 */
static int worker_thread(void *__worker)
{
	struct worker *worker = __worker;
	struct global_cwq *gcwq = worker->gcwq;
	struct cpu_workqueue_struct *cwq = worker->cwq;

woke_up:
	spin_lock_irq(&gcwq->lock);

	/* DIE can be set only while we're idle, checking here is enough */
	if (worker->flags & WORKER_DIE) {
		spin_unlock_irq(&gcwq->lock);
		return 0;
	}

	worker_leave_idle(worker);

	/*
	 * ->scheduled list can only be filled while a worker is
	 * preparing to process a work or actually processing it.
	 * Make sure nobody diddled with it while I was sleeping.
	 */
	BUG_ON(!list_empty(&worker->scheduled));

	while (!list_empty(&cwq->worklist)) {
		struct work_struct *work =
			list_first_entry(&cwq->worklist,
					 struct work_struct, entry);

		/*
		 * The following is a rather inefficient way to close
		 * race window against cpu hotplug operations.  Will
		 * be replaced soon.
		 */
		if (unlikely(!(worker->flags & WORKER_ROGUE) &&
			     !cpumask_equal(&worker->task->cpus_allowed,
					    get_cpu_mask(gcwq->cpu)))) {
			spin_unlock_irq(&gcwq->lock);
			set_cpus_allowed_ptr(worker->task,
					     get_cpu_mask(gcwq->cpu));
			cpu_relax();
			spin_lock_irq(&gcwq->lock);
			continue;
		}

		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
			/* optimization path, not strictly necessary */
			process_one_work(worker, work);
			if (unlikely(!list_empty(&worker->scheduled)))
				process_scheduled_works(worker);
		} else {
			move_linked_works(work, &worker->scheduled, NULL);
			process_scheduled_works(worker);
		}
	}

	/*
	 * gcwq->lock is held and there's no work to process, sleep.
	 * Workers are woken up only while holding gcwq->lock, so
	 * setting the current state before releasing gcwq->lock is
	 * enough to prevent losing any event.
	 */
	worker_enter_idle(worker);
	__set_current_state(TASK_INTERRUPTIBLE);
	spin_unlock_irq(&gcwq->lock);
	schedule();
	goto woke_up;
}
struct wq_barrier {
	struct work_struct	work;
	struct completion	done;
};

static void wq_barrier_func(struct work_struct *work)
{
	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
	complete(&barr->done);
}
/**
 * insert_wq_barrier - insert a barrier work
 * @cwq: cwq to insert barrier into
 * @barr: wq_barrier to insert
 * @target: target work to attach @barr to
 * @worker: worker currently executing @target, NULL if @target is not executing
 *
 * @barr is linked to @target such that @barr is completed only after
 * @target finishes execution.  Please note that the ordering
 * guarantee is observed only with respect to @target and on the local
 * cpu.
 *
 * Currently, a queued barrier can't be canceled.  This is because
 * try_to_grab_pending() can't determine whether the work to be
 * grabbed is at the head of the queue and thus can't clear LINKED
 * flag of the previous work while there must be a valid next work
 * after a work with LINKED flag set.
 *
 * Note that when @worker is non-NULL, @target may be modified
 * underneath us, so we can't reliably determine cwq from @target.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock).
 */
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
			      struct wq_barrier *barr,
			      struct work_struct *target, struct worker *worker)
{
	struct list_head *head;
	unsigned int linked = 0;

	/*
	 * debugobject calls are safe here even with gcwq->lock locked
	 * as we know for sure that this will not trigger any of the
	 * checks and call back into the fixup functions where we
	 * sleep.
	 */
	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
	init_completion(&barr->done);

	/*
	 * If @target is currently being executed, schedule the
	 * barrier to the worker; otherwise, put it after @target.
	 */
	if (worker)
		head = worker->scheduled.next;
	else {
		unsigned long *bits = work_data_bits(target);

		head = target->entry.next;
		/* there can already be other linked works, inherit and set */
		linked = *bits & WORK_STRUCT_LINKED;
		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
	}

	debug_work_activate(&barr->work);
	insert_work(cwq, &barr->work, head,
		    work_color_to_flags(WORK_NO_COLOR) | linked);
}
/**
 * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
 * @wq: workqueue being flushed
 * @flush_color: new flush color, < 0 for no-op
 * @work_color: new work color, < 0 for no-op
 *
 * Prepare cwqs for workqueue flushing.
 *
 * If @flush_color is non-negative, flush_color on all cwqs should be
 * -1.  If no cwq has in-flight commands at the specified color, all
 * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
 * has in flight commands, its cwq->flush_color is set to
 * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
 * wakeup logic is armed and %true is returned.
 *
 * The caller should have initialized @wq->first_flusher prior to
 * calling this function with non-negative @flush_color.  If
 * @flush_color is negative, no flush color update is done and %false
 * is returned.
 *
 * If @work_color is non-negative, all cwqs should have the same
 * work_color which is previous to @work_color and all will be
 * advanced to @work_color.
 *
 * CONTEXT:
 * mutex_lock(wq->flush_mutex).
 *
 * RETURNS:
 * %true if @flush_color >= 0 and there's something to flush.  %false
 * otherwise.
 */
static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
				      int flush_color, int work_color)
{
	bool wait = false;
	unsigned int cpu;

	if (flush_color >= 0) {
		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
		atomic_set(&wq->nr_cwqs_to_flush, 1);
	}

	for_each_possible_cpu(cpu) {
		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
		struct global_cwq *gcwq = cwq->gcwq;

		spin_lock_irq(&gcwq->lock);

		if (flush_color >= 0) {
			BUG_ON(cwq->flush_color != -1);

			if (cwq->nr_in_flight[flush_color]) {
				cwq->flush_color = flush_color;
				atomic_inc(&wq->nr_cwqs_to_flush);
				wait = true;
			}
		}

		if (work_color >= 0) {
			BUG_ON(work_color != work_next_color(cwq->work_color));
			cwq->work_color = work_color;
		}

		spin_unlock_irq(&gcwq->lock);
	}

	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
		complete(&wq->first_flusher->done);

	return wait;
}
/**
 * flush_workqueue - ensure that any scheduled work has run to completion.
 * @wq: workqueue to flush
 *
 * Forces execution of the workqueue and blocks until its completion.
 * This is typically used in driver shutdown handlers.
 *
 * We sleep until all works which were queued on entry have been handled,
 * but we are not livelocked by new incoming ones.
 */
void flush_workqueue(struct workqueue_struct *wq)
{
	struct wq_flusher this_flusher = {
		.list = LIST_HEAD_INIT(this_flusher.list),
		.flush_color = -1,
		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
	};
	int next_color;

	lock_map_acquire(&wq->lockdep_map);
	lock_map_release(&wq->lockdep_map);

	mutex_lock(&wq->flush_mutex);

	/*
	 * Start-to-wait phase
	 */
	next_color = work_next_color(wq->work_color);

	if (next_color != wq->flush_color) {
		/*
		 * Color space is not full.  The current work_color
		 * becomes our flush_color and work_color is advanced
		 * by one.
		 */
		BUG_ON(!list_empty(&wq->flusher_overflow));
		this_flusher.flush_color = wq->work_color;
		wq->work_color = next_color;

		if (!wq->first_flusher) {
			/* no flush in progress, become the first flusher */
			BUG_ON(wq->flush_color != this_flusher.flush_color);

			wq->first_flusher = &this_flusher;

			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
						       wq->work_color)) {
				/* nothing to flush, done */
				wq->flush_color = next_color;
				wq->first_flusher = NULL;
				goto out_unlock;
			}
		} else {
			/* wait in queue */
			BUG_ON(wq->flush_color == this_flusher.flush_color);
			list_add_tail(&this_flusher.list, &wq->flusher_queue);
			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
		}
	} else {
		/*
		 * Oops, color space is full, wait on overflow queue.
		 * The next flush completion will assign us
		 * flush_color and transfer to flusher_queue.
		 */
		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
	}

	mutex_unlock(&wq->flush_mutex);

	wait_for_completion(&this_flusher.done);

	/*
	 * Wake-up-and-cascade phase
	 *
	 * First flushers are responsible for cascading flushes and
	 * handling overflow.  Non-first flushers can simply return.
	 */
	if (wq->first_flusher != &this_flusher)
		return;

	mutex_lock(&wq->flush_mutex);

	wq->first_flusher = NULL;

	BUG_ON(!list_empty(&this_flusher.list));
	BUG_ON(wq->flush_color != this_flusher.flush_color);

	while (true) {
		struct wq_flusher *next, *tmp;

		/* complete all the flushers sharing the current flush color */
		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
			if (next->flush_color != wq->flush_color)
				break;
			list_del_init(&next->list);
			complete(&next->done);
		}

		BUG_ON(!list_empty(&wq->flusher_overflow) &&
		       wq->flush_color != work_next_color(wq->work_color));

		/* this flush_color is finished, advance by one */
		wq->flush_color = work_next_color(wq->flush_color);

		/* one color has been freed, handle overflow queue */
		if (!list_empty(&wq->flusher_overflow)) {
			/*
			 * Assign the same color to all overflowed
			 * flushers, advance work_color and append to
			 * flusher_queue.  This is the start-to-wait
			 * phase for these overflowed flushers.
			 */
			list_for_each_entry(tmp, &wq->flusher_overflow, list)
				tmp->flush_color = wq->work_color;

			wq->work_color = work_next_color(wq->work_color);

			list_splice_tail_init(&wq->flusher_overflow,
					      &wq->flusher_queue);
			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
		}

		if (list_empty(&wq->flusher_queue)) {
			BUG_ON(wq->flush_color != wq->work_color);
			break;
		}

		/*
		 * Need to flush more colors.  Make the next flusher
		 * the new first flusher and arm cwqs.
		 */
		BUG_ON(wq->flush_color == wq->work_color);
		BUG_ON(wq->flush_color != next->flush_color);

		list_del_init(&next->list);
		wq->first_flusher = next;

		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
			break;

		/*
		 * Meh... this color is already done, clear first
		 * flusher and repeat cascading.
		 */
		wq->first_flusher = NULL;
	}

out_unlock:
	mutex_unlock(&wq->flush_mutex);
}
EXPORT_SYMBOL_GPL(flush_workqueue);
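/*
 * Example usage (illustrative sketch; my_wq is a hypothetical name):
 * a typical driver shutdown sequence stops new submissions first,
 * then flushes and destroys the workqueue:
 *
 *	// driver logic guarantees no new works are queued past here
 *	flush_workqueue(my_wq);
 *	destroy_workqueue(my_wq);
 */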
/**
 * flush_work - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns false if @work has already terminated.
 *
 * It is expected that, prior to calling flush_work(), the caller has
 * arranged for the work to not be requeued, otherwise it doesn't make
 * sense to use this function.
 */
int flush_work(struct work_struct *work)
{
	struct worker *worker = NULL;
	struct cpu_workqueue_struct *cwq;
	struct global_cwq *gcwq;
	struct wq_barrier barr;

	might_sleep();
	cwq = get_wq_data(work);
	if (!cwq)
		return 0;
	gcwq = cwq->gcwq;

	lock_map_acquire(&cwq->wq->lockdep_map);
	lock_map_release(&cwq->wq->lockdep_map);

	spin_lock_irq(&gcwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * See the comment near try_to_grab_pending()->smp_rmb().
		 * If it was re-queued under us we are not going to wait.
		 */
		smp_rmb();
		if (unlikely(cwq != get_wq_data(work)))
			goto already_gone;
	} else {
		if (cwq->worker && cwq->worker->current_work == work)
			worker = cwq->worker;
		if (!worker)
			goto already_gone;
	}

	insert_wq_barrier(cwq, &barr, work, worker);
	spin_unlock_irq(&gcwq->lock);
	wait_for_completion(&barr.done);
	destroy_work_on_stack(&barr.work);
	return 1;
already_gone:
	spin_unlock_irq(&gcwq->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(flush_work);
/*
 * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
 * so this work can't be re-armed in any way.
 */
static int try_to_grab_pending(struct work_struct *work)
{
	struct global_cwq *gcwq;
	struct cpu_workqueue_struct *cwq;
	int ret = -1;

	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
		return 0;

	/*
	 * The queueing is in progress, or it is already queued. Try to
	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
	 */
	cwq = get_wq_data(work);
	if (!cwq)
		return ret;
	gcwq = cwq->gcwq;

	spin_lock_irq(&gcwq->lock);
	if (!list_empty(&work->entry)) {
		/*
		 * This work is queued, but perhaps we locked the wrong cwq.
		 * In that case we must see the new value after rmb(), see
		 * insert_work()->wmb().
		 */
		smp_rmb();
		if (cwq == get_wq_data(work)) {
			debug_work_deactivate(work);
			list_del_init(&work->entry);
			cwq_dec_nr_in_flight(cwq, get_work_color(work));
			ret = 1;
		}
	}
	spin_unlock_irq(&gcwq->lock);

	return ret;
}
static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
			     struct work_struct *work)
{
	struct global_cwq *gcwq = cwq->gcwq;
	struct wq_barrier barr;
	struct worker *worker;

	spin_lock_irq(&gcwq->lock);

	worker = NULL;
	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
		worker = cwq->worker;
		insert_wq_barrier(cwq, &barr, work, worker);
	}

	spin_unlock_irq(&gcwq->lock);

	if (unlikely(worker)) {
		wait_for_completion(&barr.done);
		destroy_work_on_stack(&barr.work);
	}
}
static void wait_on_work(struct work_struct *work)
{
	struct cpu_workqueue_struct *cwq;
	struct workqueue_struct *wq;
	int cpu;

	might_sleep();

	lock_map_acquire(&work->lockdep_map);
	lock_map_release(&work->lockdep_map);

	cwq = get_wq_data(work);
	if (!cwq)
		return;
	wq = cwq->wq;

	for_each_possible_cpu(cpu)
		wait_on_cpu_work(get_cwq(cpu, wq), work);
}
static int __cancel_work_timer(struct work_struct *work,
				struct timer_list *timer)
{
	int ret;

	do {
		ret = (timer && likely(del_timer(timer)));
		if (!ret)
			ret = try_to_grab_pending(work);
		wait_on_work(work);
	} while (unlikely(ret < 0));

	clear_wq_data(work);
	return ret;
}
/**
 * cancel_work_sync - block until a work_struct's callback has terminated
 * @work: the work which is to be flushed
 *
 * Returns true if @work was pending.
 *
 * cancel_work_sync() will cancel the work if it is queued. If the work's
 * callback appears to be running, cancel_work_sync() will block until it
 * has completed.
 *
 * It is possible to use this function if the work re-queues itself. It can
 * cancel the work even if it migrates to another workqueue, however in that
 * case it only guarantees that work->func() has completed on the last queued
 * workqueue.
 *
 * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
 * pending, otherwise it goes into a busy-wait loop until the timer expires.
 *
 * The caller must ensure that workqueue_struct on which this work was last
 * queued can't be destroyed before this function returns.
 */
int cancel_work_sync(struct work_struct *work)
{
	return __cancel_work_timer(work, NULL);
}
EXPORT_SYMBOL_GPL(cancel_work_sync);
/**
 * cancel_delayed_work_sync - reliably kill off a delayed work.
 * @dwork: the delayed work struct
 *
 * Returns true if @dwork was pending.
 *
 * It is possible to use this function if @dwork rearms itself via queue_work()
 * or queue_delayed_work(). See also the comment for cancel_work_sync().
 */
int cancel_delayed_work_sync(struct delayed_work *dwork)
{
	return __cancel_work_timer(&dwork->work, &dwork->timer);
}
EXPORT_SYMBOL(cancel_delayed_work_sync);
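/*
 * Example usage (illustrative sketch; my_dwork is a hypothetical name):
 * a self-rearming delayed work is torn down with the _sync variant so
 * both the pending timer and a possibly running callback are handled:
 *
 *	cancel_delayed_work_sync(&my_dwork);	// safe vs. self-rearming
 */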
static struct workqueue_struct *keventd_wq __read_mostly;
/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * Returns zero if @work was already on the kernel-global workqueue and
 * non-zero otherwise.
 *
 * This puts a job in the kernel-global workqueue if it was not already
 * queued and leaves it in the same position on the kernel-global
 * workqueue otherwise.
 */
int schedule_work(struct work_struct *work)
{
	return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);

/**
 * schedule_work_on - put work task on a specific cpu
 * @cpu: cpu to put the work task on
 * @work: job to be done
 *
 * This puts a job on a specific cpu.
 */
int schedule_work_on(int cpu, struct work_struct *work)
{
	return queue_work_on(cpu, keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work_on);

/**
 * schedule_delayed_work - put work task in global workqueue after delay
 * @dwork: job to be done
 * @delay: number of jiffies to wait or 0 for immediate execution
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue.
 */
int schedule_delayed_work(struct delayed_work *dwork,
			  unsigned long delay)
{
	return queue_delayed_work(keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work);
/**
 * flush_delayed_work - block until a dwork_struct's callback has terminated
 * @dwork: the delayed work which is to be flushed
 *
 * Any timeout is cancelled, and any pending work is run immediately.
 */
void flush_delayed_work(struct delayed_work *dwork)
{
	if (del_timer_sync(&dwork->timer)) {
		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
			     &dwork->work);
		put_cpu();
	}
	flush_work(&dwork->work);
}
EXPORT_SYMBOL(flush_delayed_work);
/**
 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
 * @cpu: cpu to use
 * @dwork: job to be done
 * @delay: number of jiffies to wait
 *
 * After waiting for a given time this puts a job in the kernel-global
 * workqueue on the specified CPU.
 */
int schedule_delayed_work_on(int cpu,
			     struct delayed_work *dwork, unsigned long delay)
{
	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
}
EXPORT_SYMBOL(schedule_delayed_work_on);
/**
 * schedule_on_each_cpu - call a function on each online CPU from keventd
 * @func: the function to call
 *
 * Returns zero on success.
 * Returns -ve errno on failure.
 *
 * schedule_on_each_cpu() is very slow.
 */
int schedule_on_each_cpu(work_func_t func)
{
	int cpu;
	int orig = -1;
	struct work_struct *works;

	works = alloc_percpu(struct work_struct);
	if (!works)
		return -ENOMEM;

	get_online_cpus();

	/*
	 * When running in keventd don't schedule a work item on
	 * itself.  Can just call directly because the work queue is
	 * already bound.  This also is faster.
	 */
	if (current_is_keventd())
		orig = raw_smp_processor_id();

	for_each_online_cpu(cpu) {
		struct work_struct *work = per_cpu_ptr(works, cpu);

		INIT_WORK(work, func);
		if (cpu != orig)
			schedule_work_on(cpu, work);
	}
	if (orig >= 0)
		func(per_cpu_ptr(works, orig));

	for_each_online_cpu(cpu)
		flush_work(per_cpu_ptr(works, cpu));

	put_online_cpus();
	free_percpu(works);
	return 0;
}
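/*
 * Example usage (illustrative sketch; drain_local_cache is a
 * hypothetical function name):
 *
 *	static void drain_local_cache(struct work_struct *unused)
 *	{
 *		// runs once on every online CPU, in process context
 *	}
 *
 *	schedule_on_each_cpu(drain_local_cache);
 */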
/**
 * flush_scheduled_work - ensure that any scheduled work has run to completion.
 *
 * Forces execution of the kernel-global workqueue and blocks until its
 * completion.
 *
 * Think twice before calling this function!  It's very easy to get into
 * trouble if you don't take great care.  Either of the following situations
 * will lead to deadlock:
 *
 *	One of the work items currently on the workqueue needs to acquire
 *	a lock held by your code or its caller.
 *
 *	Your code is running in the context of a work routine.
 *
 * They will be detected by lockdep when they occur, but the first might not
 * occur very often.  It depends on what work items are on the workqueue and
 * what locks they need, which you have no control over.
 *
 * In most situations flushing the entire workqueue is overkill; you merely
 * need to know that a particular work item isn't queued and isn't running.
 * In such cases you should use cancel_delayed_work_sync() or
 * cancel_work_sync() instead.
 */
void flush_scheduled_work(void)
{
	flush_workqueue(keventd_wq);
}
EXPORT_SYMBOL(flush_scheduled_work);
/**
 * execute_in_process_context - reliably execute the routine with user context
 * @fn:		the function to execute
 * @ew:		guaranteed storage for the execute work structure (must
 *		be available when the work executes)
 *
 * Executes the function immediately if process context is available,
 * otherwise schedules the function for delayed execution.
 *
 * Returns:	0 - function was executed
 *		1 - function was scheduled for execution
 */
int execute_in_process_context(work_func_t fn, struct execute_work *ew)
{
	if (!in_interrupt()) {
		fn(&ew->work);
		return 0;
	}

	INIT_WORK(&ew->work, fn);
	schedule_work(&ew->work);

	return 1;
}
EXPORT_SYMBOL_GPL(execute_in_process_context);
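/*
 * Example usage (illustrative sketch; release_fn and obj->ew are
 * hypothetical names): useful from release paths which may run in
 * either process or interrupt context:
 *
 *	execute_in_process_context(release_fn, &obj->ew);
 *	// runs release_fn(&obj->ew.work) now, or later via keventd
 */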
int keventd_up(void)
{
	return keventd_wq != NULL;
}

int current_is_keventd(void)
{
	struct cpu_workqueue_struct *cwq;
	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
	int ret = 0;

	BUG_ON(!keventd_wq);

	cwq = get_cwq(cpu, keventd_wq);
	if (current == cwq->worker->task)
		ret = 1;

	return ret;
}
static struct cpu_workqueue_struct *alloc_cwqs(void)
{
	/*
	 * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
	 * Make sure that the alignment isn't lower than that of
	 * unsigned long long.
	 */
	const size_t size = sizeof(struct cpu_workqueue_struct);
	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
				   __alignof__(unsigned long long));
	struct cpu_workqueue_struct *cwqs;
#ifndef CONFIG_SMP
	void *ptr;

	/*
	 * On UP, percpu allocator doesn't honor alignment parameter
	 * and simply uses arch-dependent default.  Allocate enough
	 * room to align cwq and put an extra pointer at the end
	 * pointing back to the originally allocated pointer which
	 * will be used for free.
	 *
	 * FIXME: This really belongs to UP percpu code.  Update UP
	 * percpu code to honor alignment and remove this ugliness.
	 */
	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
	cwqs = PTR_ALIGN(ptr, align);
	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
#else
	/* On SMP, percpu allocator can do it itself */
	cwqs = __alloc_percpu(size, align);
#endif
	/* just in case, make sure it's actually aligned */
	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
	return cwqs;
}

static void free_cwqs(struct cpu_workqueue_struct *cwqs)
{
#ifndef CONFIG_SMP
	/* on UP, the pointer to free is stored right after the cwq */
	if (cwqs)
		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
#else
	free_percpu(cwqs);
#endif
}
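/*
 * Illustrative sketch of the UP trick above (addresses are made up):
 * if __alloc_percpu() returns ptr == ...1008 and align == 256,
 * PTR_ALIGN(ptr, align) yields cwqs == ...1100, and the original ptr
 * is stashed at per_cpu_ptr(cwqs + 1, 0), i.e. right past the aligned
 * cwq, so free_cwqs() can recover and free it.
 */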
struct workqueue_struct *__create_workqueue_key(const char *name,
						unsigned int flags,
						int max_active,
						struct lock_class_key *key,
						const char *lock_name)
{
	bool singlethread = flags & WQ_SINGLE_THREAD;
	struct workqueue_struct *wq;
	bool failed = false;
	unsigned int cpu;

	max_active = clamp_val(max_active, 1, INT_MAX);

	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
	if (!wq)
		goto err;

	wq->cpu_wq = alloc_cwqs();
	if (!wq->cpu_wq)
		goto err;

	wq->flags = flags;
	wq->saved_max_active = max_active;
	mutex_init(&wq->flush_mutex);
	atomic_set(&wq->nr_cwqs_to_flush, 0);
	INIT_LIST_HEAD(&wq->flusher_queue);
	INIT_LIST_HEAD(&wq->flusher_overflow);
	wq->name = name;
	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
	INIT_LIST_HEAD(&wq->list);

	cpu_maps_update_begin();
	/*
	 * We must initialize cwqs for each possible cpu even if we
	 * are going to call destroy_workqueue() finally. Otherwise
	 * cpu_up() can hit the uninitialized cwq once we drop the
	 * lock.
	 */
	for_each_possible_cpu(cpu) {
		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
		struct global_cwq *gcwq = get_gcwq(cpu);

		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
		cwq->gcwq = gcwq;
		cwq->wq = wq;
		cwq->flush_color = -1;
		cwq->max_active = max_active;
		INIT_LIST_HEAD(&cwq->worklist);
		INIT_LIST_HEAD(&cwq->delayed_works);

		if (failed)
			continue;
		cwq->worker = create_worker(cwq,
					    cpu_online(cpu) && !singlethread);
		if (cwq->worker)
			start_worker(cwq->worker);
		else
			failed = true;
	}

	/*
	 * workqueue_lock protects global freeze state and workqueues
	 * list.  Grab it, set max_active accordingly and add the new
	 * workqueue to workqueues list.
	 */
	spin_lock(&workqueue_lock);

	if (workqueue_freezing && wq->flags & WQ_FREEZEABLE)
		for_each_possible_cpu(cpu)
			get_cwq(cpu, wq)->max_active = 0;

	list_add(&wq->list, &workqueues);

	spin_unlock(&workqueue_lock);

	cpu_maps_update_done();

	if (failed) {
		destroy_workqueue(wq);
		wq = NULL;
	}
	return wq;
err:
	if (wq) {
		free_cwqs(wq->cpu_wq);
		kfree(wq);
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);
/**
 * destroy_workqueue - safely terminate a workqueue
 * @wq: target workqueue
 *
 * Safely destroy a workqueue. All work currently pending will be done first.
 */
void destroy_workqueue(struct workqueue_struct *wq)
{
	unsigned int cpu;
	int i;

	flush_workqueue(wq);

	/*
	 * wq list is used to freeze wq, remove from list after
	 * flushing is complete in case freeze races us.
	 */
	cpu_maps_update_begin();
	spin_lock(&workqueue_lock);
	list_del(&wq->list);
	spin_unlock(&workqueue_lock);
	cpu_maps_update_done();

	for_each_possible_cpu(cpu) {
		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);

		if (cwq->worker) {
			spin_lock_irq(&cwq->gcwq->lock);
			destroy_worker(cwq->worker);
			cwq->worker = NULL;
			spin_unlock_irq(&cwq->gcwq->lock);
		}

		for (i = 0; i < WORK_NR_COLORS; i++)
			BUG_ON(cwq->nr_in_flight[i]);
		BUG_ON(cwq->nr_active);
		BUG_ON(!list_empty(&cwq->delayed_works));
	}

	free_cwqs(wq->cpu_wq);
	kfree(wq);
}
EXPORT_SYMBOL_GPL(destroy_workqueue);
/*
 * CPU hotplug is implemented by allowing cwqs to be detached from
 * CPU, running with unbound workers and allowing them to be
 * reattached later if the cpu comes back online.  A separate thread
 * is created to govern cwqs in such state and is called the trustee.
 *
 * Trustee states and their descriptions.
 *
 * START	Command state used on startup.  On CPU_DOWN_PREPARE, a
 *		new trustee is started with this state.
 *
 * IN_CHARGE	Once started, trustee will enter this state after
 *		making all existing workers rogue.  DOWN_PREPARE waits
 *		for trustee to enter this state.  After reaching
 *		IN_CHARGE, trustee tries to execute the pending
 *		worklist until it's empty and the state is set to
 *		BUTCHER, or the state is set to RELEASE.
 *
 * BUTCHER	Command state which is set by the cpu callback after
 *		the cpu has gone down.  Once this state is set trustee
 *		knows that there will be no new works on the worklist
 *		and once the worklist is empty it can proceed to
 *		killing idle workers.
 *
 * RELEASE	Command state which is set by the cpu callback if the
 *		cpu down has been canceled or it has come online
 *		again.  After recognizing this state, trustee stops
 *		trying to drain or butcher and transits to DONE.
 *
 * DONE		Trustee will enter this state after BUTCHER or RELEASE
 *		is complete.
 *
 *          trustee                 CPU                draining
 *         took over                down               complete
 * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE
 *                        |                     |                  ^
 *                        | CPU is back online  v   return workers |
 *                         ----------------> RELEASE --------------
 */
/**
 * trustee_wait_event_timeout - timed event wait for trustee
 * @cond: condition to wait for
 * @timeout: timeout in jiffies
 *
 * wait_event_timeout() for trustee to use.  Handles locking and
 * checks for RELEASE request.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  To be used by trustee.
 *
 * RETURNS:
 * Positive indicating left time if @cond is satisfied, 0 if timed
 * out, -1 if canceled.
 */
#define trustee_wait_event_timeout(cond, timeout) ({			\
	long __ret = (timeout);						\
	while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) &&	\
	       __ret) {							\
		spin_unlock_irq(&gcwq->lock);				\
		__wait_event_timeout(gcwq->trustee_wait, (cond) ||	\
			(gcwq->trustee_state == TRUSTEE_RELEASE),	\
			__ret);						\
		spin_lock_irq(&gcwq->lock);				\
	}								\
	gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret);		\
})

/**
 * trustee_wait_event - event wait for trustee
 * @cond: condition to wait for
 *
 * wait_event() for trustee to use.  Automatically handles locking and
 * checks for CANCEL request.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  To be used by trustee.
 *
 * RETURNS:
 * 0 if @cond is satisfied, -1 if canceled.
 */
#define trustee_wait_event(cond) ({					\
	long __ret1;							\
	__ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\
	__ret1 < 0 ? -1 : 0;						\
})
static int __cpuinit trustee_thread(void *__gcwq)
{
	struct global_cwq *gcwq = __gcwq;
	struct worker *worker;
	struct hlist_node *pos;
	int i;

	BUG_ON(gcwq->cpu != smp_processor_id());

	spin_lock_irq(&gcwq->lock);
	/*
	 * Make all multithread workers rogue.  Trustee must be bound
	 * to the target cpu and can't be cancelled.
	 */
	BUG_ON(gcwq->cpu != smp_processor_id());

	list_for_each_entry(worker, &gcwq->idle_list, entry)
		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
			worker->flags |= WORKER_ROGUE;

	for_each_busy_worker(worker, i, pos, gcwq)
		if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
			worker->flags |= WORKER_ROGUE;

	/*
	 * We're now in charge.  Notify and proceed to drain.  We need
	 * to keep the gcwq running during the whole CPU down
	 * procedure as other cpu hotunplug callbacks may need to
	 * flush currently running tasks.
	 */
	gcwq->trustee_state = TRUSTEE_IN_CHARGE;
	wake_up_all(&gcwq->trustee_wait);

	/*
	 * The original cpu is in the process of dying and may go away
	 * anytime now.  When that happens, we and all workers would
	 * be migrated to other cpus.  Try draining any left work.
	 * Note that if the gcwq is frozen, there may be frozen works
	 * in freezeable cwqs.  Don't declare completion while frozen.
	 */
	while (gcwq->nr_workers != gcwq->nr_idle ||
	       gcwq->flags & GCWQ_FREEZING ||
	       gcwq->trustee_state == TRUSTEE_IN_CHARGE) {
		/* give a breather */
		if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0)
			break;
	}

	/* notify completion */
	gcwq->trustee = NULL;
	gcwq->trustee_state = TRUSTEE_DONE;
	wake_up_all(&gcwq->trustee_wait);
	spin_unlock_irq(&gcwq->lock);
	return 0;
}
/**
 * wait_trustee_state - wait for trustee to enter the specified state
 * @gcwq: gcwq the trustee of interest belongs to
 * @state: target state to wait for
 *
 * Wait for the trustee to reach @state.  DONE is already matched.
 *
 * CONTEXT:
 * spin_lock_irq(gcwq->lock) which may be released and regrabbed
 * multiple times.  To be used by cpu_callback.
 */
static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state)
{
	if (!(gcwq->trustee_state == state ||
	      gcwq->trustee_state == TRUSTEE_DONE)) {
		spin_unlock_irq(&gcwq->lock);
		__wait_event(gcwq->trustee_wait,
			     gcwq->trustee_state == state ||
			     gcwq->trustee_state == TRUSTEE_DONE);
		spin_lock_irq(&gcwq->lock);
	}
}
static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
					    unsigned long action,
					    void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct global_cwq *gcwq = get_gcwq(cpu);
	struct task_struct *new_trustee = NULL;
	struct worker *worker;
	struct hlist_node *pos;
	unsigned long flags;
	int i;

	action &= ~CPU_TASKS_FROZEN;

	switch (action) {
	case CPU_DOWN_PREPARE:
		new_trustee = kthread_create(trustee_thread, gcwq,
					     "workqueue_trustee/%d\n", cpu);
		if (IS_ERR(new_trustee))
			return notifier_from_errno(PTR_ERR(new_trustee));
		kthread_bind(new_trustee, cpu);
	}

	/* some are called w/ irq disabled, don't disturb irq status */
	spin_lock_irqsave(&gcwq->lock, flags);

	switch (action) {
	case CPU_DOWN_PREPARE:
		/* initialize trustee and tell it to acquire the gcwq */
		BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE);
		gcwq->trustee = new_trustee;
		gcwq->trustee_state = TRUSTEE_START;
		wake_up_process(gcwq->trustee);
		wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
		break;

	case CPU_POST_DEAD:
		gcwq->trustee_state = TRUSTEE_BUTCHER;
		break;

	case CPU_DOWN_FAILED:
	case CPU_ONLINE:
		if (gcwq->trustee_state != TRUSTEE_DONE) {
			gcwq->trustee_state = TRUSTEE_RELEASE;
			wake_up_process(gcwq->trustee);
			wait_trustee_state(gcwq, TRUSTEE_DONE);
		}

		/* clear ROGUE from all multithread workers */
		list_for_each_entry(worker, &gcwq->idle_list, entry)
			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
				worker->flags &= ~WORKER_ROGUE;

		for_each_busy_worker(worker, i, pos, gcwq)
			if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD))
				worker->flags &= ~WORKER_ROGUE;
		break;
	}

	spin_unlock_irqrestore(&gcwq->lock, flags);

	return notifier_from_errno(0);
}
#ifdef CONFIG_SMP

struct work_for_cpu {
	struct completion completion;
	long (*fn)(void *);
	void *arg;
	long ret;
};

static int do_work_for_cpu(void *_wfc)
{
	struct work_for_cpu *wfc = _wfc;
	wfc->ret = wfc->fn(wfc->arg);
	complete(&wfc->completion);
	return 0;
}
/**
 * work_on_cpu - run a function in user context on a particular cpu
 * @cpu: the cpu to run on
 * @fn: the function to run
 * @arg: the function arg
 *
 * This will return the value @fn returns.
 * It is up to the caller to ensure that the cpu doesn't go offline.
 * The caller must not hold any locks which would prevent @fn from completing.
 */
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct task_struct *sub_thread;
	struct work_for_cpu wfc = {
		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
		.fn = fn,
		.arg = arg,
	};

	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
	if (IS_ERR(sub_thread))
		return PTR_ERR(sub_thread);
	kthread_bind(sub_thread, cpu);
	wake_up_process(sub_thread);
	wait_for_completion(&wfc.completion);
	return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
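/*
 * Example usage (illustrative sketch; read_counter_fn is a
 * hypothetical function name):
 *
 *	static long read_counter_fn(void *arg)
 *	{
 *		// runs on the requested cpu, may sleep
 *		return 0;
 *	}
 *
 *	long ret = work_on_cpu(2, read_counter_fn, NULL);
 */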
#endif /* CONFIG_SMP */
#ifdef CONFIG_FREEZER

/**
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all
 * freezeable workqueues will queue new works to their frozen_works
 * list instead of the cwq ones.
 *
 * CONTEXT:
 * Grabs and releases workqueue_lock and gcwq->lock's.
 */
void freeze_workqueues_begin(void)
{
	struct workqueue_struct *wq;
	unsigned int cpu;

	spin_lock(&workqueue_lock);

	BUG_ON(workqueue_freezing);
	workqueue_freezing = true;

	for_each_possible_cpu(cpu) {
		struct global_cwq *gcwq = get_gcwq(cpu);

		spin_lock_irq(&gcwq->lock);

		BUG_ON(gcwq->flags & GCWQ_FREEZING);
		gcwq->flags |= GCWQ_FREEZING;

		list_for_each_entry(wq, &workqueues, list) {
			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);

			if (wq->flags & WQ_FREEZEABLE)
				cwq->max_active = 0;
		}

		spin_unlock_irq(&gcwq->lock);
	}

	spin_unlock(&workqueue_lock);
}
/**
 * freeze_workqueues_busy - are freezeable workqueues still busy?
 *
 * Check whether freezing is complete.  This function must be called
 * between freeze_workqueues_begin() and thaw_workqueues().
 *
 * CONTEXT:
 * Grabs and releases workqueue_lock.
 *
 * RETURNS:
 * %true if some freezeable workqueues are still busy.  %false if
 * freezing is complete.
 */
bool freeze_workqueues_busy(void)
{
	struct workqueue_struct *wq;
	unsigned int cpu;
	bool busy = false;

	spin_lock(&workqueue_lock);

	BUG_ON(!workqueue_freezing);

	for_each_possible_cpu(cpu) {
		/*
		 * nr_active is monotonically decreasing.  It's safe
		 * to peek without lock.
		 */
		list_for_each_entry(wq, &workqueues, list) {
			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);

			if (!(wq->flags & WQ_FREEZEABLE))
				continue;

			BUG_ON(cwq->nr_active < 0);
			if (cwq->nr_active) {
				busy = true;
				goto out_unlock;
			}
		}
	}
out_unlock:
	spin_unlock(&workqueue_lock);
	return busy;
}
/**
 * thaw_workqueues - thaw workqueues
 *
 * Thaw workqueues.  Normal queueing is restored and all collected
 * frozen works are transferred to their respective cwq worklists.
 *
 * CONTEXT:
 * Grabs and releases workqueue_lock and gcwq->lock's.
 */
void thaw_workqueues(void)
{
	struct workqueue_struct *wq;
	unsigned int cpu;

	spin_lock(&workqueue_lock);

	if (!workqueue_freezing)
		goto out_unlock;

	for_each_possible_cpu(cpu) {
		struct global_cwq *gcwq = get_gcwq(cpu);

		spin_lock_irq(&gcwq->lock);

		BUG_ON(!(gcwq->flags & GCWQ_FREEZING));
		gcwq->flags &= ~GCWQ_FREEZING;

		list_for_each_entry(wq, &workqueues, list) {
			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);

			if (!(wq->flags & WQ_FREEZEABLE))
				continue;

			/* restore max_active and repopulate worklist */
			cwq->max_active = wq->saved_max_active;

			while (!list_empty(&cwq->delayed_works) &&
			       cwq->nr_active < cwq->max_active)
				cwq_activate_first_delayed(cwq);

			wake_up_process(cwq->worker->task);
		}

		spin_unlock_irq(&gcwq->lock);
	}

	workqueue_freezing = false;
out_unlock:
	spin_unlock(&workqueue_lock);
}
#endif /* CONFIG_FREEZER */
void __init init_workqueues(void)
{
	unsigned int cpu;
	int i;

	singlethread_cpu = cpumask_first(cpu_possible_mask);
	hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE);

	/* initialize gcwqs */
	for_each_possible_cpu(cpu) {
		struct global_cwq *gcwq = get_gcwq(cpu);

		spin_lock_init(&gcwq->lock);
		gcwq->cpu = cpu;

		INIT_LIST_HEAD(&gcwq->idle_list);
		for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
			INIT_HLIST_HEAD(&gcwq->busy_hash[i]);

		ida_init(&gcwq->worker_ida);

		gcwq->trustee_state = TRUSTEE_DONE;
		init_waitqueue_head(&gcwq->trustee_wait);
	}

	keventd_wq = create_workqueue("events");
	BUG_ON(!keventd_wq);
}