/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
 *
 * See Documentation/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>

#include "rtmutex_common.h"
/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bit 0
 * is used to keep track of the "lock has waiters" state.
 *
 * owner	bit0
 * NULL		0	lock is free (fast acquire possible)
 * NULL		1	lock is free and has waiters and the top waiter
 *			is going to take the lock*
 * taskpointer	0	lock is held (fast release possible)
 * taskpointer	1	lock is held and has waiters**
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 of lock->owner is 0.
 *
 * (*) It also can be a transitional state when grabbing the lock
 * while ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
 * we need to set bit0 before looking at the lock, and the owner may be
 * NULL in this small time, hence this can be a transitional state.
 *
 * (**) There is a small time when bit 0 is set but there are no
 * waiters. This can happen when grabbing the lock in the slow path.
 * To prevent a cmpxchg of the owner releasing the lock, we need to
 * set this bit before looking at the lock.
 */
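/*
 * Illustrative sketch (not part of this file): decoding the owner word.
 * The real accessors live in rtmutex_common.h; the masking below is only
 * meant to make the table above concrete.
 *
 *	unsigned long val = (unsigned long)lock->owner;
 *	struct task_struct *owner;
 *	bool has_waiters;
 *
 *	owner = (struct task_struct *)(val & ~RT_MUTEX_HAS_WAITERS);
 *	has_waiters = val & RT_MUTEX_HAS_WAITERS;
 *
 * E.g. a task_struct at the hypothetical address 0xffff880012345678 with
 * waiters queued is stored as 0xffff880012345679 (bit 0 set).
 */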
static void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner;

	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;

	lock->owner = (struct task_struct *)val;
}
static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}
static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		clear_rt_mutex_waiters(lock);
}
/*
 * We can speed up the acquire/release, if the architecture
 * supports cmpxchg and if there's no debugging state to be set up
 */
#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	unsigned long owner, *p = (unsigned long *) &lock->owner;

	do {
		owner = *p;
	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
}

/*
 * Safe fastpath aware unlock:
 * 1) Clear the waiters bit
 * 2) Drop lock->wait_lock
 * 3) Try to unlock the lock with cmpxchg
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	struct task_struct *owner = rt_mutex_owner(lock);

	clear_rt_mutex_waiters(lock);
	raw_spin_unlock(&lock->wait_lock);
	/*
	 * If a new waiter comes in between the unlock and the cmpxchg
	 * we have two situations:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 * cmpxchg(p, owner, 0) == owner
	 *					mark_rt_mutex_waiters(lock);
	 *					acquire(lock);
	 * or:
	 *
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					mark_rt_mutex_waiters(lock);
	 *
	 * cmpxchg(p, owner, 0) != owner
	 *					enqueue_waiter();
	 *					unlock(wait_lock);
	 * lock(wait_lock);
	 * wake waiter();
	 * unlock(wait_lock);
	 *					lock(wait_lock);
	 *					acquire(lock);
	 */
	return rt_mutex_cmpxchg(lock, owner, NULL);
}
#else
# define rt_mutex_cmpxchg(l,c,n)	(0)
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
{
	lock->owner = (struct task_struct *)
			((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
}

/*
 * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 */
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
	__releases(lock->wait_lock)
{
	lock->owner = NULL;
	raw_spin_unlock(&lock->wait_lock);
	return true;
}
#endif
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
		     struct rt_mutex_waiter *right)
{
	if (left->prio < right->prio)
		return 1;

	/*
	 * If both waiters have dl_prio(), we check the deadlines of the
	 * associated tasks.
	 * If left waiter has a dl_prio(), and we didn't return 1 above,
	 * then right waiter has a dl_prio() too.
	 */
	if (dl_prio(left->prio))
		return (left->task->dl.deadline < right->task->dl.deadline);

	return 0;
}
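/*
 * Ordering example (illustrative only): kernel-internal prio values compare
 * with "lower is more important", so a SCHED_FIFO waiter at rt_priority 98
 * (kernel prio 1) sorts before one at rt_priority 90 (kernel prio 9).
 * SCHED_DEADLINE waiters all share the same dl prio, so ties between them
 * are broken by the earlier dl.deadline.
 */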
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &lock->waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		lock->waiters_leftmost = &waiter->tree_entry;

	rb_link_node(&waiter->tree_entry, parent, link);
	rb_insert_color(&waiter->tree_entry, &lock->waiters);
}
static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->tree_entry))
		return;

	if (lock->waiters_leftmost == &waiter->tree_entry)
		lock->waiters_leftmost = rb_next(&waiter->tree_entry);

	rb_erase(&waiter->tree_entry, &lock->waiters);
	RB_CLEAR_NODE(&waiter->tree_entry);
}
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	struct rb_node **link = &task->pi_waiters.rb_node;
	struct rb_node *parent = NULL;
	struct rt_mutex_waiter *entry;
	int leftmost = 1;

	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
		if (rt_mutex_waiter_less(waiter, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = 0;
		}
	}

	if (leftmost)
		task->pi_waiters_leftmost = &waiter->pi_tree_entry;

	rb_link_node(&waiter->pi_tree_entry, parent, link);
	rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}
static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
		return;

	if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
		task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);

	rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
	RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
/*
 * Calculate task priority from the waiter tree priority
 *
 * Return task->normal_prio when the waiter tree is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return task->normal_prio;

	return min(task_top_pi_waiter(task)->prio,
		   task->normal_prio);
}

struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
	if (likely(!task_has_pi_waiters(task)))
		return NULL;

	return task_top_pi_waiter(task)->task;
}
/*
 * Called by sched_setscheduler() to check whether the priority change
 * is overruled by a possible priority boosting.
 */
int rt_mutex_check_prio(struct task_struct *task, int newprio)
{
	if (!task_has_pi_waiters(task))
		return 0;

	return task_top_pi_waiter(task)->task->prio <= newprio;
}
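/*
 * Worked example (illustrative only): a task whose top pi waiter runs at
 * kernel prio 2 is asked via sched_setscheduler() to drop to the weaker
 * prio 10. Since 2 <= 10, the check above returns true and the boosted
 * priority is kept; a request for the stronger prio 1 would not be
 * overruled.
 */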
/*
 * Adjust the priority of a task, after its pi_waiters got modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
static void __rt_mutex_adjust_prio(struct task_struct *task)
{
	int prio = rt_mutex_getprio(task);

	if (task->prio != prio || dl_prio(prio))
		rt_mutex_setprio(task, prio);
}
/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of task. We do not use the spin_xx_mutex() variants here as we are
 * outside of the debug path.)
 */
static void rt_mutex_adjust_prio(struct task_struct *task)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
{
	return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
}
/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage by one - may thus free the task.
 *
 * @task:	the task owning the mutex (owner) for which a chain walk is
 *		probably needed
 * @deadlock_detect: do we have to carry out deadlock detection?
 * @orig_lock:	the mutex (can be NULL if we are walking the chain to recheck
 *		things for a task that has just got its priority adjusted, and
 *		is waiting on a mutex)
 * @next_lock:	the mutex on which the owner of @orig_lock was blocked before
 *		we dropped its pi_lock. Is never dereferenced, only used for
 *		comparison to detect lock chain changes.
 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
 *		its priority to the mutex owner (can be NULL in the case
 *		depicted above or if the top waiter has gone away and we are
 *		actually deboosting the owner)
 * @top_task:	the current top waiter
 *
 * Returns 0 or -EDEADLK.
 *
 * Chain walk basics and protection scope
 *
 * [R] refcount on task
 * [P] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
 * Step	Description				Protected by
 *	function arguments:
 *	@task					[R]
 *	@orig_lock if != NULL			@top_task is blocked on it
 *	@next_lock				Unprotected. Cannot be
 *						dereferenced. Only used for
 *						comparison.
 *	@orig_waiter if != NULL			@top_task is blocked on it
 *	@top_task				current, or in case of proxy
 *						locking protected by calling
 *						code
 *	again:
 *	  loop_sanity_check();
 *	retry:
 * [1]	  lock(task->pi_lock);			[R] acquire [P]
 * [2]	  waiter = task->pi_blocked_on;		[P]
 * [3]	  check_exit_conditions_1();		[P]
 * [4]	  lock = waiter->lock;			[P]
 * [5]	  if (!try_lock(lock->wait_lock)) {	[P] try to acquire [L]
 *	    unlock(task->pi_lock);		release [P]
 *	    goto retry;
 *	  }
 * [6]	  check_exit_conditions_2();		[P] + [L]
 * [7]	  requeue_lock_waiter(lock, waiter);	[P] + [L]
 * [8]	  unlock(task->pi_lock);		release [P]
 *	  put_task_struct(task);		release [R]
 * [9]	  check_exit_conditions_3();		[L]
 * [10]	  task = owner(lock);			[L]
 *	  get_task_struct(task);		[L] acquire [R]
 *	  lock(task->pi_lock);			[L] acquire [P]
 * [11]	  requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
 * [12]	  check_exit_conditions_4();		[P] + [L]
 * [13]	  unlock(task->pi_lock);		release [P]
 *	  unlock(lock->wait_lock);		release [L]
 *	  goto again;
 */
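/*
 * Illustrative scenario (not part of the walk above, names are made up):
 * task A blocks on L1, owned by B; B is itself blocked on L2, owned by C.
 * A's chain walk requeues A on L1, boosts B, follows B->pi_blocked_on to
 * L2, requeues B on L2 and boosts C. C owns no further lock, so
 * check_exit_conditions_3() ends the walk there.
 */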
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
				      int deadlock_detect,
				      struct rt_mutex *orig_lock,
				      struct rt_mutex *next_lock,
				      struct rt_mutex_waiter *orig_waiter,
				      struct task_struct *top_task)
{
	struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
	struct rt_mutex_waiter *prerequeue_top_waiter;
	int detect_deadlock, ret = 0, depth = 0;
	struct rt_mutex *lock;
	unsigned long flags;

	detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
							 deadlock_detect);
	/*
	 * The (de)boosting is a step by step approach with a lot of
	 * pitfalls. We want this to be preemptible and we want to hold a
	 * maximum of two locks per step. So we have to check
	 * carefully whether things change under us.
	 */
 again:
	/*
	 * We limit the lock chain length for each invocation.
	 */
	if (++depth > max_lock_depth) {
		static int prev_max;

		/*
		 * Print this only once. If the admin changes the limit,
		 * print a new message when reaching the limit again.
		 */
		if (prev_max != max_lock_depth) {
			prev_max = max_lock_depth;
			printk(KERN_WARNING "Maximum lock depth %d reached "
			       "task: %s (%d)\n", max_lock_depth,
			       top_task->comm, task_pid_nr(top_task));
		}
		put_task_struct(task);

		return -EDEADLK;
	}
	/*
	 * We are fully preemptible here and only hold the refcount on
	 * @task. So everything can have changed under us since the
	 * caller or our own code below (goto retry/again) dropped all
	 * locks.
	 */
 retry:
	/*
	 * [1] Task cannot go away as we did a get_task() before !
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);

	/*
	 * [2] Get the waiter on which @task is blocked.
	 */
	waiter = task->pi_blocked_on;
	/*
	 * [3] check_exit_conditions_1() protected by task->pi_lock.
	 */

	/*
	 * Check whether the end of the boosting chain has been
	 * reached or the state of the chain has changed while we
	 * dropped the locks.
	 */
	if (!waiter)
		goto out_unlock_pi;

	/*
	 * Check the orig_waiter state. After we dropped the locks,
	 * the previous owner of the lock might have released the lock.
	 */
	if (orig_waiter && !rt_mutex_owner(orig_lock))
		goto out_unlock_pi;

	/*
	 * We dropped all locks after taking a refcount on @task, so
	 * the task might have moved on in the lock chain or even left
	 * the chain completely and blocks now on an unrelated lock or
	 * on @orig_lock.
	 *
	 * We stored the lock on which @task was blocked in @next_lock,
	 * so we can detect the chain change.
	 */
	if (next_lock != waiter->lock)
		goto out_unlock_pi;

	/*
	 * Drop out, when the task has no waiters. Note,
	 * top_waiter can be NULL, when we are in the deboosting
	 * mode!
	 */
	if (top_waiter) {
		if (!task_has_pi_waiters(task))
			goto out_unlock_pi;
		/*
		 * If deadlock detection is off, we stop here if we
		 * are not the top pi waiter of the task.
		 */
		if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
			goto out_unlock_pi;
	}

	/*
	 * When deadlock detection is off then we check, if further
	 * priority adjustment is necessary.
	 */
	if (!detect_deadlock && waiter->prio == task->prio)
		goto out_unlock_pi;
	/*
	 * [4] Get the next lock
	 */
	lock = waiter->lock;

	/*
	 * [5] We need to trylock here as we are holding task->pi_lock,
	 * which is the reverse lock order versus the other rtmutex
	 * operations.
	 */
	if (!raw_spin_trylock(&lock->wait_lock)) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		cpu_relax();
		goto retry;
	}
	/*
	 * [6] check_exit_conditions_2() protected by task->pi_lock and
	 * lock->wait_lock.
	 *
	 * Deadlock detection. If the lock is the same as the original
	 * lock which caused us to walk the lock chain or if the
	 * current lock is owned by the task which initiated the chain
	 * walk, we detected a deadlock.
	 */
	if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
		debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
		raw_spin_unlock(&lock->wait_lock);
		ret = -EDEADLK;
		goto out_unlock_pi;
	}

	/*
	 * Store the current top waiter before doing the requeue
	 * operation on @lock. We need it for the boost/deboost
	 * decision below.
	 */
	prerequeue_top_waiter = rt_mutex_top_waiter(lock);
	/* [7] Requeue the waiter in the lock waiter list. */
	rt_mutex_dequeue(lock, waiter);
	waiter->prio = task->prio;
	rt_mutex_enqueue(lock, waiter);

	/* [8] Release the task */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	put_task_struct(task);
	/*
	 * [9] check_exit_conditions_3 protected by lock->wait_lock.
	 *
	 * We must abort the chain walk if there is no lock owner even
	 * in the dead lock detection case, as we have nothing to
	 * follow here. This is the end of the chain we are walking.
	 */
	if (!rt_mutex_owner(lock)) {
		/*
		 * If the requeue [7] above changed the top waiter,
		 * then we need to wake the new top waiter up to try
		 * to get the lock.
		 */
		if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
			wake_up_process(rt_mutex_top_waiter(lock)->task);
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}
	/* [10] Grab the next task, i.e. the owner of @lock */
	task = rt_mutex_owner(lock);
	get_task_struct(task);
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	/* [11] requeue the pi waiters if necessary */
	if (waiter == rt_mutex_top_waiter(lock)) {
		/*
		 * The waiter became the new top (highest priority)
		 * waiter on the lock. Replace the previous top waiter
		 * in the owner task's pi waiters list with this waiter
		 * and adjust the priority of the owner.
		 */
		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);

	} else if (prerequeue_top_waiter == waiter) {
		/*
		 * The waiter was the top waiter on the lock, but is
		 * no longer the top priority waiter. Replace waiter in
		 * the owner task's pi waiters list with the new top
		 * (highest priority) waiter and adjust the priority
		 * of the owner.
		 *
		 * The new top waiter is stored in @waiter so that
		 * @waiter == @top_waiter evaluates to true below and
		 * we continue to deboost the rest of the chain.
		 */
		rt_mutex_dequeue_pi(task, waiter);
		waiter = rt_mutex_top_waiter(lock);
		rt_mutex_enqueue_pi(task, waiter);
		__rt_mutex_adjust_prio(task);
	} else {
		/*
		 * Nothing changed. No need to do any priority
		 * adjustment.
		 */
	}
	/*
	 * [12] check_exit_conditions_4() protected by task->pi_lock
	 * and lock->wait_lock. The actual decisions are made after we
	 * dropped the locks.
	 *
	 * Check whether the task which owns the current lock is pi
	 * blocked itself. If yes we store a pointer to the lock for
	 * the lock chain change detection above. After we dropped
	 * task->pi_lock next_lock cannot be dereferenced anymore.
	 */
	next_lock = task_blocked_on_lock(task);
	/*
	 * Store the top waiter of @lock for the end of chain walk
	 * decision below.
	 */
	top_waiter = rt_mutex_top_waiter(lock);

	/* [13] Drop the locks */
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
	raw_spin_unlock(&lock->wait_lock);
	/*
	 * Make the actual exit decisions [12], based on the stored
	 * values.
	 *
	 * We reached the end of the lock chain. Stop right here. No
	 * point to go back just to figure that out.
	 */
	if (!next_lock)
		goto out_put_task;

	/*
	 * If the current waiter is not the top waiter on the lock,
	 * then we can stop the chain walk here if we are not in full
	 * deadlock detection mode.
	 */
	if (!detect_deadlock && waiter != top_waiter)
		goto out_put_task;

	goto again;

 out_unlock_pi:
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
	put_task_struct(task);

	return ret;
}
/*
 * Try to take an rt-mutex
 *
 * Must be called with lock->wait_lock held.
 *
 * @lock:   The lock to be acquired.
 * @task:   The task which wants to acquire the lock
 * @waiter: The waiter that is queued to the lock's wait list if the
 *	    callsite called task_blocked_on_lock(), otherwise NULL
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
				struct rt_mutex_waiter *waiter)
{
	unsigned long flags;

	/*
	 * Before testing whether we can acquire @lock, we set the
	 * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
	 * other tasks which try to modify @lock into the slow path
	 * and they serialize on @lock->wait_lock.
	 *
	 * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
	 * as explained at the top of this file if and only if:
	 *
	 * - There is a lock owner. The caller must fixup the
	 *   transient state if it does a trylock or leaves the lock
	 *   function due to a signal or timeout.
	 *
	 * - @task acquires the lock and there are no other
	 *   waiters. This is undone in rt_mutex_set_owner(@task) at
	 *   the end of this function.
	 */
	mark_rt_mutex_waiters(lock);

	/*
	 * If @lock has an owner, give up.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * If @waiter != NULL, @task has already enqueued the waiter
	 * into @lock waiter list. If @waiter == NULL then this is a
	 * trylock attempt.
	 */
	if (waiter) {
		/*
		 * If waiter is not the highest priority waiter of
		 * @lock, give up.
		 */
		if (waiter != rt_mutex_top_waiter(lock))
			return 0;

		/*
		 * We can acquire the lock. Remove the waiter from the
		 * lock waiters list.
		 */
		rt_mutex_dequeue(lock, waiter);

	} else {
		/*
		 * If the lock has waiters already we check whether @task is
		 * eligible to take over the lock.
		 *
		 * If there are no other waiters, @task can acquire
		 * the lock. @task->pi_blocked_on is NULL, so it does
		 * not need to be dequeued.
		 */
		if (rt_mutex_has_waiters(lock)) {
			/*
			 * If @task->prio is greater than or equal to
			 * the top waiter priority (kernel view),
			 * @task lost.
			 */
			if (task->prio >= rt_mutex_top_waiter(lock)->prio)
				return 0;

			/*
			 * The current top waiter stays enqueued. We
			 * don't have to change anything in the lock
			 * waiters order.
			 */
		} else {
			/*
			 * No waiters. Take the lock without the
			 * pi_lock dance. @task->pi_blocked_on is NULL
			 * and we have no waiters to enqueue in @task
			 * pi waiters list.
			 */
			goto takeit;
		}
	}

	/*
	 * Clear @task->pi_blocked_on. Requires protection by
	 * @task->pi_lock. Redundant operation for the @waiter == NULL
	 * case, but conditionals are more expensive than a redundant
	 * store.
	 */
	raw_spin_lock_irqsave(&task->pi_lock, flags);
	task->pi_blocked_on = NULL;
	/*
	 * Finish the lock acquisition. @task is the new owner. If
	 * other waiters exist we have to insert the highest priority
	 * waiter into @task->pi_waiters list.
	 */
	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

takeit:
	/* We got the lock. */
	debug_rt_mutex_lock(lock);

	/*
	 * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
	 * are still waiters or clears it.
	 */
	rt_mutex_set_owner(lock, task);

	rt_mutex_deadlock_account_lock(lock, task);

	return 1;
}
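/*
 * Illustrative note on the priority check above (kernel prios compare with
 * "lower is better"): a trylocking task at prio 5 loses to a queued top
 * waiter at prio 3 (5 >= 3), while a task at prio 2 may take the lock over
 * that same waiter.
 */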
/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
				   struct rt_mutex_waiter *waiter,
				   struct task_struct *task,
				   int detect_deadlock)
{
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex_waiter *top_waiter = waiter;
	struct rt_mutex *next_lock;
	int chain_walk = 0, res;
	unsigned long flags;

	/*
	 * Early deadlock detection. We really don't want the task to
	 * enqueue on itself just to untangle the mess later. It's not
	 * only an optimization. We drop the locks, so another waiter
	 * can come in before the chain walk detects the deadlock. So
	 * the other will detect the deadlock and return -EDEADLOCK,
	 * which is wrong, as the other waiter is not in a deadlock
	 * situation.
	 */
	if (owner == task)
		return -EDEADLK;

	raw_spin_lock_irqsave(&task->pi_lock, flags);
	__rt_mutex_adjust_prio(task);
	waiter->task = task;
	waiter->lock = lock;
	waiter->prio = task->prio;

	/* Get the top priority waiter on the lock */
	if (rt_mutex_has_waiters(lock))
		top_waiter = rt_mutex_top_waiter(lock);
	rt_mutex_enqueue(lock, waiter);

	task->pi_blocked_on = waiter;

	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	if (!owner)
		return 0;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);
	if (waiter == rt_mutex_top_waiter(lock)) {
		rt_mutex_dequeue_pi(owner, top_waiter);
		rt_mutex_enqueue_pi(owner, waiter);

		__rt_mutex_adjust_prio(owner);
		if (owner->pi_blocked_on)
			chain_walk = 1;
	} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
		chain_walk = 1;
	}

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
	/*
	 * Even if full deadlock detection is on, if the owner is not
	 * blocked itself, we can avoid finding this out in the chain
	 * walk.
	 */
	if (!chain_walk || !next_lock)
		return 0;

	/*
	 * The owner can't disappear while holding a lock,
	 * so the owner struct is protected by wait_lock.
	 * Gets dropped in rt_mutex_adjust_prio_chain()!
	 */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
					 next_lock, waiter, task);

	raw_spin_lock(&lock->wait_lock);

	return res;
}
/*
 * Wake up the next waiter on the lock.
 *
 * Remove the top waiter from the current task's pi waiter list and
 * wake it up.
 *
 * Called with lock->wait_lock held.
 */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
	struct rt_mutex_waiter *waiter;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);

	waiter = rt_mutex_top_waiter(lock);

	/*
	 * Remove it from current->pi_waiters. We do not adjust a
	 * possible priority boost right now. We execute wakeup in the
	 * boosted mode and go back to normal after releasing
	 * lock->wait_lock.
	 */
	rt_mutex_dequeue_pi(current, waiter);

	/*
	 * As we are waking up the top waiter, and the waiter stays
	 * queued on the lock until it gets the lock, this lock
	 * obviously has waiters. Just set the bit here and this has
	 * the added benefit of forcing all new tasks into the
	 * slow path making sure no task of lower priority than
	 * the top waiter can steal this lock.
	 */
	lock->owner = (void *) RT_MUTEX_HAS_WAITERS;

	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * It's safe to dereference waiter as it cannot go away as
	 * long as we hold lock->wait_lock. The waiter task needs to
	 * acquire it in order to dequeue the waiter.
	 */
	wake_up_process(waiter->task);
}
/*
 * Remove a waiter from a lock and give up
 *
 * Must be called with lock->wait_lock held and
 * have just failed to try_to_take_rt_mutex().
 */
static void remove_waiter(struct rt_mutex *lock,
			  struct rt_mutex_waiter *waiter)
{
	bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
	struct task_struct *owner = rt_mutex_owner(lock);
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&current->pi_lock, flags);
	rt_mutex_dequeue(lock, waiter);
	current->pi_blocked_on = NULL;
	raw_spin_unlock_irqrestore(&current->pi_lock, flags);

	/*
	 * Only update priority if the waiter was the highest priority
	 * waiter of the lock and there is an owner to update.
	 */
	if (!owner || !is_top_waiter)
		return;

	raw_spin_lock_irqsave(&owner->pi_lock, flags);

	rt_mutex_dequeue_pi(owner, waiter);

	if (rt_mutex_has_waiters(lock))
		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));

	__rt_mutex_adjust_prio(owner);

	/* Store the lock on which owner is blocked or NULL */
	next_lock = task_blocked_on_lock(owner);

	raw_spin_unlock_irqrestore(&owner->pi_lock, flags);

	/*
	 * Don't walk the chain, if the owner task is not blocked
	 * itself.
	 */
	if (!next_lock)
		return;

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(owner);

	raw_spin_unlock(&lock->wait_lock);

	rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);

	raw_spin_lock(&lock->wait_lock);
}
/*
 * Recheck the pi chain, in case we got a priority setting
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
	struct rt_mutex_waiter *waiter;
	struct rt_mutex *next_lock;
	unsigned long flags;

	raw_spin_lock_irqsave(&task->pi_lock, flags);

	waiter = task->pi_blocked_on;
	if (!waiter || (waiter->prio == task->prio &&
			!dl_prio(task->prio))) {
		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
		return;
	}
	next_lock = waiter->lock;
	raw_spin_unlock_irqrestore(&task->pi_lock, flags);

	/* gets dropped in rt_mutex_adjust_prio_chain()! */
	get_task_struct(task);

	rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
}
/**
 * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 * @lock:	 the rt_mutex to take
 * @state:	 the state the task should block in (TASK_INTERRUPTIBLE
 *		 or TASK_UNINTERRUPTIBLE)
 * @timeout:	 the pre-initialized and started timer, or NULL for none
 * @waiter:	 the pre-initialized rt_mutex_waiter
 *
 * lock->wait_lock must be held by the caller.
 */
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
		    struct hrtimer_sleeper *timeout,
		    struct rt_mutex_waiter *waiter)
{
	int ret = 0;

	for (;;) {
		/* Try to acquire the lock: */
		if (try_to_take_rt_mutex(lock, current, waiter))
			break;

		/*
		 * TASK_INTERRUPTIBLE checks for signals and
		 * timeout. Ignored otherwise.
		 */
		if (unlikely(state == TASK_INTERRUPTIBLE)) {
			/* Signal pending? */
			if (signal_pending(current))
				ret = -EINTR;
			if (timeout && !timeout->task)
				ret = -ETIMEDOUT;
			if (ret)
				break;
		}

		raw_spin_unlock(&lock->wait_lock);

		debug_rt_mutex_print_deadlock(waiter);

		schedule_rt_mutex(lock);

		raw_spin_lock(&lock->wait_lock);
		set_current_state(state);
	}

	return ret;
}
static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
				     struct rt_mutex_waiter *w)
{
	/*
	 * If the result is not -EDEADLOCK or the caller requested
	 * deadlock detection, nothing to do here.
	 */
	if (res != -EDEADLOCK || detect_deadlock)
		return;

	/*
	 * Yell loudly and stop the task right here.
	 */
	rt_mutex_print_deadlock(w);
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
	}
}
/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
		  struct hrtimer_sleeper *timeout,
		  int detect_deadlock)
{
	struct rt_mutex_waiter waiter;
	int ret = 0;

	debug_rt_mutex_init_waiter(&waiter);
	RB_CLEAR_NODE(&waiter.pi_tree_entry);
	RB_CLEAR_NODE(&waiter.tree_entry);

	raw_spin_lock(&lock->wait_lock);

	/* Try to acquire the lock again: */
	if (try_to_take_rt_mutex(lock, current, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 0;
	}

	set_current_state(state);

	/* Setup the timer, when timeout != NULL */
	if (unlikely(timeout)) {
		hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
		if (!hrtimer_active(&timeout->timer))
			timeout->task = NULL;
	}

	ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);

	if (likely(!ret))
		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret)) {
		remove_waiter(lock, &waiter);
		rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
	}

	/*
	 * try_to_take_rt_mutex() sets the waiter bit
	 * unconditionally. We might have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Remove pending timer: */
	if (unlikely(timeout))
		hrtimer_cancel(&timeout->timer);

	debug_rt_mutex_free_waiter(&waiter);

	return ret;
}
/*
 * Slow path try-lock function:
 */
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
	int ret;

	/*
	 * If the lock already has an owner we fail to get the lock.
	 * This can be done without taking the @lock->wait_lock as
	 * it is only being read, and this is a trylock anyway.
	 */
	if (rt_mutex_owner(lock))
		return 0;

	/*
	 * The mutex has currently no owner. Lock the wait lock and
	 * try to acquire the lock.
	 */
	raw_spin_lock(&lock->wait_lock);

	ret = try_to_take_rt_mutex(lock, current, NULL);

	/*
	 * try_to_take_rt_mutex() sets the lock waiters bit
	 * unconditionally. Clean this up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}
/*
 * Slow path to release a rt-mutex:
 */
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
	raw_spin_lock(&lock->wait_lock);

	debug_rt_mutex_unlock(lock);

	rt_mutex_deadlock_account_unlock(current);

	/*
	 * We must be careful here if the fast path is enabled. If we
	 * have no waiters queued we cannot set owner to NULL here
	 * because of:
	 *
	 * foo->lock->owner = NULL;
	 *			rtmutex_lock(foo->lock);   <- fast path
	 *			free = atomic_dec_and_test(foo->refcnt);
	 *			rtmutex_unlock(foo->lock); <- fast path
	 *			kfree(foo);
	 * raw_spin_unlock(foo->lock->wait_lock);
	 *
	 * So for the fastpath enabled kernel:
	 *
	 * Nothing can set the waiters bit as long as we hold
	 * lock->wait_lock. So we do the following sequence:
	 *
	 *	owner = rt_mutex_owner(lock);
	 *	clear_rt_mutex_waiters(lock);
	 *	raw_spin_unlock(&lock->wait_lock);
	 *	if (cmpxchg(&lock->owner, owner, 0) == owner)
	 *		return;
	 *	goto retry;
	 *
	 * The fastpath disabled variant is simple as all access to
	 * lock->owner is serialized by lock->wait_lock:
	 *
	 *	lock->owner = NULL;
	 *	raw_spin_unlock(&lock->wait_lock);
	 */
	while (!rt_mutex_has_waiters(lock)) {
		/* Drops lock->wait_lock ! */
		if (unlock_rt_mutex_safe(lock) == true)
			return;
		/* Relock the rtmutex and try again */
		raw_spin_lock(&lock->wait_lock);
	}

	/*
	 * The wakeup next waiter path does not suffer from the above
	 * race. See the comments there.
	 */
	wakeup_next_waiter(lock);

	raw_spin_unlock(&lock->wait_lock);

	/* Undo pi boosting if necessary: */
	rt_mutex_adjust_prio(current);
}
/*
 * debug aware fast / slowpath lock,trylock,unlock
 *
 * The atomic acquire/release ops are compiled away, when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
		  int detect_deadlock,
		  int (*slowfn)(struct rt_mutex *lock, int state,
				struct hrtimer_sleeper *timeout,
				int detect_deadlock))
{
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, NULL, detect_deadlock);
}
static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
			struct hrtimer_sleeper *timeout, int detect_deadlock,
			int (*slowfn)(struct rt_mutex *lock, int state,
				      struct hrtimer_sleeper *timeout,
				      int detect_deadlock))
{
	if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 0;
	} else
		return slowfn(lock, state, timeout, detect_deadlock);
}
static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
		     int (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
		rt_mutex_deadlock_account_lock(lock, current);
		return 1;
	}
	return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
		    void (*slowfn)(struct rt_mutex *lock))
{
	if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
		rt_mutex_deadlock_account_unlock(current);
	else
		slowfn(lock);
}
/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
	might_sleep();

	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
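/*
 * Usage sketch (illustrative only; assumes a lock initialized elsewhere,
 * e.g. via __rt_mutex_init()):
 *
 *	static struct rt_mutex my_lock;
 *
 *	rt_mutex_lock(&my_lock);
 *	... critical section, may sleep ...
 *	rt_mutex_unlock(&my_lock);
 *
 * While my_lock is held, a higher priority waiter boosts the owner to its
 * own priority; the boost is undone on unlock via rt_mutex_adjust_prio().
 */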
/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:		the rt_mutex to be locked
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
					int detect_deadlock)
{
	might_sleep();

	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
				 detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible,
 *			 the timeout structure is provided
 *			 by the caller
 *
 * @lock:		the rt_mutex to be locked
 * @timeout:		timeout structure or NULL (no timeout)
 * @detect_deadlock:	deadlock detection on/off
 *
 * Returns:
 *  0		on success
 * -EINTR	when interrupted by a signal
 * -ETIMEDOUT	when the timeout expired
 * -EDEADLK	when the lock would deadlock (when deadlock detection is on)
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
		    int detect_deadlock)
{
	might_sleep();

	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
				       detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
	return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
	rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing of a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
	lock->owner = NULL;
	raw_spin_lock_init(&lock->wait_lock);
	lock->waiters = RB_ROOT;
	lock->waiters_leftmost = NULL;

	debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
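/*
 * Initialization sketch (illustrative only): callers typically go through
 * the rt_mutex_init() wrapper in <linux/rtmutex.h>, which ends up here
 * with the lock's name:
 *
 *	static struct rt_mutex m;
 *
 *	__rt_mutex_init(&m, "m");
 *	WARN_ON(rt_mutex_is_locked(&m));
 */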
/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *				proxy owner
 *
 * @lock:	the rt_mutex to be locked
 * @proxy_owner: the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
				struct task_struct *proxy_owner)
{
	__rt_mutex_init(lock, NULL);
	debug_rt_mutex_proxy_lock(lock, proxy_owner);
	rt_mutex_set_owner(lock, proxy_owner);
	rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:	the rt_mutex to be locked
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
			   struct task_struct *proxy_owner)
{
	debug_rt_mutex_proxy_unlock(lock);
	rt_mutex_set_owner(lock, NULL);
	rt_mutex_deadlock_account_unlock(proxy_owner);
}
/**
 * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
 * @lock:		the rt_mutex to take
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @task:		the task to prepare
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Returns:
 *  0 - task blocked on lock
 *  1 - acquired the lock for task, caller should wake it up
 * <0 - error
 *
 * Special API call for FUTEX_REQUEUE_PI support.
 */
int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
			      struct rt_mutex_waiter *waiter,
			      struct task_struct *task, int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	if (try_to_take_rt_mutex(lock, task, NULL)) {
		raw_spin_unlock(&lock->wait_lock);
		return 1;
	}

	/* We enforce deadlock detection for futexes */
	ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);

	if (ret && !rt_mutex_owner(lock)) {
		/*
		 * Reset the return value. We might have
		 * returned with -EDEADLK and the owner
		 * released the lock while we were walking the
		 * pi chain. Let the waiter sort it out.
		 */
		ret = 0;
	}

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	raw_spin_unlock(&lock->wait_lock);

	debug_rt_mutex_print_deadlock(waiter);

	return ret;
}
/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock to query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * within the wait_lock.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
	if (!rt_mutex_has_waiters(lock))
		return NULL;

	return rt_mutex_top_waiter(lock)->task;
}
/**
 * rt_mutex_finish_proxy_lock() - Complete lock acquisition
 * @lock:		the rt_mutex we were woken on
 * @to:			the timeout, null if none. hrtimer should already have
 *			been started.
 * @waiter:		the pre-initialized rt_mutex_waiter
 * @detect_deadlock:	perform deadlock detection (1) or not (0)
 *
 * Complete the lock acquisition started on our behalf by another thread.
 *
 * Returns:
 *  0 - success
 * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
 *
 * Special API call for PI-futex requeue support
 */
int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
			       struct hrtimer_sleeper *to,
			       struct rt_mutex_waiter *waiter,
			       int detect_deadlock)
{
	int ret;

	raw_spin_lock(&lock->wait_lock);

	set_current_state(TASK_INTERRUPTIBLE);

	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);

	set_current_state(TASK_RUNNING);

	if (unlikely(ret))
		remove_waiter(lock, waiter);

	/*
	 * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
	 * have to fix that up.
	 */
	fixup_rt_mutex_waiters(lock);

	raw_spin_unlock(&lock->wait_lock);

	return ret;
}