+/*
+ * Called from futex_requeue_pi.
+ *
+ * Makes sure both FUTEX_WAITERS and FUTEX_WAITER_REQUEUED are set in the
+ * user-space value at @uaddr, then hands a pi_state back through @pi_state:
+ * the one found by lookup_pi_state() when the value carries an owner TID
+ * and that lookup succeeds, otherwise a freshly allocated one that stores
+ * @key and whose rt_mutex is initialised without an owner.
+ *
+ * Returns 0 on success, or -EFAULT if the futex value cannot be read or
+ * atomically updated.
+ */
+static inline int
+lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+			    union futex_key *key,
+			    struct futex_pi_state **pi_state)
+{
+	const u32 requeue_flags = FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
+	u32 curval, expected;
+
+	for (;;) {
+		/*
+		 * A fault cannot be handled cleanly at this point because
+		 * the locks cannot be released here; just report it.
+		 */
+		if (get_futex_value_locked(&curval, uaddr))
+			return -EFAULT;
+
+		/* Both flags already present: nothing to update. */
+		if ((curval & requeue_flags) == requeue_flags)
+			break;
+
+		/*
+		 * At least one flag is missing: prepare the futex to have
+		 * waiters by setting both atomically.
+		 */
+		expected = curval;
+
+		pagefault_disable();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, expected,
+						       expected | requeue_flags);
+		pagefault_enable();
+
+		if (unlikely(curval == -EFAULT))
+			return -EFAULT;
+
+		/* The value changed under us: re-read it and try again. */
+		if (unlikely(curval != expected))
+			continue;
+
+		/*
+		 * Exchange done. curval equals the value seen before the
+		 * flags were added, so its TID bits are still meaningful.
+		 */
+		break;
+	}
+
+	/* An owner is recorded and its pi_state was found: use that one. */
+	if ((curval & FUTEX_TID_MASK) &&
+	    !lookup_pi_state(curval, hb, key, pi_state))
+		return 0;
+
+	/*
+	 * The futex has no owner (yet) or the lookup failed: allocate a
+	 * pi_state without owner. It already stores the key.
+	 */
+	*pi_state = alloc_pi_state();
+	(*pi_state)->key = *key;
+
+	/* init the mutex without owner */
+	__rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
+
+	return 0;
+}
+
+/*
+ * Wake the first nr_wake waiters queued on the futex at uaddr1, then
+ * requeue the following matching waiters onto the PI-futex at uaddr2.
+ *
+ * The requeue limit is tested after each requeue (ret - nr_wake >=
+ * nr_requeue), so once the wake quota is used up at least one remaining
+ * waiter is requeued before the loop stops.
+ *
+ * If cmpval is not NULL, the value at uaddr1 is first compared with
+ * *cmpval and the operation is abandoned with -EAGAIN on mismatch.
+ *
+ * Returns the number of waiters handled (woken plus requeued) on success,
+ * or:
+ *   -ENOMEM  if refill_pi_state_cache() fails,
+ *   -EAGAIN  if the value at uaddr1 differs from *cmpval,
+ *   the error from get_futex_key(), get_user() or
+ *   lookup_pi_state_for_requeue() otherwise. In the last case, waiters
+ *   woken earlier in the same call stay woken.
+ */
+static int futex_requeue_pi(u32 __user *uaddr1, u32 __user *uaddr2,
+			    int nr_wake, int nr_requeue, u32 *cmpval)
+{
+	union futex_key key1, key2;
+	struct futex_hash_bucket *hb1, *hb2;
+	struct plist_head *head1;
+	struct futex_q *this, *next;
+	struct futex_pi_state *pi_state2 = NULL;
+	struct rt_mutex_waiter *waiter, *top_waiter = NULL;
+	struct rt_mutex *lock2 = NULL;
+	int ret, drop_count = 0;
+
+	if (refill_pi_state_cache())
+		return -ENOMEM;
+
+retry:
+	/*
+	 * First take all the futex related locks:
+	 */
+	down_read(&current->mm->mmap_sem);
+
+	ret = get_futex_key(uaddr1, &key1);
+	if (unlikely(ret != 0))
+		goto out;
+	ret = get_futex_key(uaddr2, &key2);
+	if (unlikely(ret != 0))
+		goto out;
+
+	hb1 = hash_futex(&key1);
+	hb2 = hash_futex(&key2);
+
+	double_lock_hb(hb1, hb2);
+
+	if (likely(cmpval != NULL)) {
+		u32 curval;
+
+		ret = get_futex_value_locked(&curval, uaddr1);
+
+		if (unlikely(ret)) {
+			spin_unlock(&hb1->lock);
+			if (hb1 != hb2)
+				spin_unlock(&hb2->lock);
+
+			/*
+			 * If we would have faulted, release mmap_sem, fault
+			 * it in and start all over again.
+			 */
+			up_read(&current->mm->mmap_sem);
+
+			ret = get_user(curval, uaddr1);
+
+			if (!ret)
+				goto retry;
+
+			/* Every lock has already been dropped above. */
+			return ret;
+		}
+		if (curval != *cmpval) {
+			ret = -EAGAIN;
+			goto out_unlock;
+		}
+	}
+
+	/*
+	 * ret is 0 here (both key lookups and the optional value read
+	 * succeeded); from now on it counts the matching waiters handled.
+	 */
+	head1 = &hb1->chain;
+	plist_for_each_entry_safe(this, next, head1, list) {
+		if (!match_futex(&this->key, &key1))
+			continue;
+		if (++ret <= nr_wake) {
+			wake_futex(this);
+		} else {
+			/*
+			 * FIRST: get and set the pi_state
+			 */
+			if (!pi_state2) {
+				int s;
+				/* do this only the first time we requeue someone */
+				s = lookup_pi_state_for_requeue(uaddr2, hb2,
+								&key2, &pi_state2);
+				if (s) {
+					ret = s;
+					goto out_unlock;
+				}
+
+				/*
+				 * wait_lock is taken once here and stays held
+				 * until the priority fix-up after the loop.
+				 */
+				lock2 = &pi_state2->pi_mutex;
+				spin_lock(&lock2->wait_lock);
+
+				/* Save the top waiter of the wait_list */
+				if (rt_mutex_has_waiters(lock2))
+					top_waiter = rt_mutex_top_waiter(lock2);
+			} else {
+				/* Each further requeued waiter takes its own reference. */
+				atomic_inc(&pi_state2->refcount);
+			}
+
+			this->pi_state = pi_state2;
+
+			/*
+			 * SECOND: requeue futex_q to the correct hashbucket
+			 */
+
+			/*
+			 * If key1 and key2 hash to the same bucket, no need to
+			 * requeue.
+			 */
+			if (likely(head1 != &hb2->chain)) {
+				plist_del(&this->list, &hb1->chain);
+				plist_add(&this->list, &hb2->chain);
+				this->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+				this->list.plist.lock = &hb2->lock;
+#endif
+			}
+			this->key = key2;
+			get_futex_key_refs(&key2);
+			drop_count++;
+
+			/*
+			 * THIRD: queue it to lock2
+			 */
+			spin_lock_irq(&this->task->pi_lock);
+			waiter = &this->waiter;
+			waiter->task = this->task;
+			waiter->lock = lock2;
+			plist_node_init(&waiter->list_entry, this->task->prio);
+			plist_node_init(&waiter->pi_list_entry, this->task->prio);
+			plist_add(&waiter->list_entry, &lock2->wait_list);
+			this->task->pi_blocked_on = waiter;
+			spin_unlock_irq(&this->task->pi_lock);
+
+			if (ret - nr_wake >= nr_requeue)
+				break;
+		}
+	}
+
+	/*
+	 * If we've requeued some tasks and the top_waiter of the rt_mutex
+	 * has changed, we must adjust the priority of the owner, if any.
+	 * drop_count != 0 also means lock2->wait_lock is held; both arms
+	 * below release it.
+	 */
+	if (drop_count) {
+		struct task_struct *owner = rt_mutex_owner(lock2);
+		if (owner &&
+		    (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
+			int chain_walk = 0;
+
+			spin_lock_irq(&owner->pi_lock);
+			if (top_waiter) {
+				plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
+			} else {
+				/*
+				 * There was no waiters before the requeue,
+				 * the flag must be updated
+				 */
+				mark_rt_mutex_waiters(lock2);
+			}
+
+			plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
+			__rt_mutex_adjust_prio(owner);
+			if (owner->pi_blocked_on) {
+				chain_walk = 1;
+				get_task_struct(owner);
+			}
+
+			spin_unlock_irq(&owner->pi_lock);
+			spin_unlock(&lock2->wait_lock);
+
+			if (chain_walk)
+				rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
+							   current);
+		} else {
+			/* No owner or the top_waiter does not change */
+			mark_rt_mutex_waiters(lock2);
+			spin_unlock(&lock2->wait_lock);
+		}
+	}
+
+out_unlock:
+	spin_unlock(&hb1->lock);
+	if (hb1 != hb2)
+		spin_unlock(&hb2->lock);
+
+	/* drop_futex_key_refs() must be called outside the spinlocks. */
+	while (--drop_count >= 0)
+		drop_futex_key_refs(&key1);
+
+out:
+	up_read(&current->mm->mmap_sem);
+	return ret;
+}
+