/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
 * (C) Copyright 2013-2014 Red Hat, Inc.
 * (C) Copyright 2015 Intel Corp.
 *
 * Authors: Waiman Long <waiman.long@hp.com>
 *          Peter Zijlstra <peterz@infradead.org>
 */
#include <linux/smp.h>
#include <linux/bug.h>
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
#include <asm/qspinlock.h>
/*
 * The basic principle of a queue-based spinlock can best be understood
 * by studying a classic queue-based spinlock implementation called the
 * MCS lock. The paper below provides a good description of this kind
 * of lock:
 *
 * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
 *
 * This queued spinlock implementation is based on the MCS lock; however, to
 * make it fit the 4 bytes we assume spinlock_t to be, and to preserve its
 * existing API, we must modify it somehow.
 *
 * In particular: where the traditional MCS lock consists of a tail pointer
 * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
 * unlock the next pending waiter (next->locked), we compress both of these
 * into a single u32 value: {tail, next->locked}.
 *
 * A spinlock disables recursion of its own context, and there is a limit to
 * the contexts that can nest: task, softirq, hardirq and nmi. Since there are
 * at most 4 nesting levels, the nesting level can be encoded in 2 bits, and
 * the tail is formed by combining that 2-bit nesting level with the cpu
 * number. With one byte for the lock value and 3 bytes for the tail, only a
 * 32-bit word is needed. Even though we only need 1 bit for the lock, we
 * extend it to a full byte to achieve better performance on architectures
 * that support atomic byte writes.
 *
 * We also change the first spinner to spin on the lock bit instead of its
 * node; thereby avoiding the need to carry a node from lock to unlock, and
 * preserving the existing lock API. This also makes the unlock code simpler
 * and faster.
 */
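
/*
 * For reference, a rough sketch of the 32-bit word layout implied by the
 * description above. The exact bit positions are an assumption here; the
 * authoritative values are the _Q_* offset/mask macros defined with the
 * qspinlock type, not in this file:
 *
 *   bits  0- 7: locked byte
 *   bits  8- 9: tail index (per-cpu node nesting level, 0-3)
 *   bits 10-31: tail cpu (+1, so that 0 means "no tail")
 */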

#include "mcs_spinlock.h"

/*
 * Per-CPU queue node structures; we can never have more than 4 nested
 * contexts: task, softirq, hardirq, nmi.
 *
 * Exactly fits one 64-byte cacheline on a 64-bit architecture.
 */
static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[4]);
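
/*
 * (The "exactly one cacheline" claim assumes the struct mcs_spinlock layout
 * from mcs_spinlock.h -- a next pointer plus two ints, i.e. 16 bytes per
 * node on 64-bit -- so 4 nodes per cpu come to 64 bytes.)
 */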

/*
 * We must be able to distinguish between no-tail and the tail at 0:0,
 * therefore increment the cpu number by one.
 */

static inline u32 encode_tail(int cpu, int idx)
{
	u32 tail;

#ifdef CONFIG_DEBUG_SPINLOCK
	BUG_ON(idx > 3);
#endif
	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */

	return tail;
}
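
/*
 * For example, cpu 0 at nesting level 0 encodes to 1 << _Q_TAIL_CPU_OFFSET,
 * which is non-zero; only a completely empty queue produces a tail of 0.
 */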

static inline struct mcs_spinlock *decode_tail(u32 tail)
{
	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;

	return per_cpu_ptr(&mcs_nodes[idx], cpu);
}
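
/*
 * decode_tail() above simply inverts the encoding: it strips the +1 bias off
 * the cpu field and uses the 2-bit index to pick that cpu's queue node.
 */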

/**
 * queued_spin_lock_slowpath - acquire the queued spinlock
 * @lock: Pointer to queued spinlock structure
 * @val: Current value of the queued spinlock 32-bit word
 *
 * (queue tail, lock value)
 *
 *              fast      :    slow                                  :    unlock
 *                        :                                          :
 * uncontended  (0,0)   --:--> (0,1) --------------------------------:--> (*,0)
 *                        :       | ^--------.                    /  :
 *                        :       v           \                   |  :
 * uncontended            :    (n,x) --+--> (n,0)                 |  :
 *   queue                :       | ^--'                          |  :
 *                        :       v                               |  :
 * contended              :    (*,x) --+--> (*,0) -----> (*,1) ---'  :
 *   queue                :         ^--'                             :
 */
void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
{
	struct mcs_spinlock *prev, *next, *node;
	u32 new, old, tail;
	int idx;

	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));

	node = this_cpu_ptr(&mcs_nodes[0]);
	idx = node->count++;
	tail = encode_tail(smp_processor_id(), idx);

	node += idx;
	node->locked = 0;
	node->next = NULL;
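
	/*
	 * mcs_nodes[0].count acts as the per-cpu nesting count: idx selects
	 * which of the 4 per-cpu nodes backs this acquisition, and the node is
	 * fully initialised before it can become visible via the tail.
	 */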

	/*
	 * trylock || xchg(lock, node)
	 *
	 * 0,0 -> 0,1 ; no tail, not locked -> no tail, locked.
	 * p,x -> n,x ; tail was p -> tail is n; preserving locked.
	 */
	for (;;) {
		new = _Q_LOCKED_VAL;
		if (val)
			new = tail | (val & _Q_LOCKED_MASK);

		old = atomic_cmpxchg(&lock->val, val, new);
		if (old == val)
			break;

		val = old;
	}
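
	/*
	 * On cmpxchg failure the lock word changed under us, so the loop above
	 * retries with the freshly observed value; on success we have either
	 * taken the lock outright (word was 0) or published our node as the
	 * new tail.
	 */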

	/*
	 * we won the trylock; forget about queueing.
	 */
	if (new == _Q_LOCKED_VAL)
		goto release;

	/*
	 * if there was a previous node; link it and wait until reaching the
	 * head of the waitqueue.
	 */
	if (old & ~_Q_LOCKED_MASK) {
		prev = decode_tail(old);
		WRITE_ONCE(prev->next, node);

		arch_mcs_spin_lock_contended(&node->locked);
	}
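
	/*
	 * arch_mcs_spin_lock_contended() spins on our own node->locked until
	 * our predecessor hands the queue head position over to us; this
	 * local spinning is the defining property of the MCS lock.
	 */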

	/*
	 * we're at the head of the waitqueue, wait for the owner to go away.
	 *
	 * *,x -> *,0
	 */
	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
		cpu_relax();

	/*
	 * claim the lock:
	 *
	 * n,0 -> 0,1 : lock, uncontended
	 * *,0 -> *,1 : lock, contended
	 */
	for (;;) {
		new = _Q_LOCKED_VAL;
		if (val != tail)
			new |= val;

		old = atomic_cmpxchg(&lock->val, val, new);
		if (old == val)
			break;

		val = old;
	}

	/*
	 * contended path; wait for next, release.
	 */
	if (new != _Q_LOCKED_VAL) {
		while (!(next = READ_ONCE(node->next)))
			cpu_relax();

		arch_mcs_spin_unlock_contended(&next->locked);
	}
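
	/*
	 * A successor may have swung the tail to its node but not yet written
	 * prev->next, hence the wait for node->next above before handing the
	 * queue head role on via next->locked.
	 */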

release:
	/*
	 * release the node
	 */
	this_cpu_dec(mcs_nodes[0].count);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);