drivers/md/bcache/closure.h

   1 #ifndef _LINUX_CLOSURE_H
   2 #define _LINUX_CLOSURE_H
   3
   4 #include <linux/llist.h>
   5 #include <linux/sched.h>
     #include <linux/stddef.h>
   6 #include <linux/workqueue.h>
   7
   8 /*
   9  * Closure is perhaps the most overused and abused term in computer science, but
  10  * since I've been unable to come up with anything better you're stuck with it
  11  * again.
  12  *
  13  * What are closures?
  14  *
  15  * They embed a refcount. The basic idea is they count "things that are in
  16  * progress" - in flight bios, some other thread that's doing something else -
  17  * anything you might want to wait on.
  18  *
  19  * The refcount may be manipulated with closure_get() and closure_put().
  20  * closure_put() is where many of the interesting things happen, when it causes
  21  * the refcount to go to 0.
  22  *
  23  * Closures can be used to wait on things both synchronously and asynchronously,
  24  * and synchronous and asynchronous use can be mixed without restriction. To
  25  * wait synchronously, use closure_sync() - you will sleep until your closure's
  26  * refcount hits 1.
  27  *
  28  * To wait asynchronously, use
  29  *   continue_at(cl, next_function, workqueue);
  30  *
  31  * passing it, as you might expect, the function to run when nothing is pending
  32  * and the workqueue to run that function out of.
  33  *
  34  * continue_at() also, critically, is a macro that returns the calling function.
  35  * There's good reason for this.
  36  *
  37  * To use closures safely asynchronously, they must always have a refcount while
  38  * they are running owned by the thread that is running them. Otherwise, suppose
  39  * you submit some bios and wish to have a function run when they all complete:
  40  *
  41  * foo_endio(struct bio *bio, int error)
  42  * {
  43  *      closure_put(cl);
  44  * }
  45  *
  46  * closure_init(cl);
  47  *
  48  * do_stuff();
  49  * closure_get(cl);
  50  * bio1->bi_endio = foo_endio;
  51  * bio_submit(bio1);
  52  *
  53  * do_more_stuff();
  54  * closure_get(cl);
  55  * bio2->bi_endio = foo_endio;
  56  * bio_submit(bio2);
  57  *
  58  * continue_at(cl, complete_some_read, system_wq);
  59  *
  60  * If closure's refcount started at 0, complete_some_read() could run before the
  61  * second bio was submitted - which is almost always not what you want! More
  62  * importantly, it wouldn't be possible to say whether the original thread or
  63  * complete_some_read()'s thread owned the closure - and whatever state it was
  64  * associated with!
  65  *
  66  * So, closure_init() initializes a closure's refcount to 1 - and when a
  67  * closure_fn is run, the refcount will be reset to 1 first.
  68  *
  69  * Then, the rule is - if you got the refcount with closure_get(), release it
  70  * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
  71  * on a closure because you called closure_init() or you were run out of a
  72  * closure - _always_ use continue_at(). Doing so consistently will help
  73  * eliminate an entire class of particularly pernicious races.
  74  *
  75  * For a closure to wait on an arbitrary event, we need to introduce waitlists:
  76  *
  77  * struct closure_waitlist list;
  78  * closure_wait_event(&list, cl, condition);
  79  * closure_wake_up(&list);
  80  *
  81  * These work analogously to wait_event() and wake_up() - except that instead of
  82  * operating on the current thread (for wait_event()) and lists of threads, they
  83  * operate on an explicit closure and lists of closures.
  84  *
  85  * Because it's a closure we can now wait either synchronously or
  86  * asynchronously. closure_wait_event() returns the current value of the
  87  * condition, and if it returned false continue_at() or closure_sync() can be
  88  * used to wait for it to become true.
  89  *
  90  * It's useful for waiting on things when you can't sleep in the context in
  91  * which you must check the condition (perhaps a spinlock held, or you might be
  92  * beneath generic_make_request() - in which case you can't sleep on IO).
  93  *
  94  * closure_wait_event() will wait either synchronously or asynchronously,
  95  * depending on whether the closure is in blocking mode or not. You can pick a
  96  * mode explicitly with closure_wait_event_sync() and
  97  * closure_wait_event_async(), which do just what you might expect.
  98  *
  99  * Lastly, you might have a wait list dedicated to a specific event, and have no
 100  * need for specifying the condition - you just want to wait until someone runs
 101  * closure_wake_up() on the appropriate wait list. In that case, just use
 102  * closure_wait(). It will return either true or false, depending on whether the
 103  * closure was already on a wait list or not - a closure can only be on one wait
 104  * list at a time.
 105  *
 106  * Parents:
 107  *
 108  * closure_init() takes two arguments - it takes the closure to initialize, and
 109  * a (possibly null) parent.
 110  *
 111  * If parent is non null, the new closure will hold a refcount on the parent for
 112  * its lifetime; a closure is considered to be "finished" when its refcount
 113  * hits 0 and the function to run is null. Hence
 114  *
 115  * continue_at(cl, NULL, NULL);
 116  *
 117  * returns up the (spaghetti) stack of closures, precisely like normal return
 118  * returns up the C stack. continue_at() with non null fn is better thought of
 119  * as doing a tail call.
 120  *
 121  * All this implies that a closure should typically be embedded in a particular
 122  * struct (which its refcount will normally control the lifetime of), and that
 123  * struct can very much be thought of as a stack frame.
 124  *
 125  * Locking:
 126  *
 127  * Closures are based on work items but they can be thought of as more like
 128  * threads - in that like threads and unlike work items they have a well
 129  * defined lifetime; they are created (with closure_init()) and eventually
 130  * complete after a continue_at(cl, NULL, NULL).
 131  *
 132  * Suppose you've got some larger structure with a closure embedded in it that's
 133  * used for periodically doing garbage collection. You only want one garbage
 134  * collection happening at a time, so the natural thing to do is protect it with
 135  * a lock. However, it's difficult to use a lock protecting a closure correctly
 136  * because the unlock should come after the last continue_at() (additionally, if
 137  * you're using the closure asynchronously a mutex won't work since a mutex has
 138  * to be unlocked by the same process that locked it).
 139  *
 140  * So to make it less error prone and more efficient, we also have the ability
 141  * to use closures as locks:
 142  *
 143  * closure_init_unlocked();
 144  * closure_trylock();
 145  *
 146  * That's all we need for trylock() - the last closure_put() implicitly unlocks
 147  * it for you.  But for closure_lock(), we also need a wait list:
 148  *
 149  * struct closure_with_waitlist frobnicator_cl;
 150  *
 151  * closure_init_unlocked(&frobnicator_cl);
 152  * closure_lock(&frobnicator_cl);
 153  *
 154  * A closure_with_waitlist embeds a closure and a wait list - much like struct
 155  * delayed_work embeds a work item and a timer_list. The important thing is, use
 156  * it exactly like you would a regular closure and closure_put() will magically
 157  * handle everything for you.
 158  */
 159
 160 struct closure;
     /*
      * closure_fn - the function type a closure continues into; it is passed the
      * closure itself (closure_queue() invokes it as cl->fn(cl)).
      */
 161 typedef void (closure_fn) (struct closure *);
 162
 163 struct closure_waitlist {
             /*
              * Head of the llist of waiting closures; presumably they are linked
              * in through the 'list' node of struct closure - verify in closure.c.
              */
 164         struct llist_head       list;
 165 };
 166
 167 enum closure_type {
             /*
              * The names must be TYPE_<struct tag>: __CLOSURE_TYPE() builds them by
              * token pasting (TYPE_ ## _t).
              */
 168         TYPE_closure                            = 0,
 169         TYPE_closure_with_waitlist              = 1,
             /* Upper bound checked by the BUILD_BUG_ON in __to_internal_closure(). */
 170         MAX_CLOSURE_TYPE                        = 1,
 171 };
 172
 173 enum closure_state {
 174         /*
 175          * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
 176          * the thread that owns the closure, and cleared by the thread that's
 177          * waking up the closure.
 178          *
 179          * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
 180          * - indicates that cl->task is valid and closure_put() may wake it up.
 181          * Only set or cleared by the thread that owns the closure.
 182          *
 183          * The rest are for debugging and don't affect behaviour:
 184          *
 185          * CLOSURE_RUNNING: Set when a closure is running (i.e. by
 186          * closure_init() and when closure_put() runs the next function), and
 187          * must be cleared before remaining hits 0. Primarily to help guard
 188          * against incorrect usage and accidentally transferring references.
 189          * continue_at() and closure_return() clear it for you, if you're doing
 190          * something unusual you can use closure_set_stopped() which also helps
 191          * annotate where references are being transferred.
 192          *
 193          * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
 194          * closure with this flag set
              *
              * CLOSURE_DESTRUCTOR: Set by closure_return_with_destructor(), which
              * subtracts (CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1) from remaining.
              * How closure_put() reacts to it is not visible in this header.
              *
              * Layout: these flags live in closure.remaining above the reference
              * count (the bits below CLOSURE_BITS_START, see
              * CLOSURE_REMAINING_MASK). Each flag is on an odd bit (23, 25, ... 31);
              * the bit directly above each one is collected in CLOSURE_GUARD_MASK.
              *
              * NOTE(review): (1 << 31) shifts into the sign bit of int, and
              * CLOSURE_GUARD_MASK shifts the combined flags left once more - this
              * relies on compiler behaviour; verify before touching the values.
 195          */
 196
 197         CLOSURE_BITS_START      = (1 << 23),
 198         CLOSURE_DESTRUCTOR      = (1 << 23),
 199         CLOSURE_WAITING         = (1 << 25),
 200         CLOSURE_SLEEPING        = (1 << 27),
 201         CLOSURE_RUNNING         = (1 << 29),
 202         CLOSURE_STACK           = (1 << 31),
 203 };
 204
     /*
      * Guard bits: the bit directly above each state flag.
      * NOTE(review): presumably tested by closure_sub()/closure_put() to catch a
      * flag being added or subtracted twice - that check is not in this header.
      */
 205 #define CLOSURE_GUARD_MASK                                      \
 206         ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING|  \
 207           CLOSURE_RUNNING|CLOSURE_STACK) << 1)
 208
     /* Bits of closure.remaining below the flags: the reference count proper. */
 209 #define CLOSURE_REMAINING_MASK          (CLOSURE_BITS_START - 1)
     /* Initial value of remaining for a running closure: one ref + RUNNING. */
 210 #define CLOSURE_REMAINING_INITIALIZER   (1|CLOSURE_RUNNING)
 211
     /*
      * struct closure - a reference count plus the continuation to run next.
      *
      * The anonymous struct overlays @work: set_closure_fn() stores fn/wq and
      * closure_queue() then passes &work, reading the handler back as work.func,
      * so fn is expected to occupy the same storage as work.func.
      * NOTE(review): confirm against struct work_struct before reordering members.
      */
 212 struct closure {
 213         union {
 214                 struct {
                             /* target workqueue; NULL: fn is called directly */
 215                         struct workqueue_struct *wq;
                             /* sleeper recorded by __closure_start_sleep() */
 216                         struct task_struct      *task;
                             /* presumably the node used on a closure_waitlist */
 217                         struct llist_node       list;
                             /* continuation, stored by set_closure_fn() */
 218                         closure_fn              *fn;
 219                 };
 220                 struct work_struct      work;
 221         };
 222
             /* closure we hold a ref on (taken in do_closure_init()); may be NULL */
 223         struct closure          *parent;
 224
             /*
              * Reference count in the CLOSURE_REMAINING_MASK bits with the
              * enum closure_state flags above it; the value -1 means "unlocked"
              * (see closure_is_unlocked()).
              */
 225         atomic_t                remaining;
 226
             /* static type recorded by closure_init_type() */
 227         enum closure_type       type;
 228
 229 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 230 #define CLOSURE_MAGIC_DEAD      0xc054dead
 231 #define CLOSURE_MAGIC_ALIVE     0xc054a11e
 232
 233         unsigned                magic;
 234         struct list_head        all;
             /* code address stored by closure_set_ip()/closure_set_ret_ip() */
 235         unsigned long           ip;
 236         unsigned long           waiting_on;
 237 #endif
 238 };
 239
     /*
      * A closure bundled with the waitlist that closure_lock() hands to
      * __closure_lock(). @cl must stay the first member: __to_internal_closure()
      * turns a pointer to this struct into a struct closure * with a plain cast.
      */
 240 struct closure_with_waitlist {
 241         struct closure          cl;
 242         struct closure_waitlist wait;
 243 };
 244
     /*
      * Compile-time type dispatch. __closure_type(x) expands to a chain of
      * conditional expressions, one per __CLOSURE_TYPE() line, yielding the TYPE_*
      * constant whose struct type is compatible with x; if nothing matches the
      * chain ends in a call to invalid_closure_type().
      * NOTE(review): invalid_closure_type() is only declared here - presumably it
      * is never defined so that an unsupported type breaks the build; confirm.
      */
 245 extern unsigned invalid_closure_type(void);
 246
 247 #define __CLOSURE_TYPE(cl, _t)                                          \
 248           __builtin_types_compatible_p(typeof(cl), struct _t)           \
 249                 ? TYPE_ ## _t :                                         \
 250
 251 #define __closure_type(cl)                                              \
 252 (                                                                       \
 253         __CLOSURE_TYPE(cl, closure)                                     \
 254         __CLOSURE_TYPE(cl, closure_with_waitlist)                       \
 255         invalid_closure_type()                                          \
 256 )
 257
     /* Out-of-line closure primitives; only declared in this header. */
 258 void closure_sub(struct closure *cl, int v);
 259 void closure_put(struct closure *cl);
 260 void __closure_wake_up(struct closure_waitlist *list);
 261 bool closure_wait(struct closure_waitlist *list, struct closure *cl);
 262 void closure_sync(struct closure *cl);
 263
     /* Lock-style entry points; closure_lock() below wraps __closure_lock(). */
 264 bool closure_trylock(struct closure *cl, struct closure *parent);
 265 void __closure_lock(struct closure *cl, struct closure *parent,
 266                     struct closure_waitlist *wait_list);
 267
     /*
      * Debug hooks: real functions are declared when CONFIG_BCACHE_CLOSURES_DEBUG
      * is set, otherwise they are empty inline stubs so callers need no #ifdef.
      */
 268 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 269
 270 void closure_debug_init(void);
 271 void closure_debug_create(struct closure *cl);
 272 void closure_debug_destroy(struct closure *cl);
 273
 274 #else
 275
 276 static inline void closure_debug_init(void) {}
 277 static inline void closure_debug_create(struct closure *cl) {}
 278 static inline void closure_debug_destroy(struct closure *cl) {}
 279
 280 #endif
 281
     /*
      * closure_set_ip() - store _THIS_IP_ in cl->ip. Does nothing unless
      * CONFIG_BCACHE_CLOSURES_DEBUG is set (the field only exists then).
      */
 282 static inline void closure_set_ip(struct closure *cl)
 283 {
 284 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 285         cl->ip = _THIS_IP_;
 286 #endif
 287 }
 288
     /*
      * closure_set_ret_ip() - like closure_set_ip() but stores _RET_IP_
      * (presumably the return address of the enclosing function). Does nothing
      * unless CONFIG_BCACHE_CLOSURES_DEBUG is set.
      */
 289 static inline void closure_set_ret_ip(struct closure *cl)
 290 {
 291 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 292         cl->ip = _RET_IP_;
 293 #endif
 294 }
 295
 296 static inline void closure_get(struct closure *cl)
 297 {
 298 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 299         BUG_ON((atomic_inc_return(&cl->remaining) &
 300                 CLOSURE_REMAINING_MASK) <= 1);
 301 #else
 302         atomic_inc(&cl->remaining);
 303 #endif
 304 }
 305
     /*
      * closure_set_stopped() - clear CLOSURE_RUNNING by subtracting it from
      * remaining; the flag must currently be set for the subtraction to be exact.
      */
 306 static inline void closure_set_stopped(struct closure *cl)
 307 {
 308         atomic_sub(CLOSURE_RUNNING, &cl->remaining);
 309 }
 310
     /*
      * closure_is_unlocked() - true when remaining holds -1, the value
      * do_closure_init() stores for a closure initialised as not running.
      */
 311 static inline bool closure_is_unlocked(struct closure *cl)
 312 {
 313         return atomic_read(&cl->remaining) == -1;
 314 }
 315
     /*
      * do_closure_init() - common tail of the closure initialisers.
      * @cl:         closure being set up
      * @parent:     closure to take a reference on for @cl, or NULL
      * @running:    true: start with one reference and CLOSURE_RUNNING set;
      *              false: leave the closure in the unlocked (-1) state
      */
 316 static inline void do_closure_init(struct closure *cl, struct closure *parent,
 317                                    bool running)
 318 {
 319         cl->parent = parent;
 320         if (parent)
 321                 closure_get(parent);
 322
 323         if (!running) {
 324                 /* unlocked: usable only after closure_lock()/closure_trylock() */
 325                 atomic_set(&cl->remaining, -1);
 326         } else {
 327                 closure_debug_create(cl);
                     atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
             }
 328
 329         closure_set_ip(cl);
 330 }
 331
 332 /*
 333  * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
 334  * the result of __closure_type() is thrown away, it's used merely for type
 335  * checking.
      *
      * @cl is parenthesised at every use so that an expression argument (for
      * example ptr + 1) is dereferenced and cast as a whole rather than having
      * the unary operator bind to its first operand only.
 336  */
 337 #define __to_internal_closure(cl)                               \
 338 ({                                                              \
 339         BUILD_BUG_ON(__closure_type(*(cl)) > MAX_CLOSURE_TYPE); \
 340         (struct closure *) (cl);                                \
 341 })
 342
     /*
      * closure_init_type() - record the static type of *@cl in ->type, then do the
      * common initialisation through do_closure_init(). @cl may point to a
      * struct closure or to a struct closure_with_waitlist.
      */
 343 #define closure_init_type(cl, parent, running)                  \
 344 do {                                                            \
 345         struct closure *_cl = __to_internal_closure(cl);        \
 346         _cl->type = __closure_type(*(cl));                      \
 347         do_closure_init(_cl, parent, running);                  \
 348 } while (0)
 349
 350 /**
 351  * __closure_init() - Initialize a closure, skipping the memset()
      * @cl:         closure to initialize
      * @parent:     parent of the new closure; may be NULL
 352  *
 353  * May be used instead of closure_init() when memory has already been zeroed.
      * The closure starts out running (refcount 1, CLOSURE_RUNNING set).
 354  */
 355 #define __closure_init(cl, parent)                              \
 356         closure_init_type(cl, parent, true)
 357
 358 /**
 359  * closure_init() - Initialize a closure, setting the refcount to 1
 360  * @cl:         closure to initialize
 361  * @parent:     parent of the new closure. cl will take a refcount on it for its
 362  *              lifetime; may be NULL.
      *
      * Zeroes sizeof(*(cl)) bytes first, i.e. the whole containing object when @cl
      * is a struct closure_with_waitlist. @cl is evaluated more than once.
 363  */
 364 #define closure_init(cl, parent)                                \
 365 do {                                                            \
 366         memset((cl), 0, sizeof(*(cl)));                         \
 367         __closure_init(cl, parent);                             \
 368 } while (0)
 369
     /*
      * closure_init_stack() - initialise a closure flagged CLOSURE_STACK.
      *
      * The closure is zeroed (so parent is NULL and type is TYPE_closure) and
      * starts with one reference, CLOSURE_RUNNING and CLOSURE_STACK set. Unlike
      * do_closure_init() this calls neither closure_debug_create() nor
      * closure_set_ip().
      */
 370 static inline void closure_init_stack(struct closure *cl)
 371 {
 372         memset(cl, 0, sizeof(struct closure));
 373         atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
 374 }
 375
 376 /**
 377  * closure_init_unlocked() - Initialize a closure but leave it unlocked.
 378  * @cl:         closure to initialize
 379  *
 380  * For when the closure will be used as a lock. The closure may not be used
 381  * until after a closure_lock() or closure_trylock().
      *
      * The object is zeroed, no parent is set and the refcount word is left at -1,
      * so closure_is_unlocked() returns true afterwards.
 382  */
 383 #define closure_init_unlocked(cl)                               \
 384 do {                                                            \
 385         memset((cl), 0, sizeof(*(cl)));                         \
 386         closure_init_type(cl, NULL, false);                     \
 387 } while (0)
 388
 389 /**
 390  * closure_lock() - lock and initialize a closure.
 391  * @cl:         the closure to lock
 392  * @parent:     the new parent for this closure
 393  *
 394  * The closure must be of one of the types that has a waitlist (otherwise we
 395  * wouldn't be able to sleep on contention).
 396  *
 397  * @parent has exactly the same meaning as in closure_init(); if non null, the
 398  * closure will take a reference on @parent which will be released when it is
 399  * unlocked.
      *
      * The macro takes &(cl)->wait, so @cl must point to a type with a 'wait'
      * member (struct closure_with_waitlist); @cl is evaluated twice.
 400  */
 401 #define closure_lock(cl, parent)                                \
 402         __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
 403
     /*
      * __closure_end_sleep() - mark the current task runnable again and clear
      * CLOSURE_SLEEPING on @cl if it is set.
      */
 404 static inline void __closure_end_sleep(struct closure *cl)
 405 {
 406         __set_current_state(TASK_RUNNING);
 407
 408         /* nothing to clear when the flag was never set */
 409         if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
                     return;
     
             atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
 410 }
 411
     /*
      * __closure_start_sleep() - prepare the current task to sleep on @cl:
      * record it in cl->task, switch to TASK_UNINTERRUPTIBLE and make sure
      * CLOSURE_SLEEPING is set.
      */
 412 static inline void __closure_start_sleep(struct closure *cl)
 413 {
 414         closure_set_ip(cl);
 415         cl->task = current;
 416         set_current_state(TASK_UNINTERRUPTIBLE);
 417
 418         /* already flagged as sleeping: leave remaining untouched */
 419         if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
                     return;
     
             atomic_add(CLOSURE_SLEEPING, &cl->remaining);
 420 }
 421
 422 /**
 423  * closure_wake_up() - wake up all closures on a wait list.
      * @list:       the wait list to wake
      *
      * Issues a full memory barrier (smp_mb()) and then calls
      * __closure_wake_up().
      * NOTE(review): the barrier presumably orders the caller's earlier stores
      * (those that make the waited-for condition true) before the list is
      * examined - confirm against __closure_wake_up().
 424  */
 425 static inline void closure_wake_up(struct closure_waitlist *list)
 426 {
 427         smp_mb();
 428         __closure_wake_up(list);
 429 }
 430
 431 /*
 432  * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 433  * but for closures.
 434  *
 435  * The loop is oddly structured so as to avoid a race; we must check the
 436  * condition again after we've added ourself to the waitlist. We know if we were
 437  * already on the waitlist because closure_wait() returns false; thus, we only
 438  * schedule or break if closure_wait() returns false. If it returns true, we
 439  * just loop again - rechecking the condition.
 440  *
 441  * The __closure_wake_up() is necessary because we may race with the event
 442  * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 443  * thread that made the event true may have called closure_wake_up() before we
 444  * added ourself to the wait list.
 445  *
 446  * We have to call closure_sync() at the end instead of just
 447  * __closure_end_sleep() because a different thread might've called
 448  * closure_wake_up() before us and gotten preempted before they dropped the
 449  * refcount on our closure. If this was a stack allocated closure, that would be
 450  * bad.
      *
      * Usage notes: the macro evaluates to the last value of @condition, which is
      * always true because that is the only exit from the loop. @list, @cl and
      * @condition are each evaluated more than once. The result is held in a
      * local named 'ret', so a @condition that refers to a caller variable also
      * called 'ret' would be shadowed by it.
 451  */
 452 #define closure_wait_event(list, cl, condition)                         \
 453 ({                                                                      \
 454         typeof(condition) ret;                                          \
 455                                                                         \
 456         while (1) {                                                     \
 457                 ret = (condition);                                      \
 458                 if (ret) {                                              \
 459                         __closure_wake_up(list);                        \
 460                         closure_sync(cl);                               \
 461                         break;                                          \
 462                 }                                                       \
 463                                                                         \
 464                 __closure_start_sleep(cl);                              \
 465                                                                         \
 466                 if (!closure_wait(list, cl))                            \
 467                         schedule();                                     \
 468         }                                                               \
 469                                                                         \
 470         ret;                                                            \
 471 })
 472
     /*
      * closure_queue() - run @cl's continuation.
      *
      * If a workqueue was set (cl->wq) the embedded work item is initialised and
      * queued there; queue_work() failing is a BUG. Otherwise cl->fn is called
      * directly in the caller's context.
      *
      * cl->wq is copied to a local first because cl->work, which INIT_WORK()
      * initialises, shares storage with cl->wq.
      */
 473 static inline void closure_queue(struct closure *cl)
 474 {
 475         struct workqueue_struct *wq = cl->wq;
     
             /*
              * INIT_WORK() below reads the handler back through cl->work.func, so
              * fn must sit exactly where struct work_struct keeps func. Fail the
              * build if a layout change to either struct ever breaks that.
              */
             BUILD_BUG_ON(offsetof(struct closure, fn) !=
                          offsetof(struct work_struct, func));
     
 476         if (wq) {
 477                 INIT_WORK(&cl->work, cl->work.func);
 478                 BUG_ON(!queue_work(wq, &cl->work));
 479         } else
 480                 cl->fn(cl);
 481 }
 482
     /*
      * set_closure_fn() - record the continuation @fn and the workqueue @wq it
      * should run on (NULL @wq: closure_queue() calls @fn directly).
      * The closure must not live on the stack; that is a BUG.
      */
 483 static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
 484                                   struct workqueue_struct *wq)
 485 {
 486         BUG_ON(object_is_on_stack(cl));
 487         closure_set_ip(cl);
 488         cl->fn = fn;
 489         cl->wq = wq;
 490         /*
              * Order the fn/wq stores before the atomic op on ->remaining that the
              * caller issues next (e.g. closure_sub() in continue_at());
              * presumably pairs with the atomic_dec() in closure_put().
              */
 491         smp_mb__before_atomic_dec();
 492 }
 493
     /*
      * continue_at() - set @_fn/@_wq as the continuation, drop the running
      * thread's reference together with CLOSURE_RUNNING in a single
      * closure_sub(), then return from the calling function. Because of the
      * embedded 'return;' it can only be used in functions returning void.
      */
 494 #define continue_at(_cl, _fn, _wq)                                      \
 495 do {                                                                    \
 496         set_closure_fn(_cl, _fn, _wq);                                  \
 497         closure_sub(_cl, CLOSURE_RUNNING + 1);                          \
 498         return;                                                         \
 499 } while (0)
 500
     /* closure_return() - finish the closure: continue_at() with no function. */
 501 #define closure_return(_cl)     continue_at((_cl), NULL, NULL)
 502
     /*
      * continue_at_nobarrier() - set @_fn/@_wq and hand the closure to
      * closure_queue() right away, without modifying ->remaining, then return
      * from the calling function (void functions only).
      */
 503 #define continue_at_nobarrier(_cl, _fn, _wq)                            \
 504 do {                                                                    \
 505         set_closure_fn(_cl, _fn, _wq);                                  \
 506         closure_queue(_cl);                                             \
 507         return;                                                         \
 508 } while (0)
 509
     /*
      * closure_return_with_destructor() - like closure_return(), but stores
      * @_destructor as the function (no workqueue). The single closure_sub()
      * drops one reference, clears CLOSURE_RUNNING and - because
      * CLOSURE_DESTRUCTOR is subtracted from the amount - sets
      * CLOSURE_DESTRUCTOR. Returns from the calling function (void only).
      */
 510 #define closure_return_with_destructor(_cl, _destructor)                \
 511 do {                                                                    \
 512         set_closure_fn(_cl, _destructor, NULL);                         \
 513         closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);     \
 514         return;                                                         \
 515 } while (0)
 516
     /*
      * closure_call() - initialise @cl with @parent (closure_init() zeroes it
      * first) and immediately pass @fn to closure_queue() through
      * continue_at_nobarrier(): on @wq if non-NULL, otherwise called directly.
      */
 517 static inline void closure_call(struct closure *cl, closure_fn fn,
 518                                 struct workqueue_struct *wq,
 519                                 struct closure *parent)
 520 {
 521         closure_init(cl, parent);
 522         continue_at_nobarrier(cl, fn, wq);
 523 }
 524
     /*
      * closure_trylock_call() - if closure_trylock(@cl, @parent) succeeds, run @fn
      * through continue_at_nobarrier() (on @wq, or directly when @wq is NULL);
      * otherwise do nothing.
      */
 525 static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
 526                                         struct workqueue_struct *wq,
 527                                         struct closure *parent)
 528 {
 529         /* somebody else holds the closure: nothing to do */
 530         if (!closure_trylock(cl, parent))
                     return;
     
             continue_at_nobarrier(cl, fn, wq);
 531 }
 532
 533 #endif /* _LINUX_CLOSURE_H */