Merge tag 'v3.13-rc6' into for-3.14/core
author	Jens Axboe <axboe@kernel.dk>
	Tue, 31 Dec 2013 16:51:02 +0000 (09:51 -0700)
committer	Jens Axboe <axboe@kernel.dk>
	Tue, 31 Dec 2013 16:51:02 +0000 (09:51 -0700)
Needed to bring blk-mq up to date, since changes have been going in
since for-3.14/core was established.

Fixup merge issues related to the immutable biovec changes.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts:
block/blk-flush.c
fs/btrfs/check-integrity.c
fs/btrfs/extent_io.c
fs/btrfs/scrub.c
fs/logfs/dev_bdev.c

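Most of the fixup noise in this merge comes from the immutable biovec series, which moves a bio's current position and residual size out of struct bio itself and into an embedded struct bvec_iter; the hunks below mechanically convert bio->bi_sector and bio->bi_size into bio->bi_iter.bi_sector and bio->bi_iter.bi_size. The following standalone C sketch only models that field move for illustration; the struct definitions are trimmed stand-ins, not the kernel ones.

#include <stdio.h>

typedef unsigned long long sector_t;

struct bvec_iter {
        sector_t     bi_sector;   /* device address, in 512-byte sectors */
        unsigned int bi_size;     /* residual I/O count, in bytes */
};

struct bio {
        struct bvec_iter bi_iter; /* was: bi_sector and bi_size directly on the bio */
};

int main(void)
{
        struct bio bio = { .bi_iter = { .bi_sector = 2048, .bi_size = 4096 } };

        /* Pre-3.14 code wrote bio.bi_sector / bio.bi_size; converted code
         * goes through the iterator, exactly the pattern in the hunks below. */
        printf("sector=%llu size=%u\n", bio.bi_iter.bi_sector, bio.bi_iter.bi_size);
        return 0;
}
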
20 files changed:
block/blk-mq.c
drivers/block/xen-blkfront.c
drivers/md/bcache/bcache.h
drivers/md/bcache/btree.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/super.c
drivers/md/bcache/util.c
drivers/md/bcache/writeback.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-policy-mq.c
drivers/md/dm-cache-target.c
drivers/md/dm-delay.c
drivers/md/dm-snap.c
drivers/md/dm-thin.c
drivers/md/md.c
drivers/md/raid5.c
drivers/s390/block/dasd_eckd.c
drivers/scsi/sd.c
drivers/staging/zram/zram_drv.c
fs/xfs/xfs_buf.c

diff --combined block/blk-mq.c
index e4fbcc3fd2db00859269d87ea588abd439b02afb,c79126e110308e8b1ea4b322506a425ceeb3085c..3929f43d0b03097b2ee96f23413071b9c466499e
@@@ -202,10 -202,12 +202,12 @@@ static struct request *blk_mq_alloc_req
                if (rq) {
                        blk_mq_rq_ctx_init(q, ctx, rq, rw);
                        break;
-               } else if (!(gfp & __GFP_WAIT))
-                       break;
+               }
  
                blk_mq_put_ctx(ctx);
+               if (!(gfp & __GFP_WAIT))
+                       break;
                __blk_mq_run_hw_queue(hctx);
                blk_mq_wait_for_tags(hctx->tags);
        } while (1);
@@@ -222,7 -224,8 +224,8 @@@ struct request *blk_mq_alloc_request(st
                return NULL;
  
        rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
-       blk_mq_put_ctx(rq->mq_ctx);
+       if (rq)
+               blk_mq_put_ctx(rq->mq_ctx);
        return rq;
  }
  
@@@ -235,7 -238,8 +238,8 @@@ struct request *blk_mq_alloc_reserved_r
                return NULL;
  
        rq = blk_mq_alloc_request_pinned(q, rw, gfp, true);
-       blk_mq_put_ctx(rq->mq_ctx);
+       if (rq)
+               blk_mq_put_ctx(rq->mq_ctx);
        return rq;
  }
  EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
@@@ -301,19 -305,19 +305,19 @@@ void blk_mq_complete_request(struct req
                struct bio *next = bio->bi_next;
  
                bio->bi_next = NULL;
 -              bytes += bio->bi_size;
 +              bytes += bio->bi_iter.bi_size;
                blk_mq_bio_endio(rq, bio, error);
                bio = next;
        }
  
        blk_account_io_completion(rq, bytes);
  
+       blk_account_io_done(rq);
        if (rq->end_io)
                rq->end_io(rq, error);
        else
                blk_mq_free_request(rq);
-       blk_account_io_done(rq);
  }
  
  void __blk_mq_end_io(struct request *rq, int error)
diff --combined drivers/block/xen-blkfront.c
index 80e86307dd4b043e295e4b2adff3f09789621cd6,c4a4c900628891c31fec541277cb7ffea9a3e68b..26ad7923e3319c802a4ab252001ecdf8785b8d60
@@@ -489,7 -489,7 +489,7 @@@ static int blkif_queue_request(struct r
  
                        if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
                            (i % SEGS_PER_INDIRECT_FRAME == 0)) {
-                               unsigned long pfn;
+                               unsigned long uninitialized_var(pfn);
  
                                if (segments)
                                        kunmap_atomic(segments);
@@@ -1547,7 -1547,7 +1547,7 @@@ static int blkif_recover(struct blkfron
                        for (i = 0; i < pending; i++) {
                                offset = (i * segs * PAGE_SIZE) >> 9;
                                size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
 -                                         (unsigned int)(bio->bi_size >> 9) - offset);
 +                                         (unsigned int)bio_sectors(bio) - offset);
                                cloned_bio = bio_clone(bio, GFP_NOIO);
                                BUG_ON(cloned_bio == NULL);
                                bio_trim(cloned_bio, offset, size);
@@@ -2011,6 -2011,10 +2011,10 @@@ static void blkif_release(struct gendis
  
        bdev = bdget_disk(disk, 0);
  
+       if (!bdev) {
+               WARN(1, "Block device %s yanked out from us!\n", disk->disk_name);
+               goto out_mutex;
+       }
        if (bdev->bd_openers)
                goto out;
  
  
  out:
        bdput(bdev);
+ out_mutex:
        mutex_unlock(&blkfront_mutex);
  }
  
diff --combined drivers/md/bcache/bcache.h
index 964353c5329d4d997ca16aae8c4bfdf72b7b816e,754f4317748322e7450d69da9591b6b72aff6dc4..dbdbca5a95910a421898cef06bcf902a43401c6a
@@@ -197,7 -197,7 +197,7 @@@ struct bucket 
        uint8_t         disk_gen;
        uint8_t         last_gc; /* Most out of date gen in the btree */
        uint8_t         gc_gen;
-       uint16_t        gc_mark;
+       uint16_t        gc_mark; /* Bitfield used by GC. See below for field */
  };
  
  /*
@@@ -209,7 -209,8 +209,8 @@@ BITMASK(GC_MARK,    struct bucket, gc_mar
  #define GC_MARK_RECLAIMABLE   0
  #define GC_MARK_DIRTY         1
  #define GC_MARK_METADATA      2
- BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
+ BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+ BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
  
  #include "journal.h"
  #include "stats.h"
@@@ -279,6 -280,7 +280,6 @@@ struct bcache_device 
        unsigned long           sectors_dirty_last;
        long                    sectors_dirty_derivative;
  
 -      mempool_t               *unaligned_bvec;
        struct bio_set          *bio_split;
  
        unsigned                data_csum:1;
@@@ -371,14 -373,14 +372,14 @@@ struct cached_dev 
        unsigned char           writeback_percent;
        unsigned                writeback_delay;
  
-       int                     writeback_rate_change;
-       int64_t                 writeback_rate_derivative;
        uint64_t                writeback_rate_target;
+       int64_t                 writeback_rate_proportional;
+       int64_t                 writeback_rate_derivative;
+       int64_t                 writeback_rate_change;
  
        unsigned                writeback_rate_update_seconds;
        unsigned                writeback_rate_d_term;
        unsigned                writeback_rate_p_term_inverse;
-       unsigned                writeback_rate_d_smooth;
  };
  
  enum alloc_watermarks {
@@@ -444,7 -446,6 +445,6 @@@ struct cache 
         * call prio_write() to keep gens from wrapping.
         */
        uint8_t                 need_save_prio;
-       unsigned                gc_move_threshold;
  
        /*
         * If nonzero, we know we aren't going to find any buckets to invalidate
@@@ -901,6 -902,7 +901,6 @@@ void bch_bbio_endio(struct cache_set *
  void bch_bbio_free(struct bio *, struct cache_set *);
  struct bio *bch_bbio_alloc(struct cache_set *);
  
 -struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
  void bch_generic_make_request(struct bio *, struct bio_split_pool *);
  void __bch_submit_bbio(struct bio *, struct cache_set *);
  void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
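
The bcache.h hunk above repacks the 16-bit gc_mark word: the 2-bit mark keeps its place, GC_SECTORS_USED shrinks from 14 to 13 bits, and the freed top bit becomes the new GC_MOVE flag. Below is a minimal userspace sketch of that layout; FIELD_GET/FIELD_SET are simplified stand-ins for bcache's BITMASK() accessors, not the real macro.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_GET(word, offset, bits) \
        (((word) >> (offset)) & ((1u << (bits)) - 1u))
#define FIELD_SET(word, offset, bits, val) \
        ((word) = ((word) & ~(((1u << (bits)) - 1u) << (offset))) | \
                  (((val) & ((1u << (bits)) - 1u)) << (offset)))

int main(void)
{
        uint16_t gc_mark = 0;

        FIELD_SET(gc_mark, 0, 2, 2);      /* GC_MARK, value GC_MARK_METADATA */
        FIELD_SET(gc_mark, 2, 13, 4096);  /* GC_SECTORS_USED, now 13 bits   */
        FIELD_SET(gc_mark, 15, 1, 1);     /* GC_MOVE, the new top bit       */

        assert(FIELD_GET(gc_mark, 0, 2)  == 2);
        assert(FIELD_GET(gc_mark, 2, 13) == 4096);
        assert(FIELD_GET(gc_mark, 15, 1) == 1);
        printf("gc_mark=0x%04x\n", (unsigned)gc_mark);
        return 0;
}

Shrinking GC_SECTORS_USED by one bit is what frees bit 15 for the flag; moving GC then tags candidate buckets with SET_GC_MOVE(b, 1) instead of comparing against a per-cache gc_move_threshold, as the movinggc.c hunks further down show.
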
diff --combined drivers/md/bcache/btree.c
index b62f3792537454c13cd7ba7c881428a54e8cbfa4,31bb53fcc67a40806cf73659a596f98297d36128..946ecd3b048b0ae1c9bd47ab4c42572f7919b838
@@@ -299,7 -299,7 +299,7 @@@ void bch_btree_node_read(struct btree *
  
        bio = bch_bbio_alloc(b->c);
        bio->bi_rw      = REQ_META|READ_SYNC;
 -      bio->bi_size    = KEY_SIZE(&b->key) << 9;
 +      bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
        bio->bi_end_io  = btree_node_read_endio;
        bio->bi_private = &cl;
  
@@@ -362,7 -362,7 +362,7 @@@ static void btree_node_write_done(struc
        struct bio_vec *bv;
        int n;
  
 -      __bio_for_each_segment(bv, b->bio, n, 0)
 +      bio_for_each_segment_all(bv, b->bio, n)
                __free_page(bv->bv_page);
  
        __btree_node_write_done(cl);
@@@ -395,7 -395,7 +395,7 @@@ static void do_btree_node_write(struct 
        b->bio->bi_end_io       = btree_node_write_endio;
        b->bio->bi_private      = cl;
        b->bio->bi_rw           = REQ_META|WRITE_SYNC|REQ_FUA;
 -      b->bio->bi_size         = set_blocks(i, b->c) * block_bytes(b->c);
 +      b->bio->bi_iter.bi_size = set_blocks(i, b->c) * block_bytes(b->c);
        bch_bio_map(b->bio, i);
  
        /*
                struct bio_vec *bv;
                void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
  
 -              bio_for_each_segment(bv, b->bio, j)
 +              bio_for_each_segment_all(bv, b->bio, j)
                        memcpy(page_address(bv->bv_page),
                               base + j * PAGE_SIZE, PAGE_SIZE);
  
@@@ -1561,6 -1561,28 +1561,28 @@@ size_t bch_btree_gc_finish(struct cache
                SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
                            GC_MARK_METADATA);
  
+       /* don't reclaim buckets to which writeback keys point */
+       rcu_read_lock();
+       for (i = 0; i < c->nr_uuids; i++) {
+               struct bcache_device *d = c->devices[i];
+               struct cached_dev *dc;
+               struct keybuf_key *w, *n;
+               unsigned j;
+               if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
+                       continue;
+               dc = container_of(d, struct cached_dev, disk);
+               spin_lock(&dc->writeback_keys.lock);
+               rbtree_postorder_for_each_entry_safe(w, n,
+                                       &dc->writeback_keys.keys, node)
+                       for (j = 0; j < KEY_PTRS(&w->key); j++)
+                               SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
+                                           GC_MARK_DIRTY);
+               spin_unlock(&dc->writeback_keys.lock);
+       }
+       rcu_read_unlock();
        for_each_cache(ca, c, i) {
                uint64_t *i;
  
@@@ -1817,7 -1839,8 +1839,8 @@@ static bool fix_overlapping_extents(str
                        if (KEY_START(k) > KEY_START(insert) + sectors_found)
                                goto check_failed;
  
-                       if (KEY_PTRS(replace_key) != KEY_PTRS(k))
+                       if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
+                           KEY_DIRTY(k) != KEY_DIRTY(replace_key))
                                goto check_failed;
  
                        /* skip past gen */
@@@ -2217,7 -2240,7 +2240,7 @@@ struct btree_insert_op 
        struct bkey     *replace_key;
  };
  
- int btree_insert_fn(struct btree_op *b_op, struct btree *b)
+ static int btree_insert_fn(struct btree_op *b_op, struct btree *b)
  {
        struct btree_insert_op *op = container_of(b_op,
                                        struct btree_insert_op, op);
diff --combined drivers/md/bcache/movinggc.c
index 581f95df8265579e94a9546d84644be8deb335c2,f2f0998c4a91872407dd036a54fe72d243885fed..052bd24d24b42b42c3d434564a361dc3b693a2e5
@@@ -25,10 -25,9 +25,9 @@@ static bool moving_pred(struct keybuf *
        unsigned i;
  
        for (i = 0; i < KEY_PTRS(k); i++) {
-               struct cache *ca = PTR_CACHE(c, k, i);
                struct bucket *g = PTR_BUCKET(c, k, i);
  
-               if (GC_SECTORS_USED(g) < ca->gc_move_threshold)
+               if (GC_MOVE(g))
                        return true;
        }
  
@@@ -65,11 -64,16 +64,16 @@@ static void write_moving_finish(struct 
  
  static void read_moving_endio(struct bio *bio, int error)
  {
+       struct bbio *b = container_of(bio, struct bbio, bio);
        struct moving_io *io = container_of(bio->bi_private,
                                            struct moving_io, cl);
  
        if (error)
                io->op.error = error;
+       else if (!KEY_DIRTY(&b->key) &&
+                ptr_stale(io->op.c, &b->key, 0)) {
+               io->op.error = -EINTR;
+       }
  
        bch_bbio_endio(io->op.c, bio, error, "reading data to move");
  }
@@@ -82,7 -86,7 +86,7 @@@ static void moving_init(struct moving_i
        bio_get(bio);
        bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
  
 -      bio->bi_size            = KEY_SIZE(&io->w->key) << 9;
 +      bio->bi_iter.bi_size    = KEY_SIZE(&io->w->key) << 9;
        bio->bi_max_vecs        = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
                                               PAGE_SECTORS);
        bio->bi_private         = &io->cl;
@@@ -98,7 -102,7 +102,7 @@@ static void write_moving(struct closur
        if (!op->error) {
                moving_init(io);
  
 -              io->bio.bio.bi_sector = KEY_START(&io->w->key);
 +              io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
                op->write_prio          = 1;
                op->bio                 = &io->bio.bio;
  
@@@ -141,6 -145,11 +145,11 @@@ static void read_moving(struct cache_se
                if (!w)
                        break;
  
+               if (ptr_stale(c, &w->key, 0)) {
+                       bch_keybuf_del(&c->moving_gc_keys, w);
+                       continue;
+               }
                io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
                             * DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
                             GFP_KERNEL);
@@@ -184,7 -193,8 +193,8 @@@ static bool bucket_cmp(struct bucket *l
  
  static unsigned bucket_heap_top(struct cache *ca)
  {
-       return GC_SECTORS_USED(heap_peek(&ca->heap));
+       struct bucket *b;
+       return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
  }
  
  void bch_moving_gc(struct cache_set *c)
                        sectors_to_move -= GC_SECTORS_USED(b);
                }
  
-               ca->gc_move_threshold = bucket_heap_top(ca);
-               pr_debug("threshold %u", ca->gc_move_threshold);
+               while (heap_pop(&ca->heap, b, bucket_cmp))
+                       SET_GC_MOVE(b, 1);
        }
  
        mutex_unlock(&c->bucket_lock);
diff --combined drivers/md/bcache/super.c
index 60fb6044b9535fcb1864b12af36b479014ad641b,c57bfa071a57c58b06fabeb194cbf98f5f4fbf56..93d593f957f662829c30feba2ea2804e88fa8aa8
@@@ -233,9 -233,9 +233,9 @@@ static void __write_super(struct cache_
        struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page);
        unsigned i;
  
 -      bio->bi_sector  = SB_SECTOR;
 -      bio->bi_rw      = REQ_SYNC|REQ_META;
 -      bio->bi_size    = SB_SIZE;
 +      bio->bi_iter.bi_sector  = SB_SECTOR;
 +      bio->bi_rw              = REQ_SYNC|REQ_META;
 +      bio->bi_iter.bi_size    = SB_SIZE;
        bch_bio_map(bio, NULL);
  
        out->offset             = cpu_to_le64(sb->offset);
@@@ -347,7 -347,7 +347,7 @@@ static void uuid_io(struct cache_set *c
                struct bio *bio = bch_bbio_alloc(c);
  
                bio->bi_rw      = REQ_SYNC|REQ_META|rw;
 -              bio->bi_size    = KEY_SIZE(k) << 9;
 +              bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
  
                bio->bi_end_io  = uuid_endio;
                bio->bi_private = cl;
@@@ -503,10 -503,10 +503,10 @@@ static void prio_io(struct cache *ca, u
  
        closure_init_stack(cl);
  
 -      bio->bi_sector  = bucket * ca->sb.bucket_size;
 -      bio->bi_bdev    = ca->bdev;
 -      bio->bi_rw      = REQ_SYNC|REQ_META|rw;
 -      bio->bi_size    = bucket_bytes(ca);
 +      bio->bi_iter.bi_sector  = bucket * ca->sb.bucket_size;
 +      bio->bi_bdev            = ca->bdev;
 +      bio->bi_rw              = REQ_SYNC|REQ_META|rw;
 +      bio->bi_iter.bi_size    = bucket_bytes(ca);
  
        bio->bi_end_io  = prio_endio;
        bio->bi_private = ca;
@@@ -739,6 -739,8 +739,6 @@@ static void bcache_device_free(struct b
        }
  
        bio_split_pool_free(&d->bio_split_hook);
 -      if (d->unaligned_bvec)
 -              mempool_destroy(d->unaligned_bvec);
        if (d->bio_split)
                bioset_free(d->bio_split);
        if (is_vmalloc_addr(d->full_dirty_stripes))
@@@ -791,6 -793,8 +791,6 @@@ static int bcache_device_init(struct bc
                return minor;
  
        if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
 -          !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
 -                              sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
            bio_split_pool_init(&d->bio_split_hook) ||
            !(d->disk = alloc_disk(1))) {
                ida_simple_remove(&bcache_minor, minor);
@@@ -1672,7 -1676,7 +1672,7 @@@ err
  static bool can_attach_cache(struct cache *ca, struct cache_set *c)
  {
        return ca->sb.block_size        == c->sb.block_size &&
-               ca->sb.bucket_size      == c->sb.block_size &&
+               ca->sb.bucket_size      == c->sb.bucket_size &&
                ca->sb.nr_in_set        == c->sb.nr_in_set;
  }
  
diff --combined drivers/md/bcache/util.c
index c57621e49dc002c7b94b7bf34f2a1aa06a3f1b46,bb37618e76648b7bc3caf99532e4f81b48666dfe..db3ae4c2b2233a4026ebe8a183042eb84d53cdc4
@@@ -209,7 -209,13 +209,13 @@@ uint64_t bch_next_delay(struct bch_rate
  {
        uint64_t now = local_clock();
  
-       d->next += div_u64(done, d->rate);
+       d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+       if (time_before64(now + NSEC_PER_SEC, d->next))
+               d->next = now + NSEC_PER_SEC;
+       if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+               d->next = now - NSEC_PER_SEC * 2;
  
        return time_after64(d->next, now)
                ? div_u64(d->next - now, NSEC_PER_SEC / HZ)
  
  void bch_bio_map(struct bio *bio, void *base)
  {
 -      size_t size = bio->bi_size;
 +      size_t size = bio->bi_iter.bi_size;
        struct bio_vec *bv = bio->bi_io_vec;
  
 -      BUG_ON(!bio->bi_size);
 +      BUG_ON(!bio->bi_iter.bi_size);
        BUG_ON(bio->bi_vcnt);
  
        bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0;
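
The util.c hunk above changes bch_next_delay() to account work in nanoseconds (done * NSEC_PER_SEC / rate) and to clamp d->next to a window around the current time, so a device that has been idle, or has fallen badly behind, cannot bank an unbounded credit or debt. The following standalone model assumes rate is in units per second; struct rate and next_delay() are illustrative names, not the kernel API.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

struct rate {
        uint64_t rate;   /* units (e.g. sectors) per second */
        uint64_t next;   /* absolute time the next unit of work is "due" */
};

/* Returns how long (ns) the caller should wait before issuing more work. */
static uint64_t next_delay(struct rate *d, uint64_t done, uint64_t now)
{
        d->next += done * NSEC_PER_SEC / d->rate;

        /* At most one second of work scheduled into the future... */
        if (d->next > now + NSEC_PER_SEC)
                d->next = now + NSEC_PER_SEC;
        /* ...and at most two seconds of banked credit from idle periods
         * (the underflow guard is a simplification of the kernel's
         * wrap-safe time_after64() comparison). */
        if (now >= 2 * NSEC_PER_SEC && d->next < now - 2 * NSEC_PER_SEC)
                d->next = now - 2 * NSEC_PER_SEC;

        return d->next > now ? d->next - now : 0;
}

int main(void)
{
        struct rate d = { .rate = 1024, .next = 0 };

        printf("delay=%llu ns\n", (unsigned long long)next_delay(&d, 512, 0));
        return 0;
}

In the writeback path this pairs with writeback_delay() now passing the raw sector count straight to bch_next_delay() instead of a pre-scaled value, as the writeback.c hunk below shows.
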
diff --combined drivers/md/bcache/writeback.c
index 04657e93f4fd55e0985797c5813bffd893ca65bc,6c44fe059c2769a4b2c317f25878596268726f0d..f4300e4c0114a0cc1abc3b90f757a03666d2637b
@@@ -30,38 -30,40 +30,40 @@@ static void __update_writeback_rate(str
  
        /* PD controller */
  
-       int change = 0;
-       int64_t error;
        int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
        int64_t derivative = dirty - dc->disk.sectors_dirty_last;
+       int64_t proportional = dirty - target;
+       int64_t change;
  
        dc->disk.sectors_dirty_last = dirty;
  
-       derivative *= dc->writeback_rate_d_term;
-       derivative = clamp(derivative, -dirty, dirty);
+       /* Scale to sectors per second */
  
-       derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
-                             dc->writeback_rate_d_smooth, 0);
+       proportional *= dc->writeback_rate_update_seconds;
+       proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
  
-       /* Avoid divide by zero */
-       if (!target)
-               goto out;
+       derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
  
-       error = div64_s64((dirty + derivative - target) << 8, target);
+       derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
+                             (dc->writeback_rate_d_term /
+                              dc->writeback_rate_update_seconds) ?: 1, 0);
+       derivative *= dc->writeback_rate_d_term;
+       derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
  
-       change = div_s64((dc->writeback_rate.rate * error) >> 8,
-                        dc->writeback_rate_p_term_inverse);
+       change = proportional + derivative;
  
        /* Don't increase writeback rate if the device isn't keeping up */
        if (change > 0 &&
            time_after64(local_clock(),
-                        dc->writeback_rate.next + 10 * NSEC_PER_MSEC))
+                        dc->writeback_rate.next + NSEC_PER_MSEC))
                change = 0;
  
        dc->writeback_rate.rate =
-               clamp_t(int64_t, dc->writeback_rate.rate + change,
+               clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
                        1, NSEC_PER_MSEC);
- out:
+       dc->writeback_rate_proportional = proportional;
        dc->writeback_rate_derivative = derivative;
        dc->writeback_rate_change = change;
        dc->writeback_rate_target = target;
@@@ -87,15 -89,11 +89,11 @@@ static void update_writeback_rate(struc
  
  static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
  {
-       uint64_t ret;
        if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
            !dc->writeback_percent)
                return 0;
  
-       ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
-       return min_t(uint64_t, ret, HZ);
+       return bch_next_delay(&dc->writeback_rate, sectors);
  }
  
  struct dirty_io {
@@@ -113,7 -111,7 +111,7 @@@ static void dirty_init(struct keybuf_ke
        if (!io->dc->writeback_percent)
                bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
  
 -      bio->bi_size            = KEY_SIZE(&w->key) << 9;
 +      bio->bi_iter.bi_size    = KEY_SIZE(&w->key) << 9;
        bio->bi_max_vecs        = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS);
        bio->bi_private         = w;
        bio->bi_io_vec          = bio->bi_inline_vecs;
@@@ -186,7 -184,7 +184,7 @@@ static void write_dirty(struct closure 
  
        dirty_init(w);
        io->bio.bi_rw           = WRITE;
 -      io->bio.bi_sector       = KEY_START(&w->key);
 +      io->bio.bi_iter.bi_sector = KEY_START(&w->key);
        io->bio.bi_bdev         = io->dc->bdev;
        io->bio.bi_end_io       = dirty_endio;
  
@@@ -241,7 -239,7 +239,7 @@@ static void read_dirty(struct cached_de
                if (KEY_START(&w->key) != dc->last_read ||
                    jiffies_to_msecs(delay) > 50)
                        while (!kthread_should_stop() && delay)
-                               delay = schedule_timeout_interruptible(delay);
+                               delay = schedule_timeout_uninterruptible(delay);
  
                dc->last_read   = KEY_OFFSET(&w->key);
  
                io->dc          = dc;
  
                dirty_init(w);
 -              io->bio.bi_sector       = PTR_OFFSET(&w->key, 0);
 +              io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
                io->bio.bi_bdev         = PTR_CACHE(dc->disk.c,
                                                    &w->key, 0)->bdev;
                io->bio.bi_rw           = READ;
@@@ -438,7 -436,7 +436,7 @@@ static int bch_writeback_thread(void *a
                        while (delay &&
                               !kthread_should_stop() &&
                               !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
-                               delay = schedule_timeout_interruptible(delay);
+                               delay = schedule_timeout_uninterruptible(delay);
                }
        }
  
@@@ -476,6 -474,8 +474,8 @@@ void bch_sectors_dirty_init(struct cach
  
        bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
                           sectors_dirty_init_fn, 0);
+       dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
  }
  
  int bch_cached_dev_writeback_init(struct cached_dev *dc)
        dc->writeback_delay             = 30;
        dc->writeback_rate.rate         = 1024;
  
-       dc->writeback_rate_update_seconds = 30;
-       dc->writeback_rate_d_term       = 16;
-       dc->writeback_rate_p_term_inverse = 64;
-       dc->writeback_rate_d_smooth     = 8;
+       dc->writeback_rate_update_seconds = 5;
+       dc->writeback_rate_d_term       = 30;
+       dc->writeback_rate_p_term_inverse = 6000;
  
        dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
                                              "bcache_writeback");
        if (IS_ERR(dc->writeback_thread))
                return PTR_ERR(dc->writeback_thread);
  
-       set_task_state(dc->writeback_thread, TASK_INTERRUPTIBLE);
        INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
        schedule_delayed_work(&dc->writeback_rate_update,
                              dc->writeback_rate_update_seconds * HZ);
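
The writeback.c changes replace the old error-based controller with an explicit proportional-plus-derivative calculation and gentler defaults (update every 5 seconds, p_term_inverse 6000, d_term 30). The sketch below is a simplified userspace model of that control loop: struct wb and update_rate() are illustrative names, a plain EWMA stands in for bcache's ewma_add() helper, and the upper rate clamp is omitted; it shows the shape of the computation, not the exact kernel arithmetic.

#include <stdint.h>
#include <stdio.h>

struct wb {
        int64_t rate;                 /* writeback rate, sectors per second */
        int64_t sectors_dirty_last;
        int64_t derivative_ewma;
        int64_t update_seconds;       /* new default: 5    */
        int64_t d_term;               /* new default: 30   */
        int64_t p_term_inverse;       /* new default: 6000 */
};

static void update_rate(struct wb *wb, int64_t dirty, int64_t target)
{
        int64_t proportional = dirty - target;
        int64_t derivative = dirty - wb->sectors_dirty_last;
        int64_t weight = wb->d_term / wb->update_seconds;

        wb->sectors_dirty_last = dirty;

        /* Proportional term: distance from the dirty-data target, scaled to
         * sectors/sec and damped by 1/p_term_inverse. */
        proportional = proportional * wb->update_seconds / wb->p_term_inverse;

        /* Derivative term: rate of change of dirty data, smoothed. */
        derivative /= wb->update_seconds;
        wb->derivative_ewma += (derivative - wb->derivative_ewma) /
                               (weight > 0 ? weight : 1);
        derivative = wb->derivative_ewma * wb->d_term / wb->p_term_inverse;

        wb->rate += proportional + derivative;
        if (wb->rate < 1)             /* the kernel also clamps the upper bound */
                wb->rate = 1;
}

int main(void)
{
        struct wb wb = { .rate = 1024, .update_seconds = 5,
                         .d_term = 30, .p_term_inverse = 6000 };

        update_rate(&wb, 1 << 20, 1 << 19);   /* 512 MiB dirty vs 256 MiB target */
        printf("rate=%lld sectors/s\n", (long long)wb.rate);
        return 0;
}
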
diff --combined drivers/md/dm-bufio.c
index 4113b6044b80a2125290888d6da1efa2d4c63c6c,54bdd923316f92818d509f06986c9a9a3b7b1927..a1b58a65d8ed849ecef2217e1b93f2a5d3e42ba1
@@@ -538,7 -538,7 +538,7 @@@ static void use_inline_bio(struct dm_bu
        bio_init(&b->bio);
        b->bio.bi_io_vec = b->bio_vec;
        b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
 -      b->bio.bi_sector = block << b->c->sectors_per_block_bits;
 +      b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
        b->bio.bi_bdev = b->c->bdev;
        b->bio.bi_end_io = end_io;
  
@@@ -1717,6 -1717,11 +1717,11 @@@ static int __init dm_bufio_init(void
  {
        __u64 mem;
  
+       dm_bufio_allocated_kmem_cache = 0;
+       dm_bufio_allocated_get_free_pages = 0;
+       dm_bufio_allocated_vmalloc = 0;
+       dm_bufio_current_allocated = 0;
        memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
        memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
  
diff --combined drivers/md/dm-cache-policy-mq.c
index bfba97dcde2d5a424853882bca6cd7384ff153c9,64780ad73bb01737a957ea9682d016da4dbffa93..d13a16865d03ddc4ec418618d3d8fbdb4909ddc8
@@@ -72,7 -72,7 +72,7 @@@ static enum io_pattern iot_pattern(stru
  
  static void iot_update_stats(struct io_tracker *t, struct bio *bio)
  {
 -      if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
 +      if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
                t->nr_seq_samples++;
        else {
                /*
@@@ -87,7 -87,7 +87,7 @@@
                t->nr_rand_samples++;
        }
  
 -      t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
 +      t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
  }
  
  static void iot_check_for_pattern_switch(struct io_tracker *t)
@@@ -730,15 -730,18 +730,18 @@@ static int pre_cache_entry_found(struc
        int r = 0;
        bool updated = updated_this_tick(mq, e);
  
-       requeue_and_update_tick(mq, e);
        if ((!discarded_oblock && updated) ||
-           !should_promote(mq, e, discarded_oblock, data_dir))
+           !should_promote(mq, e, discarded_oblock, data_dir)) {
+               requeue_and_update_tick(mq, e);
                result->op = POLICY_MISS;
-       else if (!can_migrate)
+       } else if (!can_migrate)
                r = -EWOULDBLOCK;
-       else
+       else {
+               requeue_and_update_tick(mq, e);
                r = pre_cache_to_cache(mq, e, result);
+       }
  
        return r;
  }
diff --combined drivers/md/dm-cache-target.c
index 7c8dd1f69ce03464cc4a4277d2427130609e934b,1b1469ebe5cbad66af0853e0766ba20dbee63275..99f91628a33aa6b6969d2b87b61a861e85d16de6
@@@ -85,12 -85,6 +85,12 @@@ static void dm_unhook_bio(struct dm_hoo
  {
        bio->bi_end_io = h->bi_end_io;
        bio->bi_private = h->bi_private;
 +
 +      /*
 +       * Must bump bi_remaining to allow bio to complete with
 +       * restored bi_end_io.
 +       */
 +      atomic_inc(&bio->bi_remaining);
  }
  
  /*----------------------------------------------------------------*/
@@@ -670,17 -664,15 +670,17 @@@ static void remap_to_origin(struct cach
  static void remap_to_cache(struct cache *cache, struct bio *bio,
                           dm_cblock_t cblock)
  {
 -      sector_t bi_sector = bio->bi_sector;
 +      sector_t bi_sector = bio->bi_iter.bi_sector;
  
        bio->bi_bdev = cache->cache_dev->bdev;
        if (!block_size_is_power_of_two(cache))
 -              bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
 -                              sector_div(bi_sector, cache->sectors_per_block);
 +              bio->bi_iter.bi_sector =
 +                      (from_cblock(cblock) * cache->sectors_per_block) +
 +                      sector_div(bi_sector, cache->sectors_per_block);
        else
 -              bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
 -                              (bi_sector & (cache->sectors_per_block - 1));
 +              bio->bi_iter.bi_sector =
 +                      (from_cblock(cblock) << cache->sectors_per_block_shift) |
 +                      (bi_sector & (cache->sectors_per_block - 1));
  }
  
  static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
@@@ -720,7 -712,7 +720,7 @@@ static void remap_to_cache_dirty(struc
  
  static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
  {
 -      sector_t block_nr = bio->bi_sector;
 +      sector_t block_nr = bio->bi_iter.bi_sector;
  
        if (!block_size_is_power_of_two(cache))
                (void) sector_div(block_nr, cache->sectors_per_block);
@@@ -1035,7 -1027,7 +1035,7 @@@ static void issue_overwrite(struct dm_c
  static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
  {
        return (bio_data_dir(bio) == WRITE) &&
 -              (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
 +              (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
  }
  
  static void avoid_copy(struct dm_cache_migration *mg)
@@@ -1260,7 -1252,7 +1260,7 @@@ static void process_flush_bio(struct ca
        size_t pb_data_size = get_per_bio_data_size(cache);
        struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
  
 -      BUG_ON(bio->bi_size);
 +      BUG_ON(bio->bi_iter.bi_size);
        if (!pb->req_nr)
                remap_to_origin(cache, bio);
        else
   */
  static void process_discard_bio(struct cache *cache, struct bio *bio)
  {
 -      dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
 +      dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
                                                  cache->discard_block_size);
 -      dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
 +      dm_block_t end_block = bio_end_sector(bio);
        dm_block_t b;
  
        end_block = block_div(end_block, cache->discard_block_size);
@@@ -2763,7 -2755,7 +2763,7 @@@ static int resize_cache_dev(struct cach
  {
        int r;
  
-       r = dm_cache_resize(cache->cmd, cache->cache_size);
+       r = dm_cache_resize(cache->cmd, new_size);
        if (r) {
                DMERR("could not resize cache metadata");
                return r;
diff --combined drivers/md/dm-delay.c
index 84c860191a2ead900a98198b9818e4dcd483e076,2f91d6d4a2ccf40023c6bccfe142d7781024c810..fc8482a65dd27d0083e1c0f226c619c4cc3142f8
@@@ -20,6 -20,7 +20,7 @@@
  struct delay_c {
        struct timer_list delay_timer;
        struct mutex timer_lock;
+       struct workqueue_struct *kdelayd_wq;
        struct work_struct flush_expired_bios;
        struct list_head delayed_bios;
        atomic_t may_delay;
@@@ -45,14 -46,13 +46,13 @@@ struct dm_delay_info 
  
  static DEFINE_MUTEX(delayed_bios_lock);
  
- static struct workqueue_struct *kdelayd_wq;
  static struct kmem_cache *delayed_cache;
  
  static void handle_delayed_timer(unsigned long data)
  {
        struct delay_c *dc = (struct delay_c *)data;
  
-       queue_work(kdelayd_wq, &dc->flush_expired_bios);
+       queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
  }
  
  static void queue_timeout(struct delay_c *dc, unsigned long expires)
@@@ -191,6 -191,12 +191,12 @@@ out
                goto bad_dev_write;
        }
  
+       dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+       if (!dc->kdelayd_wq) {
+               DMERR("Couldn't start kdelayd");
+               goto bad_queue;
+       }
        setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
  
        INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
        ti->private = dc;
        return 0;
  
+ bad_queue:
+       mempool_destroy(dc->delayed_pool);
  bad_dev_write:
        if (dc->dev_write)
                dm_put_device(ti, dc->dev_write);
@@@ -217,7 -225,7 +225,7 @@@ static void delay_dtr(struct dm_target 
  {
        struct delay_c *dc = ti->private;
  
-       flush_workqueue(kdelayd_wq);
+       destroy_workqueue(dc->kdelayd_wq);
  
        dm_put_device(ti, dc->dev_read);
  
@@@ -281,15 -289,14 +289,15 @@@ static int delay_map(struct dm_target *
        if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
                bio->bi_bdev = dc->dev_write->bdev;
                if (bio_sectors(bio))
 -                      bio->bi_sector = dc->start_write +
 -                                       dm_target_offset(ti, bio->bi_sector);
 +                      bio->bi_iter.bi_sector = dc->start_write +
 +                              dm_target_offset(ti, bio->bi_iter.bi_sector);
  
                return delay_bio(dc, dc->write_delay, bio);
        }
  
        bio->bi_bdev = dc->dev_read->bdev;
 -      bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector);
 +      bio->bi_iter.bi_sector = dc->start_read +
 +              dm_target_offset(ti, bio->bi_iter.bi_sector);
  
        return delay_bio(dc, dc->read_delay, bio);
  }
@@@ -351,12 -358,6 +359,6 @@@ static int __init dm_delay_init(void
  {
        int r = -ENOMEM;
  
-       kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
-       if (!kdelayd_wq) {
-               DMERR("Couldn't start kdelayd");
-               goto bad_queue;
-       }
        delayed_cache = KMEM_CACHE(dm_delay_info, 0);
        if (!delayed_cache) {
                DMERR("Couldn't create delayed bio cache.");
  bad_register:
        kmem_cache_destroy(delayed_cache);
  bad_memcache:
-       destroy_workqueue(kdelayd_wq);
- bad_queue:
        return r;
  }
  
@@@ -383,7 -382,6 +383,6 @@@ static void __exit dm_delay_exit(void
  {
        dm_unregister_target(&delay_target);
        kmem_cache_destroy(delayed_cache);
-       destroy_workqueue(kdelayd_wq);
  }
  
  /* Module hooks */
diff --combined drivers/md/dm-snap.c
index 80b5cabbea29d7d6fb2fc3820b505a3028bcfdec,944690bafd93241d9348f0a4f1cad7f917ce7d83..01b6a11813f29e5ab32e3af144714ab28dc26089
@@@ -66,6 -66,18 +66,18 @@@ struct dm_snapshot 
  
        atomic_t pending_exceptions_count;
  
+       /* Protected by "lock" */
+       sector_t exception_start_sequence;
+       /* Protected by kcopyd single-threaded callback */
+       sector_t exception_complete_sequence;
+       /*
+        * A list of pending exceptions that completed out of order.
+        * Protected by kcopyd single-threaded callback.
+        */
+       struct list_head out_of_order_list;
        mempool_t *pending_pool;
  
        struct dm_exception_table pending;
@@@ -173,6 -185,14 +185,14 @@@ struct dm_snap_pending_exception 
         */
        int started;
  
+       /* There was copying error. */
+       int copy_error;
+       /* A sequence number, it is used for in-order completion. */
+       sector_t exception_sequence;
+       struct list_head out_of_order_entry;
        /*
         * For writing a complete chunk, bypassing the copy.
         */
@@@ -1094,6 -1114,9 +1114,9 @@@ static int snapshot_ctr(struct dm_targe
        s->valid = 1;
        s->active = 0;
        atomic_set(&s->pending_exceptions_count, 0);
+       s->exception_start_sequence = 0;
+       s->exception_complete_sequence = 0;
+       INIT_LIST_HEAD(&s->out_of_order_list);
        init_rwsem(&s->lock);
        INIT_LIST_HEAD(&s->list);
        spin_lock_init(&s->pe_lock);
@@@ -1415,7 -1438,6 +1438,7 @@@ out
        if (full_bio) {
                full_bio->bi_end_io = pe->full_bio_end_io;
                full_bio->bi_private = pe->full_bio_private;
 +              atomic_inc(&full_bio->bi_remaining);
        }
        free_pending_exception(pe);
  
@@@ -1444,6 -1466,19 +1467,19 @@@ static void commit_callback(void *conte
        pending_complete(pe, success);
  }
  
+ static void complete_exception(struct dm_snap_pending_exception *pe)
+ {
+       struct dm_snapshot *s = pe->snap;
+       if (unlikely(pe->copy_error))
+               pending_complete(pe, 0);
+       else
+               /* Update the metadata if we are persistent */
+               s->store->type->commit_exception(s->store, &pe->e,
+                                                commit_callback, pe);
+ }
  /*
   * Called when the copy I/O has finished.  kcopyd actually runs
   * this code so don't block.
@@@ -1453,13 -1488,32 +1489,32 @@@ static void copy_callback(int read_err
        struct dm_snap_pending_exception *pe = context;
        struct dm_snapshot *s = pe->snap;
  
-       if (read_err || write_err)
-               pending_complete(pe, 0);
+       pe->copy_error = read_err || write_err;
  
-       else
-               /* Update the metadata if we are persistent */
-               s->store->type->commit_exception(s->store, &pe->e,
-                                                commit_callback, pe);
+       if (pe->exception_sequence == s->exception_complete_sequence) {
+               s->exception_complete_sequence++;
+               complete_exception(pe);
+               while (!list_empty(&s->out_of_order_list)) {
+                       pe = list_entry(s->out_of_order_list.next,
+                                       struct dm_snap_pending_exception, out_of_order_entry);
+                       if (pe->exception_sequence != s->exception_complete_sequence)
+                               break;
+                       s->exception_complete_sequence++;
+                       list_del(&pe->out_of_order_entry);
+                       complete_exception(pe);
+               }
+       } else {
+               struct list_head *lh;
+               struct dm_snap_pending_exception *pe2;
+               list_for_each_prev(lh, &s->out_of_order_list) {
+                       pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry);
+                       if (pe2->exception_sequence < pe->exception_sequence)
+                               break;
+               }
+               list_add(&pe->out_of_order_entry, lh);
+       }
  }
  
  /*
@@@ -1554,6 -1608,8 +1609,8 @@@ __find_pending_exception(struct dm_snap
                return NULL;
        }
  
+       pe->exception_sequence = s->exception_start_sequence++;
        dm_insert_exception(&s->pending, &pe->e);
  
        return pe;
@@@ -1563,10 -1619,11 +1620,10 @@@ static void remap_exception(struct dm_s
                            struct bio *bio, chunk_t chunk)
  {
        bio->bi_bdev = s->cow->bdev;
 -      bio->bi_sector = chunk_to_sector(s->store,
 -                                       dm_chunk_number(e->new_chunk) +
 -                                       (chunk - e->old_chunk)) +
 -                                       (bio->bi_sector &
 -                                        s->store->chunk_mask);
 +      bio->bi_iter.bi_sector =
 +              chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
 +                              (chunk - e->old_chunk)) +
 +              (bio->bi_iter.bi_sector & s->store->chunk_mask);
  }
  
  static int snapshot_map(struct dm_target *ti, struct bio *bio)
                return DM_MAPIO_REMAPPED;
        }
  
 -      chunk = sector_to_chunk(s->store, bio->bi_sector);
 +      chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
  
        /* Full snapshots are not usable */
        /* To get here the table must be live so s->active is always set. */
                r = DM_MAPIO_SUBMITTED;
  
                if (!pe->started &&
 -                  bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
 +                  bio->bi_iter.bi_size ==
 +                  (s->store->chunk_size << SECTOR_SHIFT)) {
                        pe->started = 1;
                        up_write(&s->lock);
                        start_full_bio(pe, bio);
@@@ -1702,7 -1758,7 +1759,7 @@@ static int snapshot_merge_map(struct dm
                return DM_MAPIO_REMAPPED;
        }
  
 -      chunk = sector_to_chunk(s->store, bio->bi_sector);
 +      chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
  
        down_write(&s->lock);
  
@@@ -2039,7 -2095,7 +2096,7 @@@ static int do_origin(struct dm_dev *ori
        down_read(&_origins_lock);
        o = __lookup_origin(origin->bdev);
        if (o)
 -              r = __origin_write(&o->snapshots, bio->bi_sector, bio);
 +              r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
        up_read(&_origins_lock);
  
        return r;
@@@ -2193,7 -2249,7 +2250,7 @@@ static struct target_type origin_targe
  
  static struct target_type snapshot_target = {
        .name    = "snapshot",
-       .version = {1, 11, 1},
+       .version = {1, 12, 0},
        .module  = THIS_MODULE,
        .ctr     = snapshot_ctr,
        .dtr     = snapshot_dtr,
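
The dm-snap changes above give every pending exception a sequence number at submission time and complete them strictly in that order: a kcopyd callback that arrives early is parked on out_of_order_list and drained once the missing predecessor finishes. The snapshot target version bump to {1, 12, 0} in the same hunk accompanies this behavioural change. Below is a standalone sketch of the scheme; copy_callback(), parked[] and do_complete() are illustrative names, and a small array stands in for the kernel's sorted list.

#include <stdio.h>

#define MAX_PENDING 16

static unsigned long long complete_seq;          /* next sequence we may finish */
static unsigned long long parked[MAX_PENDING];   /* completions that arrived early */
static int nparked;

static void do_complete(unsigned long long seq)
{
        printf("completing exception %llu\n", seq);
}

static void copy_callback(unsigned long long seq)
{
        if (seq != complete_seq) {
                parked[nparked++] = seq;          /* out of order: park it */
                return;
        }

        do_complete(seq);
        complete_seq++;

        /* Drain any parked completions that are now in order. */
        for (int progress = 1; progress; ) {
                progress = 0;
                for (int i = 0; i < nparked; i++) {
                        if (parked[i] == complete_seq) {
                                do_complete(parked[i]);
                                complete_seq++;
                                parked[i] = parked[--nparked];
                                progress = 1;
                        }
                }
        }
}

int main(void)
{
        /* Copies submitted as 0, 1, 2 finish in the order 2, 0, 1. */
        copy_callback(2);
        copy_callback(0);
        copy_callback(1);
        return 0;
}
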
diff --combined drivers/md/dm-thin.c
index 1abb4a24c338e2146274b1d63940d6f3da3b8b4b,ee29037ffc2e74633050b708718ccbe963bf20d1..357eb272dbd9d3337bf5d1a13db443261eb2b24b
@@@ -413,7 -413,7 +413,7 @@@ static bool block_size_is_power_of_two(
  static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
  {
        struct pool *pool = tc->pool;
 -      sector_t block_nr = bio->bi_sector;
 +      sector_t block_nr = bio->bi_iter.bi_sector;
  
        if (block_size_is_power_of_two(pool))
                block_nr >>= pool->sectors_per_block_shift;
  static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
  {
        struct pool *pool = tc->pool;
 -      sector_t bi_sector = bio->bi_sector;
 +      sector_t bi_sector = bio->bi_iter.bi_sector;
  
        bio->bi_bdev = tc->pool_dev->bdev;
        if (block_size_is_power_of_two(pool))
 -              bio->bi_sector = (block << pool->sectors_per_block_shift) |
 -                              (bi_sector & (pool->sectors_per_block - 1));
 +              bio->bi_iter.bi_sector =
 +                      (block << pool->sectors_per_block_shift) |
 +                      (bi_sector & (pool->sectors_per_block - 1));
        else
 -              bio->bi_sector = (block * pool->sectors_per_block) +
 +              bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
                                 sector_div(bi_sector, pool->sectors_per_block);
  }
  
@@@ -611,10 -610,8 +611,10 @@@ static void cell_defer_no_holder(struc
  
  static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
  {
 -      if (m->bio)
 +      if (m->bio) {
                m->bio->bi_end_io = m->saved_bi_end_io;
 +              atomic_inc(&m->bio->bi_remaining);
 +      }
        cell_error(m->tc->pool, m->cell);
        list_del(&m->list);
        mempool_free(m, m->tc->pool->mapping_pool);
@@@ -628,10 -625,8 +628,10 @@@ static void process_prepared_mapping(st
        int r;
  
        bio = m->bio;
 -      if (bio)
 +      if (bio) {
                bio->bi_end_io = m->saved_bi_end_io;
 +              atomic_inc(&bio->bi_remaining);
 +      }
  
        if (m->err) {
                cell_error(pool, m->cell);
         */
        r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
        if (r) {
-               DMERR_LIMIT("dm_thin_insert_block() failed");
+               DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d",
+                           dm_device_name(pool->pool_md), r);
+               set_pool_mode(pool, PM_READ_ONLY);
                cell_error(pool, m->cell);
                goto out;
        }
@@@ -726,8 -723,7 +728,8 @@@ static void process_prepared(struct poo
   */
  static int io_overlaps_block(struct pool *pool, struct bio *bio)
  {
 -      return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT);
 +      return bio->bi_iter.bi_size ==
 +              (pool->sectors_per_block << SECTOR_SHIFT);
  }
  
  static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@@ -887,32 -883,23 +889,23 @@@ static void schedule_zero(struct thin_
        }
  }
  
- static int commit(struct pool *pool)
- {
-       int r;
-       r = dm_pool_commit_metadata(pool->pmd);
-       if (r)
-               DMERR_LIMIT("%s: commit failed: error = %d",
-                           dm_device_name(pool->pool_md), r);
-       return r;
- }
  /*
   * A non-zero return indicates read_only or fail_io mode.
   * Many callers don't care about the return value.
   */
- static int commit_or_fallback(struct pool *pool)
+ static int commit(struct pool *pool)
  {
        int r;
  
        if (get_pool_mode(pool) != PM_WRITE)
                return -EINVAL;
  
-       r = commit(pool);
-       if (r)
+       r = dm_pool_commit_metadata(pool->pmd);
+       if (r) {
+               DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d",
+                           dm_device_name(pool->pool_md), r);
                set_pool_mode(pool, PM_READ_ONLY);
+       }
  
        return r;
  }
@@@ -949,7 -936,9 +942,9 @@@ static int alloc_data_block(struct thin
                 * Try to commit to see if that will free up some
                 * more space.
                 */
-               (void) commit_or_fallback(pool);
+               r = commit(pool);
+               if (r)
+                       return r;
  
                r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
                if (r)
                 * table reload).
                 */
                if (!free_blocks) {
-                       DMWARN("%s: no free space available.",
+                       DMWARN("%s: no free data space available.",
                               dm_device_name(pool->pool_md));
                        spin_lock_irqsave(&pool->lock, flags);
                        pool->no_free_space = 1;
        }
  
        r = dm_pool_alloc_data_block(pool->pmd, result);
-       if (r)
+       if (r) {
+               if (r == -ENOSPC &&
+                   !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) &&
+                   !free_blocks) {
+                       DMWARN("%s: no free metadata space available.",
+                              dm_device_name(pool->pool_md));
+                       set_pool_mode(pool, PM_READ_ONLY);
+               }
                return r;
+       }
  
        return 0;
  }
@@@ -1136,7 -1133,7 +1139,7 @@@ static void process_shared_bio(struct t
        if (bio_detain(pool, &key, bio, &cell))
                return;
  
 -      if (bio_data_dir(bio) == WRITE && bio->bi_size)
 +      if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
                break_sharing(tc, bio, block, &key, lookup_result, cell);
        else {
                struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@@ -1159,7 -1156,7 +1162,7 @@@ static void provision_block(struct thin
        /*
         * Remap empty bios (flushes) immediately, without provisioning.
         */
 -      if (!bio->bi_size) {
 +      if (!bio->bi_iter.bi_size) {
                inc_all_io_entry(pool, bio);
                cell_defer_no_holder(tc, cell);
  
@@@ -1259,7 -1256,7 +1262,7 @@@ static void process_bio_read_only(struc
        r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
        switch (r) {
        case 0:
 -              if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
 +              if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
                        bio_io_error(bio);
                else {
                        inc_all_io_entry(tc->pool, bio);
@@@ -1355,7 -1352,7 +1358,7 @@@ static void process_deferred_bios(struc
        if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
                return;
  
-       if (commit_or_fallback(pool)) {
+       if (commit(pool)) {
                while ((bio = bio_list_pop(&bios)))
                        bio_io_error(bio);
                return;
@@@ -1403,6 -1400,7 +1406,7 @@@ static void set_pool_mode(struct pool *
        case PM_FAIL:
                DMERR("%s: switching pool to failure mode",
                      dm_device_name(pool->pool_md));
+               dm_pool_metadata_read_only(pool->pmd);
                pool->process_bio = process_bio_fail;
                pool->process_discard = process_bio_fail;
                pool->process_prepared_mapping = process_prepared_mapping_fail;
                break;
  
        case PM_WRITE:
+               dm_pool_metadata_read_write(pool->pmd);
                pool->process_bio = process_bio;
                pool->process_discard = process_discard;
                pool->process_prepared_mapping = process_prepared_mapping;
@@@ -1643,12 -1642,19 +1648,19 @@@ static int bind_control_target(struct p
        struct pool_c *pt = ti->private;
  
        /*
-        * We want to make sure that degraded pools are never upgraded.
+        * We want to make sure that a pool in PM_FAIL mode is never upgraded.
         */
        enum pool_mode old_mode = pool->pf.mode;
        enum pool_mode new_mode = pt->adjusted_pf.mode;
  
-       if (old_mode > new_mode)
+       /*
+        * If we were in PM_FAIL mode, rollback of metadata failed.  We're
+        * not going to recover without a thin_repair.  So we never let the
+        * pool move out of the old mode.  On the other hand a PM_READ_ONLY
+        * may have been due to a lack of metadata or data space, and may
+        * now work (ie. if the underlying devices have been resized).
+        */
+       if (old_mode == PM_FAIL)
                new_mode = old_mode;
  
        pool->ti = ti;
@@@ -2272,7 -2278,7 +2284,7 @@@ static int pool_preresume(struct dm_tar
                return r;
  
        if (need_commit1 || need_commit2)
-               (void) commit_or_fallback(pool);
+               (void) commit(pool);
  
        return 0;
  }
@@@ -2299,7 -2305,7 +2311,7 @@@ static void pool_postsuspend(struct dm_
  
        cancel_delayed_work(&pool->waker);
        flush_workqueue(pool->wq);
-       (void) commit_or_fallback(pool);
+       (void) commit(pool);
  }
  
  static int check_arg_count(unsigned argc, unsigned args_required)
@@@ -2433,7 -2439,7 +2445,7 @@@ static int process_reserve_metadata_sna
        if (r)
                return r;
  
-       (void) commit_or_fallback(pool);
+       (void) commit(pool);
  
        r = dm_pool_reserve_metadata_snap(pool->pmd);
        if (r)
@@@ -2495,7 -2501,7 +2507,7 @@@ static int pool_message(struct dm_targe
                DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
  
        if (!r)
-               (void) commit_or_fallback(pool);
+               (void) commit(pool);
  
        return r;
  }
@@@ -2550,7 -2556,7 +2562,7 @@@ static void pool_status(struct dm_targe
  
                /* Commit to ensure statistics aren't out-of-date */
                if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
-                       (void) commit_or_fallback(pool);
+                       (void) commit(pool);
  
                r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
                if (r) {
@@@ -2873,7 -2879,7 +2885,7 @@@ out_unlock
  
  static int thin_map(struct dm_target *ti, struct bio *bio)
  {
 -      bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
 +      bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
  
        return thin_bio_map(ti, bio);
  }
diff --combined drivers/md/md.c
index b07fed398fd7d70ecbc0dd55d5b3a1426e85ff1b,21f4d7ff0da22ee16e7556958502a2fb9ea74502..16d84e091e2d199222f3c58203a07f3b696249c9
@@@ -393,7 -393,7 +393,7 @@@ static void md_submit_flush_data(struc
        struct mddev *mddev = container_of(ws, struct mddev, flush_work);
        struct bio *bio = mddev->flush_bio;
  
 -      if (bio->bi_size == 0)
 +      if (bio->bi_iter.bi_size == 0)
                /* an empty barrier - all done */
                bio_endio(bio, 0);
        else {
@@@ -754,7 -754,7 +754,7 @@@ void md_super_write(struct mddev *mddev
        struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
  
        bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
 -      bio->bi_sector = sector;
 +      bio->bi_iter.bi_sector = sector;
        bio_add_page(bio, page, size, 0);
        bio->bi_private = rdev;
        bio->bi_end_io = super_written;
@@@ -782,16 -782,18 +782,16 @@@ int sync_page_io(struct md_rdev *rdev, 
        struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
        int ret;
  
 -      rw |= REQ_SYNC;
 -
        bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
                rdev->meta_bdev : rdev->bdev;
        if (metadata_op)
 -              bio->bi_sector = sector + rdev->sb_start;
 +              bio->bi_iter.bi_sector = sector + rdev->sb_start;
        else if (rdev->mddev->reshape_position != MaxSector &&
                 (rdev->mddev->reshape_backwards ==
                  (sector >= rdev->mddev->reshape_position)))
 -              bio->bi_sector = sector + rdev->new_data_offset;
 +              bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
        else
 -              bio->bi_sector = sector + rdev->data_offset;
 +              bio->bi_iter.bi_sector = sector + rdev->data_offset;
        bio_add_page(bio, page, size, 0);
        submit_bio_wait(rw, bio);
  
@@@ -7765,7 -7767,7 +7765,7 @@@ void md_check_recovery(struct mddev *md
        if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
                return;
        if ( ! (
-               (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
+               (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
                test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
                test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
                (mddev->external == 0 && mddev->safemode == 1) ||
diff --combined drivers/md/raid5.c
index bef353c51c04af04d1eddff86cc1da6adaea3278,cc055da02e2a300706548041dc4d9cf63957978c..eea63372e4d30533b2255159c8b428b2ad90acb3
@@@ -133,7 -133,7 +133,7 @@@ static inline void unlock_all_device_ha
  static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
  {
        int sectors = bio_sectors(bio);
 -      if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
 +      if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
                return bio->bi_next;
        else
                return NULL;
@@@ -225,7 -225,7 +225,7 @@@ static void return_io(struct bio *retur
  
                return_bi = bi->bi_next;
                bi->bi_next = NULL;
 -              bi->bi_size = 0;
 +              bi->bi_iter.bi_size = 0;
                trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
                                         bi, 0);
                bio_endio(bi, 0);
@@@ -678,26 -678,23 +678,23 @@@ get_active_stripe(struct r5conf *conf, 
                        } else
                                init_stripe(sh, sector, previous);
                } else {
+                       spin_lock(&conf->device_lock);
                        if (atomic_read(&sh->count)) {
                                BUG_ON(!list_empty(&sh->lru)
                                    && !test_bit(STRIPE_EXPANDING, &sh->state)
                                    && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
-                                   && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
+                                       );
                        } else {
-                               spin_lock(&conf->device_lock);
                                if (!test_bit(STRIPE_HANDLE, &sh->state))
                                        atomic_inc(&conf->active_stripes);
-                               if (list_empty(&sh->lru) &&
-                                   !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
-                                   !test_bit(STRIPE_EXPANDING, &sh->state))
-                                       BUG();
+                               BUG_ON(list_empty(&sh->lru));
                                list_del_init(&sh->lru);
                                if (sh->group) {
                                        sh->group->stripes_cnt--;
                                        sh->group = NULL;
                                }
-                               spin_unlock(&conf->device_lock);
                        }
+                       spin_unlock(&conf->device_lock);
                }
        } while (sh == NULL);
  
@@@ -854,10 -851,10 +851,10 @@@ static void ops_run_io(struct stripe_he
                                bi->bi_rw, i);
                        atomic_inc(&sh->count);
                        if (use_new_offset(conf, sh))
 -                              bi->bi_sector = (sh->sector
 +                              bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->new_data_offset);
                        else
 -                              bi->bi_sector = (sh->sector
 +                              bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->data_offset);
                        if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                                bi->bi_rw |= REQ_NOMERGE;
                        bi->bi_vcnt = 1;
                        bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        bi->bi_io_vec[0].bv_offset = 0;
 -                      bi->bi_size = STRIPE_SIZE;
 +                      bi->bi_iter.bi_size = STRIPE_SIZE;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
                                rbi->bi_rw, i);
                        atomic_inc(&sh->count);
                        if (use_new_offset(conf, sh))
 -                              rbi->bi_sector = (sh->sector
 +                              rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->new_data_offset);
                        else
 -                              rbi->bi_sector = (sh->sector
 +                              rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->data_offset);
                        rbi->bi_vcnt = 1;
                        rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                        rbi->bi_io_vec[0].bv_offset = 0;
 -                      rbi->bi_size = STRIPE_SIZE;
 +                      rbi->bi_iter.bi_size = STRIPE_SIZE;
                        /*
                         * If this is discard request, set bi_vcnt 0. We don't
                         * want to confuse SCSI because SCSI will replace payload
@@@ -937,24 -934,24 +934,24 @@@ static struct dma_async_tx_descriptor 
  async_copy_data(int frombio, struct bio *bio, struct page *page,
        sector_t sector, struct dma_async_tx_descriptor *tx)
  {
 -      struct bio_vec *bvl;
 +      struct bio_vec bvl;
 +      struct bvec_iter iter;
        struct page *bio_page;
 -      int i;
        int page_offset;
        struct async_submit_ctl submit;
        enum async_tx_flags flags = 0;
  
 -      if (bio->bi_sector >= sector)
 -              page_offset = (signed)(bio->bi_sector - sector) * 512;
 +      if (bio->bi_iter.bi_sector >= sector)
 +              page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
        else
 -              page_offset = (signed)(sector - bio->bi_sector) * -512;
 +              page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
  
        if (frombio)
                flags |= ASYNC_TX_FENCE;
        init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
  
 -      bio_for_each_segment(bvl, bio, i) {
 -              int len = bvl->bv_len;
 +      bio_for_each_segment(bvl, bio, iter) {
 +              int len = bvl.bv_len;
                int clen;
                int b_offset = 0;
  
                        clen = len;
  
                if (clen > 0) {
 -                      b_offset += bvl->bv_offset;
 -                      bio_page = bvl->bv_page;
 +                      b_offset += bvl.bv_offset;
 +                      bio_page = bvl.bv_page;
                        if (frombio)
                                tx = async_memcpy(page, bio_page, page_offset,
                                                  b_offset, clen, &submit);
@@@ -1014,7 -1011,7 +1011,7 @@@ static void ops_complete_biofill(void *
                        BUG_ON(!dev->read);
                        rbi = dev->read;
                        dev->read = NULL;
 -                      while (rbi && rbi->bi_sector <
 +                      while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                rbi2 = r5_next_bio(rbi, dev->sector);
                                if (!raid5_dec_bi_active_stripes(rbi)) {
@@@ -1050,7 -1047,7 +1047,7 @@@ static void ops_run_biofill(struct stri
                        dev->read = rbi = dev->toread;
                        dev->toread = NULL;
                        spin_unlock_irq(&sh->stripe_lock);
 -                      while (rbi && rbi->bi_sector <
 +                      while (rbi && rbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                tx = async_copy_data(0, rbi, dev->page,
                                        dev->sector, tx);
@@@ -1392,7 -1389,7 +1389,7 @@@ ops_run_biodrain(struct stripe_head *sh
                        wbi = dev->written = chosen;
                        spin_unlock_irq(&sh->stripe_lock);
  
 -                      while (wbi && wbi->bi_sector <
 +                      while (wbi && wbi->bi_iter.bi_sector <
                                dev->sector + STRIPE_SECTORS) {
                                if (wbi->bi_rw & REQ_FUA)
                                        set_bit(R5_WantFUA, &dev->flags);
@@@ -2616,7 -2613,7 +2613,7 @@@ static int add_stripe_bio(struct stripe
        int firstwrite=0;
  
        pr_debug("adding bi b#%llu to stripe s#%llu\n",
 -              (unsigned long long)bi->bi_sector,
 +              (unsigned long long)bi->bi_iter.bi_sector,
                (unsigned long long)sh->sector);
  
        /*
                        firstwrite = 1;
        } else
                bip = &sh->dev[dd_idx].toread;
 -      while (*bip && (*bip)->bi_sector < bi->bi_sector) {
 -              if (bio_end_sector(*bip) > bi->bi_sector)
 +      while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
 +              if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
                        goto overlap;
                bip = & (*bip)->bi_next;
        }
 -      if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
 +      if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
                goto overlap;
  
        BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
                sector_t sector = sh->dev[dd_idx].sector;
                for (bi=sh->dev[dd_idx].towrite;
                     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
 -                           bi && bi->bi_sector <= sector;
 +                           bi && bi->bi_iter.bi_sector <= sector;
                     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
                        if (bio_end_sector(bi) >= sector)
                                sector = bio_end_sector(bi);
        }
  
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
 -              (unsigned long long)(*bip)->bi_sector,
 +              (unsigned long long)(*bip)->bi_iter.bi_sector,
                (unsigned long long)sh->sector, dd_idx);
        spin_unlock_irq(&sh->stripe_lock);
  
@@@ -2738,7 -2735,7 +2735,7 @@@ handle_failed_stripe(struct r5conf *con
                if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                        wake_up(&conf->wait_for_overlap);
  
 -              while (bi && bi->bi_sector <
 +              while (bi && bi->bi_iter.bi_sector <
                        sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
                bi = sh->dev[i].written;
                sh->dev[i].written = NULL;
                if (bi) bitmap_end = 1;
 -              while (bi && bi->bi_sector <
 +              while (bi && bi->bi_iter.bi_sector <
                       sh->dev[i].sector + STRIPE_SECTORS) {
                        struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
                        spin_unlock_irq(&sh->stripe_lock);
                        if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                                wake_up(&conf->wait_for_overlap);
 -                      while (bi && bi->bi_sector <
 +                      while (bi && bi->bi_iter.bi_sector <
                               sh->dev[i].sector + STRIPE_SECTORS) {
                                struct bio *nextbi =
                                        r5_next_bio(bi, sh->dev[i].sector);
@@@ -3005,7 -3002,7 +3002,7 @@@ static void handle_stripe_clean_event(s
                                        clear_bit(R5_UPTODATE, &dev->flags);
                                wbi = dev->written;
                                dev->written = NULL;
 -                              while (wbi && wbi->bi_sector <
 +                              while (wbi && wbi->bi_iter.bi_sector <
                                        dev->sector + STRIPE_SECTORS) {
                                        wbi2 = r5_next_bio(wbi, dev->sector);
                                        if (!raid5_dec_bi_active_stripes(wbi)) {
@@@ -4097,7 -4094,7 +4094,7 @@@ static int raid5_mergeable_bvec(struct 
  
  static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
  {
 -      sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
 +      sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
        unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bio_sectors(bio);
  
@@@ -4234,9 -4231,9 +4231,9 @@@ static int chunk_aligned_read(struct md
        /*
         *      compute position
         */
 -      align_bi->bi_sector =  raid5_compute_sector(conf, raid_bio->bi_sector,
 -                                                  0,
 -                                                  &dd_idx, NULL);
 +      align_bi->bi_iter.bi_sector =
 +              raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
 +                                   0, &dd_idx, NULL);
  
        end_sector = bio_end_sector(align_bi);
        rcu_read_lock();
                align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
  
                if (!bio_fits_rdev(align_bi) ||
 -                  is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
 +                  is_badblock(rdev, align_bi->bi_iter.bi_sector,
 +                              bio_sectors(align_bi),
                                &first_bad, &bad_sectors)) {
                        /* too big in some way, or has a known bad block */
                        bio_put(align_bi);
                }
  
                /* No reshape active, so we can trust rdev->data_offset */
 -              align_bi->bi_sector += rdev->data_offset;
 +              align_bi->bi_iter.bi_sector += rdev->data_offset;
  
                spin_lock_irq(&conf->device_lock);
                wait_event_lock_irq(conf->wait_for_stripe,
                if (mddev->gendisk)
                        trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
                                              align_bi, disk_devt(mddev->gendisk),
 -                                            raid_bio->bi_sector);
 +                                            raid_bio->bi_iter.bi_sector);
                generic_make_request(align_bi);
                return 1;
        } else {
@@@ -4466,8 -4462,8 +4463,8 @@@ static void make_discard_request(struc
                /* Skip discard while reshape is happening */
                return;
  
 -      logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 -      last_sector = bi->bi_sector + (bi->bi_size>>9);
 +      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 +      last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
  
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@@ -4571,7 -4567,7 +4568,7 @@@ static void make_request(struct mddev *
                return;
        }
  
 -      logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 +      logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
        last_sector = bio_end_sector(bi);
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
@@@ -5055,8 -5051,7 +5052,8 @@@ static int  retry_aligned_read(struct r
        int remaining;
        int handled = 0;
  
 -      logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 +      logical_sector = raid_bio->bi_iter.bi_sector &
 +              ~((sector_t)STRIPE_SECTORS-1);
        sector = raid5_compute_sector(conf, logical_sector,
                                      0, &dd_idx, NULL);
        last_sector = bio_end_sector(raid_bio);
@@@ -5473,7 -5468,7 +5470,7 @@@ static int alloc_thread_groups(struct r
        for (i = 0; i < *group_cnt; i++) {
                struct r5worker_group *group;
  
-               group = worker_groups[i];
+               group = &(*worker_groups)[i];
                INIT_LIST_HEAD(&group->handle_list);
                group->conf = conf;
                group->workers = workers + i * cnt;
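
Most of the raid5.c hunks above are the same mechanical conversion for the immutable biovec work: the starting sector and remaining byte count move from bio->bi_sector and bio->bi_size into bio->bi_iter, and bio_for_each_segment() now hands back a struct bio_vec by value, driven by a struct bvec_iter, instead of a struct bio_vec pointer plus an integer index. The sketch below is a stand-alone, simplified mock of that iteration convention; the struct layouts are reduced stand-ins rather than the real kernel definitions, and the open-coded loop ignores details such as partially consumed segments, so treat it as an illustration of the calling shape only.

/* Reduced stand-in types -- not the kernel headers. */
#include <stdio.h>

struct bio_vec {
        void            *bv_page;       /* opaque stand-in for struct page * */
        unsigned int     bv_len;
        unsigned int     bv_offset;
};

struct bvec_iter {
        unsigned long    bi_sector;     /* device sector of the current position */
        unsigned int     bi_size;       /* bytes left to iterate */
        unsigned int     bi_idx;        /* current segment index */
};

struct bio {
        struct bvec_iter  bi_iter;      /* previously bi_sector/bi_size/bi_idx sat directly in struct bio */
        struct bio_vec   *bi_io_vec;
        unsigned short    bi_vcnt;
};

/* Rough open-coded equivalent of bio_for_each_segment(bvl, bio, iter):
 * the loop variable is a bio_vec copy advanced by a bvec_iter, not a
 * bio_vec pointer plus an int index as in the old style. */
static void walk_bio(const struct bio *bio)
{
        struct bvec_iter iter = bio->bi_iter;
        unsigned int i;

        for (i = iter.bi_idx; i < bio->bi_vcnt && iter.bi_size; i++) {
                struct bio_vec bvl = bio->bi_io_vec[i];

                if (bvl.bv_len > iter.bi_size)
                        bvl.bv_len = iter.bi_size;      /* clamp a short tail */

                printf("segment %u: len=%u offset=%u at sector %lu\n",
                       i, bvl.bv_len, bvl.bv_offset, iter.bi_sector);

                iter.bi_sector += bvl.bv_len >> 9;      /* 512-byte sectors */
                iter.bi_size   -= bvl.bv_len;
        }
}

int main(void)
{
        struct bio_vec vecs[2] = {
                { .bv_page = 0, .bv_len = 4096, .bv_offset = 0 },
                { .bv_page = 0, .bv_len = 1024, .bv_offset = 0 },
        };
        struct bio bio = {
                .bi_iter   = { .bi_sector = 2048, .bi_size = 5120, .bi_idx = 0 },
                .bi_io_vec = vecs,
                .bi_vcnt   = 2,
        };

        walk_bio(&bio);
        return 0;
}

The same by-value convention is what turns bvl->bv_len into bvl.bv_len throughout the async_copy_data() hunk above.
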
diff --combined drivers/s390/block/dasd_eckd.c
index 70d1770173293d10df376b50d5056f585e61d6c8,95e45782692fa7bb2a89e9ec566a6a035f736381..2e8e0755070b609b13e9e49f5db5f17ac232ef69
@@@ -2551,7 -2551,7 +2551,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
        struct dasd_ccw_req *cqr;
        struct ccw1 *ccw;
        struct req_iterator iter;
 -      struct bio_vec *bv;
 +      struct bio_vec bv;
        char *dst;
        unsigned int off;
        int count, cidaw, cplength, datasize;
        count = 0;
        cidaw = 0;
        rq_for_each_segment(bv, req, iter) {
 -              if (bv->bv_len & (blksize - 1))
 +              if (bv.bv_len & (blksize - 1))
                        /* Eckd can only do full blocks. */
                        return ERR_PTR(-EINVAL);
 -              count += bv->bv_len >> (block->s2b_shift + 9);
 +              count += bv.bv_len >> (block->s2b_shift + 9);
  #if defined(CONFIG_64BIT)
 -              if (idal_is_needed (page_address(bv->bv_page), bv->bv_len))
 -                      cidaw += bv->bv_len >> (block->s2b_shift + 9);
 +              if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
 +                      cidaw += bv.bv_len >> (block->s2b_shift + 9);
  #endif
        }
        /* Paranoia. */
                              last_rec - recid + 1, cmd, basedev, blksize);
        }
        rq_for_each_segment(bv, req, iter) {
 -              dst = page_address(bv->bv_page) + bv->bv_offset;
 +              dst = page_address(bv.bv_page) + bv.bv_offset;
                if (dasd_page_cache) {
                        char *copy = kmem_cache_alloc(dasd_page_cache,
                                                      GFP_DMA | __GFP_NOWARN);
                        if (copy && rq_data_dir(req) == WRITE)
 -                              memcpy(copy + bv->bv_offset, dst, bv->bv_len);
 +                              memcpy(copy + bv.bv_offset, dst, bv.bv_len);
                        if (copy)
 -                              dst = copy + bv->bv_offset;
 +                              dst = copy + bv.bv_offset;
                }
 -              for (off = 0; off < bv->bv_len; off += blksize) {
 +              for (off = 0; off < bv.bv_len; off += blksize) {
                        sector_t trkid = recid;
                        unsigned int recoffs = sector_div(trkid, blk_per_trk);
                        rcmd = cmd;
@@@ -2735,7 -2735,7 +2735,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
        struct dasd_ccw_req *cqr;
        struct ccw1 *ccw;
        struct req_iterator iter;
 -      struct bio_vec *bv;
 +      struct bio_vec bv;
        char *dst, *idaw_dst;
        unsigned int cidaw, cplength, datasize;
        unsigned int tlf;
        idaw_dst = NULL;
        idaw_len = 0;
        rq_for_each_segment(bv, req, iter) {
 -              dst = page_address(bv->bv_page) + bv->bv_offset;
 -              seg_len = bv->bv_len;
 +              dst = page_address(bv.bv_page) + bv.bv_offset;
 +              seg_len = bv.bv_len;
                while (seg_len) {
                        if (new_track) {
                                trkid = recid;
@@@ -3039,7 -3039,7 +3039,7 @@@ static struct dasd_ccw_req *dasd_eckd_b
  {
        struct dasd_ccw_req *cqr;
        struct req_iterator iter;
 -      struct bio_vec *bv;
 +      struct bio_vec bv;
        char *dst;
        unsigned int trkcount, ctidaw;
        unsigned char cmd;
                new_track = 1;
                recid = first_rec;
                rq_for_each_segment(bv, req, iter) {
 -                      dst = page_address(bv->bv_page) + bv->bv_offset;
 -                      seg_len = bv->bv_len;
 +                      dst = page_address(bv.bv_page) + bv.bv_offset;
 +                      seg_len = bv.bv_len;
                        while (seg_len) {
                                if (new_track) {
                                        trkid = recid;
                }
        } else {
                rq_for_each_segment(bv, req, iter) {
 -                      dst = page_address(bv->bv_page) + bv->bv_offset;
 +                      dst = page_address(bv.bv_page) + bv.bv_offset;
                        last_tidaw = itcw_add_tidaw(itcw, 0x00,
 -                                                  dst, bv->bv_len);
 +                                                  dst, bv.bv_len);
                        if (IS_ERR(last_tidaw)) {
                                ret = -EINVAL;
                                goto out_error;
@@@ -3224,6 -3224,8 +3224,8 @@@ static struct dasd_ccw_req *dasd_eckd_b
  
        fcx_multitrack = private->features.feature[40] & 0x20;
        data_size = blk_rq_bytes(req);
+       if (data_size % blksize)
+               return ERR_PTR(-EINVAL);
        /* tpm write request add CBC data on each track boundary */
        if (rq_data_dir(req) == WRITE)
                data_size += (last_trk - first_trk) * 4;
@@@ -3276,7 -3278,7 +3278,7 @@@ static struct dasd_ccw_req *dasd_raw_bu
        struct dasd_ccw_req *cqr;
        struct ccw1 *ccw;
        struct req_iterator iter;
 -      struct bio_vec *bv;
 +      struct bio_vec bv;
        char *dst;
        unsigned char cmd;
        unsigned int trkcount;
                        idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
        }
        rq_for_each_segment(bv, req, iter) {
 -              dst = page_address(bv->bv_page) + bv->bv_offset;
 -              seg_len = bv->bv_len;
 +              dst = page_address(bv.bv_page) + bv.bv_offset;
 +              seg_len = bv.bv_len;
                if (cmd == DASD_ECKD_CCW_READ_TRACK)
                        memset(dst, 0, seg_len);
                if (!len_to_track_end) {
@@@ -3422,7 -3424,7 +3424,7 @@@ dasd_eckd_free_cp(struct dasd_ccw_req *
        struct dasd_eckd_private *private;
        struct ccw1 *ccw;
        struct req_iterator iter;
 -      struct bio_vec *bv;
 +      struct bio_vec bv;
        char *dst, *cda;
        unsigned int blksize, blk_per_trk, off;
        sector_t recid;
        if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
                ccw++;
        rq_for_each_segment(bv, req, iter) {
 -              dst = page_address(bv->bv_page) + bv->bv_offset;
 -              for (off = 0; off < bv->bv_len; off += blksize) {
 +              dst = page_address(bv.bv_page) + bv.bv_offset;
 +              for (off = 0; off < bv.bv_len; off += blksize) {
                        /* Skip locate record. */
                        if (private->uses_cdl && recid <= 2*blk_per_trk)
                                ccw++;
                                        cda = (char *)((addr_t) ccw->cda);
                                if (dst != cda) {
                                        if (rq_data_dir(req) == READ)
 -                                              memcpy(dst, cda, bv->bv_len);
 +                                              memcpy(dst, cda, bv.bv_len);
                                        kmem_cache_free(dasd_page_cache,
                                            (void *)((addr_t)cda & PAGE_MASK));
                                }
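
The dasd_eckd.c hunks above are largely the same by-value change applied to rq_for_each_segment() (bv->bv_len becomes bv.bv_len and so on), plus a new up-front check that the request size is a whole number of blocks. Below is a stand-alone sketch of the per-segment accounting pattern those hunks keep using; the struct is a reduced stand-in, and the example assumes blksize == 512 << s2b_shift, which is an assumption of this sketch rather than something stated in the diff.

#include <errno.h>
#include <stdio.h>

struct bio_vec {                        /* reduced stand-in, not the kernel type */
        void            *bv_page;
        unsigned int     bv_len;
        unsigned int     bv_offset;
};

/* Count the device blocks covered by the segments, or return -EINVAL if a
 * segment is not a whole number of blocks (mirrors the "Eckd can only do
 * full blocks" test in the hunk above). */
static int count_blocks(const struct bio_vec *segs, int nsegs,
                        unsigned int blksize, unsigned int s2b_shift)
{
        int count = 0, i;

        for (i = 0; i < nsegs; i++) {
                struct bio_vec bv = segs[i];    /* by value, as the iterator now yields */

                if (bv.bv_len & (blksize - 1))
                        return -EINVAL;
                count += bv.bv_len >> (s2b_shift + 9);
        }
        return count;
}

int main(void)
{
        struct bio_vec segs[2] = { { .bv_len = 4096 }, { .bv_len = 8192 } };

        /* 4 KiB blocks, s2b_shift = 3: 4096 + 8192 bytes comes out as 3 blocks */
        printf("%d blocks\n", count_blocks(segs, 2, 4096, 3));
        return 0;
}
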
diff --combined drivers/scsi/sd.c
index 200d6bc8124052ed75805cf3cf55427771f5e72e,69725f7c32c1bc5b6c6dbfb93756aa4770b5d17e..5c8a3b696a1dbf3ab18f914225e36f7c4d0a3a31
@@@ -801,7 -801,7 +801,7 @@@ static int sd_setup_write_same_cmnd(str
        if (sdkp->device->no_write_same)
                return BLKPREP_KILL;
  
 -      BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size);
 +      BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
  
        sector >>= ilog2(sdp->sector_size) - 9;
        nr_sectors >>= ilog2(sdp->sector_size) - 9;
@@@ -2659,6 -2659,12 +2659,12 @@@ static void sd_read_write_same(struct s
  {
        struct scsi_device *sdev = sdkp->device;
  
+       if (sdev->host->no_write_same) {
+               sdev->no_write_same = 1;
+               return;
+       }
        if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) {
                /* too large values might cause issues with arcmsr */
                int vpd_buf_len = 64;
diff --combined drivers/staging/zram/zram_drv.c
index 6f988382b1742ed4f8e4303dcafc16146b3d08e8,3277d9838f4e928ab3555720a186e476e826a720..108f2733106d77c00b73003e106febe6aa8998cc
@@@ -171,14 -171,13 +171,14 @@@ static inline int valid_io_request(stru
        u64 start, end, bound;
  
        /* unaligned request */
 -      if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 +      if (unlikely(bio->bi_iter.bi_sector &
 +                   (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
                return 0;
 -      if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 +      if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
                return 0;
  
 -      start = bio->bi_sector;
 -      end = start + (bio->bi_size >> SECTOR_SHIFT);
 +      start = bio->bi_iter.bi_sector;
 +      end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
        bound = zram->disksize >> SECTOR_SHIFT;
        /* out of range range */
        if (unlikely(start >= bound || end > bound || start > end))
@@@ -653,29 -652,37 +653,38 @@@ static ssize_t reset_store(struct devic
                return -ENOMEM;
  
        /* Do not reset an active device! */
-       if (bdev->bd_holders)
-               return -EBUSY;
+       if (bdev->bd_holders) {
+               ret = -EBUSY;
+               goto out;
+       }
  
        ret = kstrtou16(buf, 10, &do_reset);
        if (ret)
-               return ret;
+               goto out;
  
-       if (!do_reset)
-               return -EINVAL;
+       if (!do_reset) {
+               ret = -EINVAL;
+               goto out;
+       }
  
        /* Make sure all pending I/O is finished */
        fsync_bdev(bdev);
+       bdput(bdev);
  
        zram_reset_device(zram, true);
        return len;
+ out:
+       bdput(bdev);
+       return ret;
  }
  
  static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
  {
 -      int i, offset;
 +      int offset;
        u32 index;
 -      struct bio_vec *bvec;
 +      struct bio_vec bvec;
 +      struct bvec_iter iter;
  
        switch (rw) {
        case READ:
                break;
        }
  
 -      index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
 -      offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 +      index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
 +      offset = (bio->bi_iter.bi_sector &
 +                (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
  
 -      bio_for_each_segment(bvec, bio, i) {
 +      bio_for_each_segment(bvec, bio, iter) {
                int max_transfer_size = PAGE_SIZE - offset;
  
 -              if (bvec->bv_len > max_transfer_size) {
 +              if (bvec.bv_len > max_transfer_size) {
                        /*
                         * zram_bvec_rw() can only make operation on a single
                         * zram page. Split the bio vector.
                         */
                        struct bio_vec bv;
  
 -                      bv.bv_page = bvec->bv_page;
 +                      bv.bv_page = bvec.bv_page;
                        bv.bv_len = max_transfer_size;
 -                      bv.bv_offset = bvec->bv_offset;
 +                      bv.bv_offset = bvec.bv_offset;
  
                        if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
                                goto out;
  
 -                      bv.bv_len = bvec->bv_len - max_transfer_size;
 +                      bv.bv_len = bvec.bv_len - max_transfer_size;
                        bv.bv_offset += max_transfer_size;
                        if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
                                goto out;
                } else
 -                      if (zram_bvec_rw(zram, bvec, index, offset, bio, rw)
 +                      if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
                            < 0)
                                goto out;
  
 -              update_position(&index, &offset, bvec);
 +              update_position(&index, &offset, &bvec);
        }
  
        set_bit(BIO_UPTODATE, &bio->bi_flags);
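
In the zram hunks above, __zram_make_request() derives a page index and an in-page offset from bio->bi_iter.bi_sector and splits any segment that would cross a zram page boundary at PAGE_SIZE - offset. The sketch below reproduces only that arithmetic; the constants assume 512-byte sectors and 4 KiB pages (an assumption of this sketch, since PAGE_SIZE is architecture dependent), and the helper name is invented for illustration.

#include <stdio.h>

#define SECTOR_SHIFT            9
#define PAGE_SIZE               4096u                   /* assumed for this sketch */
#define SECTORS_PER_PAGE_SHIFT  (12 - SECTOR_SHIFT)
#define SECTORS_PER_PAGE        (1u << SECTORS_PER_PAGE_SHIFT)

/* Show how one segment of `len` bytes starting at `sector` maps onto zram
 * pages, splitting at the page boundary the same way the hunk above does. */
static void split_segment(unsigned long long sector, unsigned int len)
{
        unsigned long long index = sector >> SECTORS_PER_PAGE_SHIFT;
        unsigned int offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
        unsigned int max_transfer_size = PAGE_SIZE - offset;

        if (len > max_transfer_size) {
                printf("page %llu: %u bytes at offset %u\n",
                       index, max_transfer_size, offset);
                printf("page %llu: %u bytes at offset 0\n",
                       index + 1, len - max_transfer_size);
        } else {
                printf("page %llu: %u bytes at offset %u\n", index, len, offset);
        }
}

int main(void)
{
        split_segment(15, 2048);        /* starts 512 bytes before a page boundary */
        return 0;
}

The at-most-one-split behaviour matches the driver, where a single bio segment never covers more than a page of data.
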
diff --combined fs/xfs/xfs_buf.c
index 5f3ea443ebbe34950123246533058eb83c7f30c4,afe7645e4b2b8b7746665ed35a0d55da6ce11e3b..2a941ab623cb1b32498e9aadfcddda63084df1f8
@@@ -698,7 -698,11 +698,11 @@@ xfs_buf_read_uncached
        bp->b_flags |= XBF_READ;
        bp->b_ops = ops;
  
-       xfsbdstrat(target->bt_mount, bp);
+       if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       xfs_buf_iorequest(bp);
        xfs_buf_iowait(bp);
        return bp;
  }
@@@ -1089,7 -1093,7 +1093,7 @@@ xfs_bioerror
   * This is meant for userdata errors; metadata bufs come with
   * iodone functions attached, so that we can track down errors.
   */
- STATIC int
+ int
  xfs_bioerror_relse(
        struct xfs_buf  *bp)
  {
@@@ -1152,7 -1156,7 +1156,7 @@@ xfs_bwrite
        ASSERT(xfs_buf_islocked(bp));
  
        bp->b_flags |= XBF_WRITE;
-       bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+       bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
  
        xfs_bdstrat_cb(bp);
  
        return error;
  }
  
- /*
-  * Wrapper around bdstrat so that we can stop data from going to disk in case
-  * we are shutting down the filesystem.  Typically user data goes thru this
-  * path; one of the exceptions is the superblock.
-  */
- void
- xfsbdstrat(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp)
- {
-       if (XFS_FORCED_SHUTDOWN(mp)) {
-               trace_xfs_bdstrat_shut(bp, _RET_IP_);
-               xfs_bioerror_relse(bp);
-               return;
-       }
-       xfs_buf_iorequest(bp);
- }
  STATIC void
  _xfs_buf_ioend(
        xfs_buf_t               *bp,
@@@ -1255,7 -1240,7 +1240,7 @@@ next_chunk
  
        bio = bio_alloc(GFP_NOIO, nr_pages);
        bio->bi_bdev = bp->b_target->bt_bdev;
 -      bio->bi_sector = sector;
 +      bio->bi_iter.bi_sector = sector;
        bio->bi_end_io = xfs_buf_bio_end_io;
        bio->bi_private = bp;
  
                total_nr_pages--;
        }
  
 -      if (likely(bio->bi_size)) {
 +      if (likely(bio->bi_iter.bi_size)) {
                if (xfs_buf_is_vmapped(bp)) {
                        flush_kernel_vmap_range(bp->b_addr,
                                                xfs_buf_vmap_len(bp));
@@@ -1516,6 -1501,12 +1501,12 @@@ xfs_wait_buftarg
                        struct xfs_buf *bp;
                        bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                        list_del_init(&bp->b_lru);
+                       if (bp->b_flags & XBF_WRITE_FAIL) {
+                               xfs_alert(btp->bt_mount,
+ "Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+ "Please run xfs_repair to determine the extent of the problem.",
+                                       (long long)bp->b_bn);
+                       }
                        xfs_buf_rele(bp);
                }
                if (loop++ != 0)
@@@ -1799,7 -1790,7 +1790,7 @@@ __xfs_buf_delwri_submit
  
        blk_start_plug(&plug);
        list_for_each_entry_safe(bp, n, io_list, b_list) {
-               bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+               bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
                bp->b_flags |= XBF_WRITE;
  
                if (!wait) {
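
The xfs_buf.c hunks above delete the xfsbdstrat() wrapper and move its forced-shutdown test into the callers, so xfs_buf_read_uncached() now checks XFS_FORCED_SHUTDOWN() itself, releases the buffer, and only otherwise calls xfs_buf_iorequest(). The fragment below is a schematic of that caller-side guard; every identifier in it is a made-up stand-in (forced_shutdown, release_buf, submit_io, wait_io), not an XFS symbol, and the stubs exist only so the sketch compiles on its own.

#include <stdio.h>

struct buf   { int id; };
struct mount { int shutdown; };

static int  forced_shutdown(struct mount *mp) { return mp->shutdown; }
static void release_buf(struct buf *bp)       { printf("release buf %d\n", bp->id); }
static void submit_io(struct buf *bp)         { printf("submit  buf %d\n", bp->id); }
static void wait_io(struct buf *bp)           { printf("wait    buf %d\n", bp->id); }

/* The shutdown test that used to live inside the wrapper now sits directly
 * in the read path: bail out and release the buffer instead of submitting
 * I/O against a filesystem that has already been shut down. */
static struct buf *read_uncached(struct mount *mp, struct buf *bp)
{
        if (forced_shutdown(mp)) {
                release_buf(bp);
                return NULL;
        }
        submit_io(bp);
        wait_io(bp);
        return bp;
}

int main(void)
{
        struct mount live = { 0 }, dead = { 1 };
        struct buf b1 = { 1 }, b2 = { 2 };

        read_uncached(&live, &b1);      /* submits and waits */
        read_uncached(&dead, &b2);      /* releases, returns NULL */
        return 0;
}
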