if (rq) {
blk_mq_rq_ctx_init(q, ctx, rq, rw);
break;
- } else if (!(gfp & __GFP_WAIT))
- break;
+ }
blk_mq_put_ctx(ctx);
+ if (!(gfp & __GFP_WAIT))
+ break;
+
__blk_mq_run_hw_queue(hctx);
blk_mq_wait_for_tags(hctx->tags);
} while (1);
return NULL;
rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
- blk_mq_put_ctx(rq->mq_ctx);
+ if (rq)
+ blk_mq_put_ctx(rq->mq_ctx);
return rq;
}
return NULL;
rq = blk_mq_alloc_request_pinned(q, rw, gfp, true);
- blk_mq_put_ctx(rq->mq_ctx);
+ if (rq)
+ blk_mq_put_ctx(rq->mq_ctx);
return rq;
}
EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
struct bio *next = bio->bi_next;
bio->bi_next = NULL;
- bytes += bio->bi_size;
+ bytes += bio->bi_iter.bi_size;
blk_mq_bio_endio(rq, bio, error);
bio = next;
}
blk_account_io_completion(rq, bytes);
+ blk_account_io_done(rq);
+
if (rq->end_io)
rq->end_io(rq, error);
else
blk_mq_free_request(rq);
-
- blk_account_io_done(rq);
}
void __blk_mq_end_io(struct request *rq, int error)
if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
(i % SEGS_PER_INDIRECT_FRAME == 0)) {
- unsigned long pfn;
+ unsigned long uninitialized_var(pfn);
if (segments)
kunmap_atomic(segments);
for (i = 0; i < pending; i++) {
offset = (i * segs * PAGE_SIZE) >> 9;
size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
- (unsigned int)(bio->bi_size >> 9) - offset);
+ (unsigned int)bio_sectors(bio) - offset);
cloned_bio = bio_clone(bio, GFP_NOIO);
BUG_ON(cloned_bio == NULL);
bio_trim(cloned_bio, offset, size);
bdev = bdget_disk(disk, 0);
+ if (!bdev) {
+ WARN(1, "Block device %s yanked out from us!\n", disk->disk_name);
+ goto out_mutex;
+ }
if (bdev->bd_openers)
goto out;
out:
bdput(bdev);
+ out_mutex:
mutex_unlock(&blkfront_mutex);
}
uint8_t disk_gen;
uint8_t last_gc; /* Most out of date gen in the btree */
uint8_t gc_gen;
- uint16_t gc_mark;
+ uint16_t gc_mark; /* Bitfield used by GC. See below for field */
};
/*
#define GC_MARK_RECLAIMABLE 0
#define GC_MARK_DIRTY 1
#define GC_MARK_METADATA 2
- BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 14);
+ BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13);
+ BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
#include "journal.h"
#include "stats.h"
unsigned long sectors_dirty_last;
long sectors_dirty_derivative;
- mempool_t *unaligned_bvec;
struct bio_set *bio_split;
unsigned data_csum:1;
unsigned char writeback_percent;
unsigned writeback_delay;
- int writeback_rate_change;
- int64_t writeback_rate_derivative;
uint64_t writeback_rate_target;
+ int64_t writeback_rate_proportional;
+ int64_t writeback_rate_derivative;
+ int64_t writeback_rate_change;
unsigned writeback_rate_update_seconds;
unsigned writeback_rate_d_term;
unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_d_smooth;
};
enum alloc_watermarks {
* call prio_write() to keep gens from wrapping.
*/
uint8_t need_save_prio;
- unsigned gc_move_threshold;
/*
* If nonzero, we know we aren't going to find any buckets to invalidate
void bch_bbio_free(struct bio *, struct cache_set *);
struct bio *bch_bbio_alloc(struct cache_set *);
-struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
void bch_generic_make_request(struct bio *, struct bio_split_pool *);
void __bch_submit_bbio(struct bio *, struct cache_set *);
void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
bio = bch_bbio_alloc(b->c);
bio->bi_rw = REQ_META|READ_SYNC;
- bio->bi_size = KEY_SIZE(&b->key) << 9;
+ bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
bio->bi_end_io = btree_node_read_endio;
bio->bi_private = &cl;
struct bio_vec *bv;
int n;
- __bio_for_each_segment(bv, b->bio, n, 0)
+ bio_for_each_segment_all(bv, b->bio, n)
__free_page(bv->bv_page);
__btree_node_write_done(cl);
b->bio->bi_end_io = btree_node_write_endio;
b->bio->bi_private = cl;
b->bio->bi_rw = REQ_META|WRITE_SYNC|REQ_FUA;
- b->bio->bi_size = set_blocks(i, b->c) * block_bytes(b->c);
+ b->bio->bi_iter.bi_size = set_blocks(i, b->c) * block_bytes(b->c);
bch_bio_map(b->bio, i);
/*
struct bio_vec *bv;
void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
- bio_for_each_segment(bv, b->bio, j)
+ bio_for_each_segment_all(bv, b->bio, j)
memcpy(page_address(bv->bv_page),
base + j * PAGE_SIZE, PAGE_SIZE);
SET_GC_MARK(PTR_BUCKET(c, &c->uuid_bucket, i),
GC_MARK_METADATA);
+ /* don't reclaim buckets to which writeback keys point */
+ rcu_read_lock();
+ for (i = 0; i < c->nr_uuids; i++) {
+ struct bcache_device *d = c->devices[i];
+ struct cached_dev *dc;
+ struct keybuf_key *w, *n;
+ unsigned j;
+
+ if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
+ continue;
+ dc = container_of(d, struct cached_dev, disk);
+
+ spin_lock(&dc->writeback_keys.lock);
+ rbtree_postorder_for_each_entry_safe(w, n,
+ &dc->writeback_keys.keys, node)
+ for (j = 0; j < KEY_PTRS(&w->key); j++)
+ SET_GC_MARK(PTR_BUCKET(c, &w->key, j),
+ GC_MARK_DIRTY);
+ spin_unlock(&dc->writeback_keys.lock);
+ }
+ rcu_read_unlock();
+
for_each_cache(ca, c, i) {
uint64_t *i;
if (KEY_START(k) > KEY_START(insert) + sectors_found)
goto check_failed;
- if (KEY_PTRS(replace_key) != KEY_PTRS(k))
+ if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
+ KEY_DIRTY(k) != KEY_DIRTY(replace_key))
goto check_failed;
/* skip past gen */
struct bkey *replace_key;
};
- int btree_insert_fn(struct btree_op *b_op, struct btree *b)
+ static int btree_insert_fn(struct btree_op *b_op, struct btree *b)
{
struct btree_insert_op *op = container_of(b_op,
struct btree_insert_op, op);
unsigned i;
for (i = 0; i < KEY_PTRS(k); i++) {
- struct cache *ca = PTR_CACHE(c, k, i);
struct bucket *g = PTR_BUCKET(c, k, i);
- if (GC_SECTORS_USED(g) < ca->gc_move_threshold)
+ if (GC_MOVE(g))
return true;
}
static void read_moving_endio(struct bio *bio, int error)
{
+ struct bbio *b = container_of(bio, struct bbio, bio);
struct moving_io *io = container_of(bio->bi_private,
struct moving_io, cl);
if (error)
io->op.error = error;
+ else if (!KEY_DIRTY(&b->key) &&
+ ptr_stale(io->op.c, &b->key, 0)) {
+ io->op.error = -EINTR;
+ }
bch_bbio_endio(io->op.c, bio, error, "reading data to move");
}
bio_get(bio);
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
- bio->bi_size = KEY_SIZE(&io->w->key) << 9;
+ bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&io->w->key),
PAGE_SECTORS);
bio->bi_private = &io->cl;
if (!op->error) {
moving_init(io);
- io->bio.bio.bi_sector = KEY_START(&io->w->key);
+ io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
op->write_prio = 1;
op->bio = &io->bio.bio;
if (!w)
break;
+ if (ptr_stale(c, &w->key, 0)) {
+ bch_keybuf_del(&c->moving_gc_keys, w);
+ continue;
+ }
+
io = kzalloc(sizeof(struct moving_io) + sizeof(struct bio_vec)
* DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
GFP_KERNEL);
static unsigned bucket_heap_top(struct cache *ca)
{
- return GC_SECTORS_USED(heap_peek(&ca->heap));
+ struct bucket *b;
+ return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
}
void bch_moving_gc(struct cache_set *c)
sectors_to_move -= GC_SECTORS_USED(b);
}
- ca->gc_move_threshold = bucket_heap_top(ca);
-
- pr_debug("threshold %u", ca->gc_move_threshold);
+ while (heap_pop(&ca->heap, b, bucket_cmp))
+ SET_GC_MOVE(b, 1);
}
mutex_unlock(&c->bucket_lock);
struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page);
unsigned i;
- bio->bi_sector = SB_SECTOR;
- bio->bi_rw = REQ_SYNC|REQ_META;
- bio->bi_size = SB_SIZE;
+ bio->bi_iter.bi_sector = SB_SECTOR;
+ bio->bi_rw = REQ_SYNC|REQ_META;
+ bio->bi_iter.bi_size = SB_SIZE;
bch_bio_map(bio, NULL);
out->offset = cpu_to_le64(sb->offset);
struct bio *bio = bch_bbio_alloc(c);
bio->bi_rw = REQ_SYNC|REQ_META|rw;
- bio->bi_size = KEY_SIZE(k) << 9;
+ bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
bio->bi_end_io = uuid_endio;
bio->bi_private = cl;
closure_init_stack(cl);
- bio->bi_sector = bucket * ca->sb.bucket_size;
- bio->bi_bdev = ca->bdev;
- bio->bi_rw = REQ_SYNC|REQ_META|rw;
- bio->bi_size = bucket_bytes(ca);
+ bio->bi_iter.bi_sector = bucket * ca->sb.bucket_size;
+ bio->bi_bdev = ca->bdev;
+ bio->bi_rw = REQ_SYNC|REQ_META|rw;
+ bio->bi_iter.bi_size = bucket_bytes(ca);
bio->bi_end_io = prio_endio;
bio->bi_private = ca;
}
bio_split_pool_free(&d->bio_split_hook);
- if (d->unaligned_bvec)
- mempool_destroy(d->unaligned_bvec);
if (d->bio_split)
bioset_free(d->bio_split);
if (is_vmalloc_addr(d->full_dirty_stripes))
return minor;
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
- !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
- sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
bio_split_pool_init(&d->bio_split_hook) ||
!(d->disk = alloc_disk(1))) {
ida_simple_remove(&bcache_minor, minor);
static bool can_attach_cache(struct cache *ca, struct cache_set *c)
{
return ca->sb.block_size == c->sb.block_size &&
- ca->sb.bucket_size == c->sb.block_size &&
+ ca->sb.bucket_size == c->sb.bucket_size &&
ca->sb.nr_in_set == c->sb.nr_in_set;
}
{
uint64_t now = local_clock();
- d->next += div_u64(done, d->rate);
+ d->next += div_u64(done * NSEC_PER_SEC, d->rate);
+
+ if (time_before64(now + NSEC_PER_SEC, d->next))
+ d->next = now + NSEC_PER_SEC;
+
+ if (time_after64(now - NSEC_PER_SEC * 2, d->next))
+ d->next = now - NSEC_PER_SEC * 2;
return time_after64(d->next, now)
? div_u64(d->next - now, NSEC_PER_SEC / HZ)
void bch_bio_map(struct bio *bio, void *base)
{
- size_t size = bio->bi_size;
+ size_t size = bio->bi_iter.bi_size;
struct bio_vec *bv = bio->bi_io_vec;
- BUG_ON(!bio->bi_size);
+ BUG_ON(!bio->bi_iter.bi_size);
BUG_ON(bio->bi_vcnt);
bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0;
/* PD controller */
- int change = 0;
- int64_t error;
int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
int64_t derivative = dirty - dc->disk.sectors_dirty_last;
+ int64_t proportional = dirty - target;
+ int64_t change;
dc->disk.sectors_dirty_last = dirty;
- derivative *= dc->writeback_rate_d_term;
- derivative = clamp(derivative, -dirty, dirty);
+ /* Scale to sectors per second */
- derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
- dc->writeback_rate_d_smooth, 0);
+ proportional *= dc->writeback_rate_update_seconds;
+ proportional = div_s64(proportional, dc->writeback_rate_p_term_inverse);
- /* Avoid divide by zero */
- if (!target)
- goto out;
+ derivative = div_s64(derivative, dc->writeback_rate_update_seconds);
- error = div64_s64((dirty + derivative - target) << 8, target);
+ derivative = ewma_add(dc->disk.sectors_dirty_derivative, derivative,
+ (dc->writeback_rate_d_term /
+ dc->writeback_rate_update_seconds) ?: 1, 0);
+
+ derivative *= dc->writeback_rate_d_term;
+ derivative = div_s64(derivative, dc->writeback_rate_p_term_inverse);
- change = div_s64((dc->writeback_rate.rate * error) >> 8,
- dc->writeback_rate_p_term_inverse);
+ change = proportional + derivative;
/* Don't increase writeback rate if the device isn't keeping up */
if (change > 0 &&
time_after64(local_clock(),
- dc->writeback_rate.next + 10 * NSEC_PER_MSEC))
+ dc->writeback_rate.next + NSEC_PER_MSEC))
change = 0;
dc->writeback_rate.rate =
- clamp_t(int64_t, dc->writeback_rate.rate + change,
+ clamp_t(int64_t, (int64_t) dc->writeback_rate.rate + change,
1, NSEC_PER_MSEC);
- out:
+
+ dc->writeback_rate_proportional = proportional;
dc->writeback_rate_derivative = derivative;
dc->writeback_rate_change = change;
dc->writeback_rate_target = target;
static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
{
- uint64_t ret;
-
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
!dc->writeback_percent)
return 0;
- ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
-
- return min_t(uint64_t, ret, HZ);
+ return bch_next_delay(&dc->writeback_rate, sectors);
}
struct dirty_io {
if (!io->dc->writeback_percent)
bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
- bio->bi_size = KEY_SIZE(&w->key) << 9;
+ bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
bio->bi_max_vecs = DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS);
bio->bi_private = w;
bio->bi_io_vec = bio->bi_inline_vecs;
dirty_init(w);
io->bio.bi_rw = WRITE;
- io->bio.bi_sector = KEY_START(&w->key);
+ io->bio.bi_iter.bi_sector = KEY_START(&w->key);
io->bio.bi_bdev = io->dc->bdev;
io->bio.bi_end_io = dirty_endio;
if (KEY_START(&w->key) != dc->last_read ||
jiffies_to_msecs(delay) > 50)
while (!kthread_should_stop() && delay)
- delay = schedule_timeout_interruptible(delay);
+ delay = schedule_timeout_uninterruptible(delay);
dc->last_read = KEY_OFFSET(&w->key);
io->dc = dc;
dirty_init(w);
- io->bio.bi_sector = PTR_OFFSET(&w->key, 0);
+ io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
io->bio.bi_bdev = PTR_CACHE(dc->disk.c,
&w->key, 0)->bdev;
io->bio.bi_rw = READ;
while (delay &&
!kthread_should_stop() &&
!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
- delay = schedule_timeout_interruptible(delay);
+ delay = schedule_timeout_uninterruptible(delay);
}
}
bch_btree_map_keys(&op.op, dc->disk.c, &KEY(op.inode, 0, 0),
sectors_dirty_init_fn, 0);
+
+ dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
}
int bch_cached_dev_writeback_init(struct cached_dev *dc)
dc->writeback_delay = 30;
dc->writeback_rate.rate = 1024;
- dc->writeback_rate_update_seconds = 30;
- dc->writeback_rate_d_term = 16;
- dc->writeback_rate_p_term_inverse = 64;
- dc->writeback_rate_d_smooth = 8;
+ dc->writeback_rate_update_seconds = 5;
+ dc->writeback_rate_d_term = 30;
+ dc->writeback_rate_p_term_inverse = 6000;
dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
"bcache_writeback");
if (IS_ERR(dc->writeback_thread))
return PTR_ERR(dc->writeback_thread);
- set_task_state(dc->writeback_thread, TASK_INTERRUPTIBLE);
-
INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
schedule_delayed_work(&dc->writeback_rate_update,
dc->writeback_rate_update_seconds * HZ);
bio_init(&b->bio);
b->bio.bi_io_vec = b->bio_vec;
b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
- b->bio.bi_sector = block << b->c->sectors_per_block_bits;
+ b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
b->bio.bi_bdev = b->c->bdev;
b->bio.bi_end_io = end_io;
{
__u64 mem;
+ dm_bufio_allocated_kmem_cache = 0;
+ dm_bufio_allocated_get_free_pages = 0;
+ dm_bufio_allocated_vmalloc = 0;
+ dm_bufio_current_allocated = 0;
+
memset(&dm_bufio_caches, 0, sizeof dm_bufio_caches);
memset(&dm_bufio_cache_names, 0, sizeof dm_bufio_cache_names);
static void iot_update_stats(struct io_tracker *t, struct bio *bio)
{
- if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
+ if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
t->nr_seq_samples++;
else {
/*
t->nr_rand_samples++;
}
- t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
+ t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
}
static void iot_check_for_pattern_switch(struct io_tracker *t)
int r = 0;
bool updated = updated_this_tick(mq, e);
- requeue_and_update_tick(mq, e);
-
if ((!discarded_oblock && updated) ||
- !should_promote(mq, e, discarded_oblock, data_dir))
+ !should_promote(mq, e, discarded_oblock, data_dir)) {
+ requeue_and_update_tick(mq, e);
result->op = POLICY_MISS;
- else if (!can_migrate)
+
+ } else if (!can_migrate)
r = -EWOULDBLOCK;
- else
+
+ else {
+ requeue_and_update_tick(mq, e);
r = pre_cache_to_cache(mq, e, result);
+ }
return r;
}
{
bio->bi_end_io = h->bi_end_io;
bio->bi_private = h->bi_private;
+
+ /*
+ * Must bump bi_remaining to allow bio to complete with
+ * restored bi_end_io.
+ */
+ atomic_inc(&bio->bi_remaining);
}
/*----------------------------------------------------------------*/
static void remap_to_cache(struct cache *cache, struct bio *bio,
dm_cblock_t cblock)
{
- sector_t bi_sector = bio->bi_sector;
+ sector_t bi_sector = bio->bi_iter.bi_sector;
bio->bi_bdev = cache->cache_dev->bdev;
if (!block_size_is_power_of_two(cache))
- bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
- sector_div(bi_sector, cache->sectors_per_block);
+ bio->bi_iter.bi_sector =
+ (from_cblock(cblock) * cache->sectors_per_block) +
+ sector_div(bi_sector, cache->sectors_per_block);
else
- bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
- (bi_sector & (cache->sectors_per_block - 1));
+ bio->bi_iter.bi_sector =
+ (from_cblock(cblock) << cache->sectors_per_block_shift) |
+ (bi_sector & (cache->sectors_per_block - 1));
}
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
- sector_t block_nr = bio->bi_sector;
+ sector_t block_nr = bio->bi_iter.bi_sector;
if (!block_size_is_power_of_two(cache))
(void) sector_div(block_nr, cache->sectors_per_block);
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
- (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
+ (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static void avoid_copy(struct dm_cache_migration *mg)
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
- BUG_ON(bio->bi_size);
+ BUG_ON(bio->bi_iter.bi_size);
if (!pb->req_nr)
remap_to_origin(cache, bio);
else
*/
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
- dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
+ dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
cache->discard_block_size);
- dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
+ dm_block_t end_block = bio_end_sector(bio);
dm_block_t b;
end_block = block_div(end_block, cache->discard_block_size);
{
int r;
- r = dm_cache_resize(cache->cmd, cache->cache_size);
+ r = dm_cache_resize(cache->cmd, new_size);
if (r) {
DMERR("could not resize cache metadata");
return r;
struct delay_c {
struct timer_list delay_timer;
struct mutex timer_lock;
+ struct workqueue_struct *kdelayd_wq;
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
atomic_t may_delay;
static DEFINE_MUTEX(delayed_bios_lock);
- static struct workqueue_struct *kdelayd_wq;
static struct kmem_cache *delayed_cache;
static void handle_delayed_timer(unsigned long data)
{
struct delay_c *dc = (struct delay_c *)data;
- queue_work(kdelayd_wq, &dc->flush_expired_bios);
+ queue_work(dc->kdelayd_wq, &dc->flush_expired_bios);
}
static void queue_timeout(struct delay_c *dc, unsigned long expires)
goto bad_dev_write;
}
+ dc->kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
+ if (!dc->kdelayd_wq) {
+ DMERR("Couldn't start kdelayd");
+ goto bad_queue;
+ }
+
setup_timer(&dc->delay_timer, handle_delayed_timer, (unsigned long)dc);
INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
ti->private = dc;
return 0;
+ bad_queue:
+ mempool_destroy(dc->delayed_pool);
bad_dev_write:
if (dc->dev_write)
dm_put_device(ti, dc->dev_write);
{
struct delay_c *dc = ti->private;
- flush_workqueue(kdelayd_wq);
+ destroy_workqueue(dc->kdelayd_wq);
dm_put_device(ti, dc->dev_read);
if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
bio->bi_bdev = dc->dev_write->bdev;
if (bio_sectors(bio))
- bio->bi_sector = dc->start_write +
- dm_target_offset(ti, bio->bi_sector);
+ bio->bi_iter.bi_sector = dc->start_write +
+ dm_target_offset(ti, bio->bi_iter.bi_sector);
return delay_bio(dc, dc->write_delay, bio);
}
bio->bi_bdev = dc->dev_read->bdev;
- bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector);
+ bio->bi_iter.bi_sector = dc->start_read +
+ dm_target_offset(ti, bio->bi_iter.bi_sector);
return delay_bio(dc, dc->read_delay, bio);
}
{
int r = -ENOMEM;
- kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
- if (!kdelayd_wq) {
- DMERR("Couldn't start kdelayd");
- goto bad_queue;
- }
-
delayed_cache = KMEM_CACHE(dm_delay_info, 0);
if (!delayed_cache) {
DMERR("Couldn't create delayed bio cache.");
bad_register:
kmem_cache_destroy(delayed_cache);
bad_memcache:
- destroy_workqueue(kdelayd_wq);
- bad_queue:
return r;
}
{
dm_unregister_target(&delay_target);
kmem_cache_destroy(delayed_cache);
- destroy_workqueue(kdelayd_wq);
}
/* Module hooks */
atomic_t pending_exceptions_count;
+ /* Protected by "lock" */
+ sector_t exception_start_sequence;
+
+ /* Protected by kcopyd single-threaded callback */
+ sector_t exception_complete_sequence;
+
+ /*
+ * A list of pending exceptions that completed out of order.
+ * Protected by kcopyd single-threaded callback.
+ */
+ struct list_head out_of_order_list;
+
mempool_t *pending_pool;
struct dm_exception_table pending;
*/
int started;
+ /* There was copying error. */
+ int copy_error;
+
+ /* A sequence number, it is used for in-order completion. */
+ sector_t exception_sequence;
+
+ struct list_head out_of_order_entry;
+
/*
* For writing a complete chunk, bypassing the copy.
*/
s->valid = 1;
s->active = 0;
atomic_set(&s->pending_exceptions_count, 0);
+ s->exception_start_sequence = 0;
+ s->exception_complete_sequence = 0;
+ INIT_LIST_HEAD(&s->out_of_order_list);
init_rwsem(&s->lock);
INIT_LIST_HEAD(&s->list);
spin_lock_init(&s->pe_lock);
if (full_bio) {
full_bio->bi_end_io = pe->full_bio_end_io;
full_bio->bi_private = pe->full_bio_private;
+ atomic_inc(&full_bio->bi_remaining);
}
free_pending_exception(pe);
pending_complete(pe, success);
}
+ static void complete_exception(struct dm_snap_pending_exception *pe)
+ {
+ struct dm_snapshot *s = pe->snap;
+
+ if (unlikely(pe->copy_error))
+ pending_complete(pe, 0);
+
+ else
+ /* Update the metadata if we are persistent */
+ s->store->type->commit_exception(s->store, &pe->e,
+ commit_callback, pe);
+ }
+
/*
* Called when the copy I/O has finished. kcopyd actually runs
* this code so don't block.
struct dm_snap_pending_exception *pe = context;
struct dm_snapshot *s = pe->snap;
- if (read_err || write_err)
- pending_complete(pe, 0);
+ pe->copy_error = read_err || write_err;
- else
- /* Update the metadata if we are persistent */
- s->store->type->commit_exception(s->store, &pe->e,
- commit_callback, pe);
+ if (pe->exception_sequence == s->exception_complete_sequence) {
+ s->exception_complete_sequence++;
+ complete_exception(pe);
+
+ while (!list_empty(&s->out_of_order_list)) {
+ pe = list_entry(s->out_of_order_list.next,
+ struct dm_snap_pending_exception, out_of_order_entry);
+ if (pe->exception_sequence != s->exception_complete_sequence)
+ break;
+ s->exception_complete_sequence++;
+ list_del(&pe->out_of_order_entry);
+ complete_exception(pe);
+ }
+ } else {
+ struct list_head *lh;
+ struct dm_snap_pending_exception *pe2;
+
+ list_for_each_prev(lh, &s->out_of_order_list) {
+ pe2 = list_entry(lh, struct dm_snap_pending_exception, out_of_order_entry);
+ if (pe2->exception_sequence < pe->exception_sequence)
+ break;
+ }
+ list_add(&pe->out_of_order_entry, lh);
+ }
}
/*
return NULL;
}
+ pe->exception_sequence = s->exception_start_sequence++;
+
dm_insert_exception(&s->pending, &pe->e);
return pe;
struct bio *bio, chunk_t chunk)
{
bio->bi_bdev = s->cow->bdev;
- bio->bi_sector = chunk_to_sector(s->store,
- dm_chunk_number(e->new_chunk) +
- (chunk - e->old_chunk)) +
- (bio->bi_sector &
- s->store->chunk_mask);
+ bio->bi_iter.bi_sector =
+ chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
+ (chunk - e->old_chunk)) +
+ (bio->bi_iter.bi_sector & s->store->chunk_mask);
}
static int snapshot_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
- chunk = sector_to_chunk(s->store, bio->bi_sector);
+ chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
/* Full snapshots are not usable */
/* To get here the table must be live so s->active is always set. */
r = DM_MAPIO_SUBMITTED;
if (!pe->started &&
- bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
+ bio->bi_iter.bi_size ==
+ (s->store->chunk_size << SECTOR_SHIFT)) {
pe->started = 1;
up_write(&s->lock);
start_full_bio(pe, bio);
return DM_MAPIO_REMAPPED;
}
- chunk = sector_to_chunk(s->store, bio->bi_sector);
+ chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
down_write(&s->lock);
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
if (o)
- r = __origin_write(&o->snapshots, bio->bi_sector, bio);
+ r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
up_read(&_origins_lock);
return r;
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 11, 1},
+ .version = {1, 12, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
- sector_t block_nr = bio->bi_sector;
+ sector_t block_nr = bio->bi_iter.bi_sector;
if (block_size_is_power_of_two(pool))
block_nr >>= pool->sectors_per_block_shift;
static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
{
struct pool *pool = tc->pool;
- sector_t bi_sector = bio->bi_sector;
+ sector_t bi_sector = bio->bi_iter.bi_sector;
bio->bi_bdev = tc->pool_dev->bdev;
if (block_size_is_power_of_two(pool))
- bio->bi_sector = (block << pool->sectors_per_block_shift) |
- (bi_sector & (pool->sectors_per_block - 1));
+ bio->bi_iter.bi_sector =
+ (block << pool->sectors_per_block_shift) |
+ (bi_sector & (pool->sectors_per_block - 1));
else
- bio->bi_sector = (block * pool->sectors_per_block) +
+ bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
sector_div(bi_sector, pool->sectors_per_block);
}
static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
{
- if (m->bio)
+ if (m->bio) {
m->bio->bi_end_io = m->saved_bi_end_io;
+ atomic_inc(&m->bio->bi_remaining);
+ }
cell_error(m->tc->pool, m->cell);
list_del(&m->list);
mempool_free(m, m->tc->pool->mapping_pool);
int r;
bio = m->bio;
- if (bio)
+ if (bio) {
bio->bi_end_io = m->saved_bi_end_io;
+ atomic_inc(&bio->bi_remaining);
+ }
if (m->err) {
cell_error(pool, m->cell);
*/
r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
if (r) {
- DMERR_LIMIT("dm_thin_insert_block() failed");
+ DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d",
+ dm_device_name(pool->pool_md), r);
+ set_pool_mode(pool, PM_READ_ONLY);
cell_error(pool, m->cell);
goto out;
}
*/
static int io_overlaps_block(struct pool *pool, struct bio *bio)
{
- return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT);
+ return bio->bi_iter.bi_size ==
+ (pool->sectors_per_block << SECTOR_SHIFT);
}
static int io_overwrites_block(struct pool *pool, struct bio *bio)
}
}
- static int commit(struct pool *pool)
- {
- int r;
-
- r = dm_pool_commit_metadata(pool->pmd);
- if (r)
- DMERR_LIMIT("%s: commit failed: error = %d",
- dm_device_name(pool->pool_md), r);
-
- return r;
- }
-
/*
* A non-zero return indicates read_only or fail_io mode.
* Many callers don't care about the return value.
*/
- static int commit_or_fallback(struct pool *pool)
+ static int commit(struct pool *pool)
{
int r;
if (get_pool_mode(pool) != PM_WRITE)
return -EINVAL;
- r = commit(pool);
- if (r)
+ r = dm_pool_commit_metadata(pool->pmd);
+ if (r) {
+ DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d",
+ dm_device_name(pool->pool_md), r);
set_pool_mode(pool, PM_READ_ONLY);
+ }
return r;
}
* Try to commit to see if that will free up some
* more space.
*/
- (void) commit_or_fallback(pool);
+ r = commit(pool);
+ if (r)
+ return r;
r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
if (r)
* table reload).
*/
if (!free_blocks) {
- DMWARN("%s: no free space available.",
+ DMWARN("%s: no free data space available.",
dm_device_name(pool->pool_md));
spin_lock_irqsave(&pool->lock, flags);
pool->no_free_space = 1;
}
r = dm_pool_alloc_data_block(pool->pmd, result);
- if (r)
+ if (r) {
+ if (r == -ENOSPC &&
+ !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) &&
+ !free_blocks) {
+ DMWARN("%s: no free metadata space available.",
+ dm_device_name(pool->pool_md));
+ set_pool_mode(pool, PM_READ_ONLY);
+ }
return r;
+ }
return 0;
}
if (bio_detain(pool, &key, bio, &cell))
return;
- if (bio_data_dir(bio) == WRITE && bio->bi_size)
+ if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
break_sharing(tc, bio, block, &key, lookup_result, cell);
else {
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
/*
* Remap empty bios (flushes) immediately, without provisioning.
*/
- if (!bio->bi_size) {
+ if (!bio->bi_iter.bi_size) {
inc_all_io_entry(pool, bio);
cell_defer_no_holder(tc, cell);
r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
switch (r) {
case 0:
- if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
+ if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
bio_io_error(bio);
else {
inc_all_io_entry(tc->pool, bio);
if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
return;
- if (commit_or_fallback(pool)) {
+ if (commit(pool)) {
while ((bio = bio_list_pop(&bios)))
bio_io_error(bio);
return;
case PM_FAIL:
DMERR("%s: switching pool to failure mode",
dm_device_name(pool->pool_md));
+ dm_pool_metadata_read_only(pool->pmd);
pool->process_bio = process_bio_fail;
pool->process_discard = process_bio_fail;
pool->process_prepared_mapping = process_prepared_mapping_fail;
break;
case PM_WRITE:
+ dm_pool_metadata_read_write(pool->pmd);
pool->process_bio = process_bio;
pool->process_discard = process_discard;
pool->process_prepared_mapping = process_prepared_mapping;
struct pool_c *pt = ti->private;
/*
- * We want to make sure that degraded pools are never upgraded.
+ * We want to make sure that a pool in PM_FAIL mode is never upgraded.
*/
enum pool_mode old_mode = pool->pf.mode;
enum pool_mode new_mode = pt->adjusted_pf.mode;
- if (old_mode > new_mode)
+ /*
+ * If we were in PM_FAIL mode, rollback of metadata failed. We're
+ * not going to recover without a thin_repair. So we never let the
+ * pool move out of the old mode. On the other hand a PM_READ_ONLY
+ * may have been due to a lack of metadata or data space, and may
+ * now work (ie. if the underlying devices have been resized).
+ */
+ if (old_mode == PM_FAIL)
new_mode = old_mode;
pool->ti = ti;
return r;
if (need_commit1 || need_commit2)
- (void) commit_or_fallback(pool);
+ (void) commit(pool);
return 0;
}
cancel_delayed_work(&pool->waker);
flush_workqueue(pool->wq);
- (void) commit_or_fallback(pool);
+ (void) commit(pool);
}
static int check_arg_count(unsigned argc, unsigned args_required)
if (r)
return r;
- (void) commit_or_fallback(pool);
+ (void) commit(pool);
r = dm_pool_reserve_metadata_snap(pool->pmd);
if (r)
DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
if (!r)
- (void) commit_or_fallback(pool);
+ (void) commit(pool);
return r;
}
/* Commit to ensure statistics aren't out-of-date */
if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
- (void) commit_or_fallback(pool);
+ (void) commit(pool);
r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
if (r) {
static int thin_map(struct dm_target *ti, struct bio *bio)
{
- bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
+ bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
return thin_bio_map(ti, bio);
}
struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct bio *bio = mddev->flush_bio;
- if (bio->bi_size == 0)
+ if (bio->bi_iter.bi_size == 0)
/* an empty barrier - all done */
bio_endio(bio, 0);
else {
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
int ret;
- rw |= REQ_SYNC;
-
bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
rdev->meta_bdev : rdev->bdev;
if (metadata_op)
- bio->bi_sector = sector + rdev->sb_start;
+ bio->bi_iter.bi_sector = sector + rdev->sb_start;
else if (rdev->mddev->reshape_position != MaxSector &&
(rdev->mddev->reshape_backwards ==
(sector >= rdev->mddev->reshape_position)))
- bio->bi_sector = sector + rdev->new_data_offset;
+ bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
else
- bio->bi_sector = sector + rdev->data_offset;
+ bio->bi_iter.bi_sector = sector + rdev->data_offset;
bio_add_page(bio, page, size, 0);
submit_bio_wait(rw, bio);
if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return;
if ( ! (
- (mddev->flags & ~ (1<<MD_CHANGE_PENDING)) ||
+ (mddev->flags & MD_UPDATE_SB_FLAGS & ~ (1<<MD_CHANGE_PENDING)) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
(mddev->external == 0 && mddev->safemode == 1) ||
static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
{
int sectors = bio_sectors(bio);
- if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
+ if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
return bio->bi_next;
else
return NULL;
return_bi = bi->bi_next;
bi->bi_next = NULL;
- bi->bi_size = 0;
+ bi->bi_iter.bi_size = 0;
trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
bi, 0);
bio_endio(bi, 0);
} else
init_stripe(sh, sector, previous);
} else {
+ spin_lock(&conf->device_lock);
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru)
&& !test_bit(STRIPE_EXPANDING, &sh->state)
&& !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
- && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state));
+ );
} else {
- spin_lock(&conf->device_lock);
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
- if (list_empty(&sh->lru) &&
- !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
- !test_bit(STRIPE_EXPANDING, &sh->state))
- BUG();
+ BUG_ON(list_empty(&sh->lru));
list_del_init(&sh->lru);
if (sh->group) {
sh->group->stripes_cnt--;
sh->group = NULL;
}
- spin_unlock(&conf->device_lock);
}
+ spin_unlock(&conf->device_lock);
}
} while (sh == NULL);
bi->bi_rw, i);
atomic_inc(&sh->count);
if (use_new_offset(conf, sh))
- bi->bi_sector = (sh->sector
+ bi->bi_iter.bi_sector = (sh->sector
+ rdev->new_data_offset);
else
- bi->bi_sector = (sh->sector
+ bi->bi_iter.bi_sector = (sh->sector
+ rdev->data_offset);
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_NOMERGE;
bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
- bi->bi_size = STRIPE_SIZE;
+ bi->bi_iter.bi_size = STRIPE_SIZE;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
rbi->bi_rw, i);
atomic_inc(&sh->count);
if (use_new_offset(conf, sh))
- rbi->bi_sector = (sh->sector
+ rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->new_data_offset);
else
- rbi->bi_sector = (sh->sector
+ rbi->bi_iter.bi_sector = (sh->sector
+ rrdev->data_offset);
rbi->bi_vcnt = 1;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
- rbi->bi_size = STRIPE_SIZE;
+ rbi->bi_iter.bi_size = STRIPE_SIZE;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
async_copy_data(int frombio, struct bio *bio, struct page *page,
sector_t sector, struct dma_async_tx_descriptor *tx)
{
- struct bio_vec *bvl;
+ struct bio_vec bvl;
+ struct bvec_iter iter;
struct page *bio_page;
- int i;
int page_offset;
struct async_submit_ctl submit;
enum async_tx_flags flags = 0;
- if (bio->bi_sector >= sector)
- page_offset = (signed)(bio->bi_sector - sector) * 512;
+ if (bio->bi_iter.bi_sector >= sector)
+ page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
else
- page_offset = (signed)(sector - bio->bi_sector) * -512;
+ page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
if (frombio)
flags |= ASYNC_TX_FENCE;
init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
- bio_for_each_segment(bvl, bio, i) {
- int len = bvl->bv_len;
+ bio_for_each_segment(bvl, bio, iter) {
+ int len = bvl.bv_len;
int clen;
int b_offset = 0;
clen = len;
if (clen > 0) {
- b_offset += bvl->bv_offset;
- bio_page = bvl->bv_page;
+ b_offset += bvl.bv_offset;
+ bio_page = bvl.bv_page;
if (frombio)
tx = async_memcpy(page, bio_page, page_offset,
b_offset, clen, &submit);
BUG_ON(!dev->read);
rbi = dev->read;
dev->read = NULL;
- while (rbi && rbi->bi_sector <
+ while (rbi && rbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
rbi2 = r5_next_bio(rbi, dev->sector);
if (!raid5_dec_bi_active_stripes(rbi)) {
dev->read = rbi = dev->toread;
dev->toread = NULL;
spin_unlock_irq(&sh->stripe_lock);
- while (rbi && rbi->bi_sector <
+ while (rbi && rbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
tx = async_copy_data(0, rbi, dev->page,
dev->sector, tx);
wbi = dev->written = chosen;
spin_unlock_irq(&sh->stripe_lock);
- while (wbi && wbi->bi_sector <
+ while (wbi && wbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
if (wbi->bi_rw & REQ_FUA)
set_bit(R5_WantFUA, &dev->flags);
int firstwrite=0;
pr_debug("adding bi b#%llu to stripe s#%llu\n",
- (unsigned long long)bi->bi_sector,
+ (unsigned long long)bi->bi_iter.bi_sector,
(unsigned long long)sh->sector);
/*
firstwrite = 1;
} else
bip = &sh->dev[dd_idx].toread;
- while (*bip && (*bip)->bi_sector < bi->bi_sector) {
- if (bio_end_sector(*bip) > bi->bi_sector)
+ while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
+ if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
goto overlap;
bip = & (*bip)->bi_next;
}
- if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
+ if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
goto overlap;
BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
sector_t sector = sh->dev[dd_idx].sector;
for (bi=sh->dev[dd_idx].towrite;
sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
- bi && bi->bi_sector <= sector;
+ bi && bi->bi_iter.bi_sector <= sector;
bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
if (bio_end_sector(bi) >= sector)
sector = bio_end_sector(bi);
}
pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
- (unsigned long long)(*bip)->bi_sector,
+ (unsigned long long)(*bip)->bi_iter.bi_sector,
(unsigned long long)sh->sector, dd_idx);
spin_unlock_irq(&sh->stripe_lock);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
wake_up(&conf->wait_for_overlap);
- while (bi && bi->bi_sector <
+ while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
bi = sh->dev[i].written;
sh->dev[i].written = NULL;
if (bi) bitmap_end = 1;
- while (bi && bi->bi_sector <
+ while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
clear_bit(BIO_UPTODATE, &bi->bi_flags);
spin_unlock_irq(&sh->stripe_lock);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
wake_up(&conf->wait_for_overlap);
- while (bi && bi->bi_sector <
+ while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector);
clear_bit(R5_UPTODATE, &dev->flags);
wbi = dev->written;
dev->written = NULL;
- while (wbi && wbi->bi_sector <
+ while (wbi && wbi->bi_iter.bi_sector <
dev->sector + STRIPE_SECTORS) {
wbi2 = r5_next_bio(wbi, dev->sector);
if (!raid5_dec_bi_active_stripes(wbi)) {
static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
{
- sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+ sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
unsigned int chunk_sectors = mddev->chunk_sectors;
unsigned int bio_sectors = bio_sectors(bio);
/*
* compute position
*/
- align_bi->bi_sector = raid5_compute_sector(conf, raid_bio->bi_sector,
- 0,
- &dd_idx, NULL);
+ align_bi->bi_iter.bi_sector =
+ raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
+ 0, &dd_idx, NULL);
end_sector = bio_end_sector(align_bi);
rcu_read_lock();
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
if (!bio_fits_rdev(align_bi) ||
- is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
+ is_badblock(rdev, align_bi->bi_iter.bi_sector,
+ bio_sectors(align_bi),
&first_bad, &bad_sectors)) {
/* too big in some way, or has a known bad block */
bio_put(align_bi);
}
/* No reshape active, so we can trust rdev->data_offset */
- align_bi->bi_sector += rdev->data_offset;
+ align_bi->bi_iter.bi_sector += rdev->data_offset;
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
align_bi, disk_devt(mddev->gendisk),
- raid_bio->bi_sector);
+ raid_bio->bi_iter.bi_sector);
generic_make_request(align_bi);
return 1;
} else {
/* Skip discard while reshape is happening */
return;
- logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
- last_sector = bi->bi_sector + (bi->bi_size>>9);
+ logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+ last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
return;
}
- logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+ logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bio_end_sector(bi);
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
int remaining;
int handled = 0;
- logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+ logical_sector = raid_bio->bi_iter.bi_sector &
+ ~((sector_t)STRIPE_SECTORS-1);
sector = raid5_compute_sector(conf, logical_sector,
0, &dd_idx, NULL);
last_sector = bio_end_sector(raid_bio);
for (i = 0; i < *group_cnt; i++) {
struct r5worker_group *group;
- group = worker_groups[i];
+ group = &(*worker_groups)[i];
INIT_LIST_HEAD(&group->handle_list);
group->conf = conf;
group->workers = workers + i * cnt;
struct dasd_ccw_req *cqr;
struct ccw1 *ccw;
struct req_iterator iter;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
unsigned int off;
int count, cidaw, cplength, datasize;
count = 0;
cidaw = 0;
rq_for_each_segment(bv, req, iter) {
- if (bv->bv_len & (blksize - 1))
+ if (bv.bv_len & (blksize - 1))
/* Eckd can only do full blocks. */
return ERR_PTR(-EINVAL);
- count += bv->bv_len >> (block->s2b_shift + 9);
+ count += bv.bv_len >> (block->s2b_shift + 9);
#if defined(CONFIG_64BIT)
- if (idal_is_needed (page_address(bv->bv_page), bv->bv_len))
- cidaw += bv->bv_len >> (block->s2b_shift + 9);
+ if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
+ cidaw += bv.bv_len >> (block->s2b_shift + 9);
#endif
}
/* Paranoia. */
last_rec - recid + 1, cmd, basedev, blksize);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
+ dst = page_address(bv.bv_page) + bv.bv_offset;
if (dasd_page_cache) {
char *copy = kmem_cache_alloc(dasd_page_cache,
GFP_DMA | __GFP_NOWARN);
if (copy && rq_data_dir(req) == WRITE)
- memcpy(copy + bv->bv_offset, dst, bv->bv_len);
+ memcpy(copy + bv.bv_offset, dst, bv.bv_len);
if (copy)
- dst = copy + bv->bv_offset;
+ dst = copy + bv.bv_offset;
}
- for (off = 0; off < bv->bv_len; off += blksize) {
+ for (off = 0; off < bv.bv_len; off += blksize) {
sector_t trkid = recid;
unsigned int recoffs = sector_div(trkid, blk_per_trk);
rcmd = cmd;
struct dasd_ccw_req *cqr;
struct ccw1 *ccw;
struct req_iterator iter;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst, *idaw_dst;
unsigned int cidaw, cplength, datasize;
unsigned int tlf;
idaw_dst = NULL;
idaw_len = 0;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- seg_len = bv->bv_len;
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
trkid = recid;
{
struct dasd_ccw_req *cqr;
struct req_iterator iter;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
unsigned int trkcount, ctidaw;
unsigned char cmd;
new_track = 1;
recid = first_rec;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- seg_len = bv->bv_len;
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ seg_len = bv.bv_len;
while (seg_len) {
if (new_track) {
trkid = recid;
}
} else {
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
+ dst = page_address(bv.bv_page) + bv.bv_offset;
last_tidaw = itcw_add_tidaw(itcw, 0x00,
- dst, bv->bv_len);
+ dst, bv.bv_len);
if (IS_ERR(last_tidaw)) {
ret = -EINVAL;
goto out_error;
fcx_multitrack = private->features.feature[40] & 0x20;
data_size = blk_rq_bytes(req);
+ if (data_size % blksize)
+ return ERR_PTR(-EINVAL);
/* tpm write request add CBC data on each track boundary */
if (rq_data_dir(req) == WRITE)
data_size += (last_trk - first_trk) * 4;
struct dasd_ccw_req *cqr;
struct ccw1 *ccw;
struct req_iterator iter;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst;
unsigned char cmd;
unsigned int trkcount;
idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
}
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- seg_len = bv->bv_len;
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ seg_len = bv.bv_len;
if (cmd == DASD_ECKD_CCW_READ_TRACK)
memset(dst, 0, seg_len);
if (!len_to_track_end) {
struct dasd_eckd_private *private;
struct ccw1 *ccw;
struct req_iterator iter;
- struct bio_vec *bv;
+ struct bio_vec bv;
char *dst, *cda;
unsigned int blksize, blk_per_trk, off;
sector_t recid;
if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
ccw++;
rq_for_each_segment(bv, req, iter) {
- dst = page_address(bv->bv_page) + bv->bv_offset;
- for (off = 0; off < bv->bv_len; off += blksize) {
+ dst = page_address(bv.bv_page) + bv.bv_offset;
+ for (off = 0; off < bv.bv_len; off += blksize) {
/* Skip locate record. */
if (private->uses_cdl && recid <= 2*blk_per_trk)
ccw++;
cda = (char *)((addr_t) ccw->cda);
if (dst != cda) {
if (rq_data_dir(req) == READ)
- memcpy(dst, cda, bv->bv_len);
+ memcpy(dst, cda, bv.bv_len);
kmem_cache_free(dasd_page_cache,
(void *)((addr_t)cda & PAGE_MASK));
}
if (sdkp->device->no_write_same)
return BLKPREP_KILL;
- BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size);
+ BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
sector >>= ilog2(sdp->sector_size) - 9;
nr_sectors >>= ilog2(sdp->sector_size) - 9;
{
struct scsi_device *sdev = sdkp->device;
+ if (sdev->host->no_write_same) {
+ sdev->no_write_same = 1;
+
+ return;
+ }
+
if (scsi_report_opcode(sdev, buffer, SD_BUF_SIZE, INQUIRY) < 0) {
/* too large values might cause issues with arcmsr */
int vpd_buf_len = 64;
u64 start, end, bound;
/* unaligned request */
- if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ if (unlikely(bio->bi_iter.bi_sector &
+ (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
return 0;
- if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
return 0;
- start = bio->bi_sector;
- end = start + (bio->bi_size >> SECTOR_SHIFT);
+ start = bio->bi_iter.bi_sector;
+ end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
bound = zram->disksize >> SECTOR_SHIFT;
/* out of range range */
if (unlikely(start >= bound || end > bound || start > end))
return -ENOMEM;
/* Do not reset an active device! */
- if (bdev->bd_holders)
- return -EBUSY;
+ if (bdev->bd_holders) {
+ ret = -EBUSY;
+ goto out;
+ }
ret = kstrtou16(buf, 10, &do_reset);
if (ret)
- return ret;
+ goto out;
- if (!do_reset)
- return -EINVAL;
+ if (!do_reset) {
+ ret = -EINVAL;
+ goto out;
+ }
/* Make sure all pending I/O is finished */
fsync_bdev(bdev);
+ bdput(bdev);
zram_reset_device(zram, true);
return len;
+
+ out:
+ bdput(bdev);
+ return ret;
}
static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
{
- int i, offset;
+ int offset;
u32 index;
- struct bio_vec *bvec;
+ struct bio_vec bvec;
+ struct bvec_iter iter;
switch (rw) {
case READ:
break;
}
- index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
- offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+ index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ offset = (bio->bi_iter.bi_sector &
+ (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
- bio_for_each_segment(bvec, bio, i) {
+ bio_for_each_segment(bvec, bio, iter) {
int max_transfer_size = PAGE_SIZE - offset;
- if (bvec->bv_len > max_transfer_size) {
+ if (bvec.bv_len > max_transfer_size) {
/*
* zram_bvec_rw() can only make operation on a single
* zram page. Split the bio vector.
*/
struct bio_vec bv;
- bv.bv_page = bvec->bv_page;
+ bv.bv_page = bvec.bv_page;
bv.bv_len = max_transfer_size;
- bv.bv_offset = bvec->bv_offset;
+ bv.bv_offset = bvec.bv_offset;
if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
goto out;
- bv.bv_len = bvec->bv_len - max_transfer_size;
+ bv.bv_len = bvec.bv_len - max_transfer_size;
bv.bv_offset += max_transfer_size;
if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
goto out;
} else
- if (zram_bvec_rw(zram, bvec, index, offset, bio, rw)
+ if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
< 0)
goto out;
- update_position(&index, &offset, bvec);
+ update_position(&index, &offset, &bvec);
}
set_bit(BIO_UPTODATE, &bio->bi_flags);
bp->b_flags |= XBF_READ;
bp->b_ops = ops;
- xfsbdstrat(target->bt_mount, bp);
+ if (XFS_FORCED_SHUTDOWN(target->bt_mount)) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ xfs_buf_iorequest(bp);
xfs_buf_iowait(bp);
return bp;
}
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
- STATIC int
+ int
xfs_bioerror_relse(
struct xfs_buf *bp)
{
ASSERT(xfs_buf_islocked(bp));
bp->b_flags |= XBF_WRITE;
- bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
+ bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);
xfs_bdstrat_cb(bp);
return error;
}
- /*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem. Typically user data goes thru this
- * path; one of the exceptions is the superblock.
- */
- void
- xfsbdstrat(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
- {
- if (XFS_FORCED_SHUTDOWN(mp)) {
- trace_xfs_bdstrat_shut(bp, _RET_IP_);
- xfs_bioerror_relse(bp);
- return;
- }
-
- xfs_buf_iorequest(bp);
- }
-
STATIC void
_xfs_buf_ioend(
xfs_buf_t *bp,
bio = bio_alloc(GFP_NOIO, nr_pages);
bio->bi_bdev = bp->b_target->bt_bdev;
- bio->bi_sector = sector;
+ bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
total_nr_pages--;
}
- if (likely(bio->bi_size)) {
+ if (likely(bio->bi_iter.bi_size)) {
if (xfs_buf_is_vmapped(bp)) {
flush_kernel_vmap_range(bp->b_addr,
xfs_buf_vmap_len(bp));
struct xfs_buf *bp;
bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
list_del_init(&bp->b_lru);
+ if (bp->b_flags & XBF_WRITE_FAIL) {
+ xfs_alert(btp->bt_mount,
+ "Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
+ "Please run xfs_repair to determine the extent of the problem.",
+ (long long)bp->b_bn);
+ }
xfs_buf_rele(bp);
}
if (loop++ != 0)
blk_start_plug(&plug);
list_for_each_entry_safe(bp, n, io_list, b_list) {
- bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
+ bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);
bp->b_flags |= XBF_WRITE;
if (!wait) {