/*
* FIXME: the cache is read/write for the time being.
*/
-enum cache_mode {
+enum cache_metadata_mode {
CM_WRITE, /* metadata may be changed */
CM_READ_ONLY, /* metadata may not be changed */
};
+enum cache_io_mode {
+ /*
+ * Data is written to cached blocks only. These blocks are marked
+ * dirty. If you lose the cache device you will lose data.
+ * Potential performance increase for both reads and writes.
+ */
+ CM_IO_WRITEBACK,
+
+ /*
+ * Data is written to both cache and origin. Blocks are never
+ * dirty. Potential performance benfit for reads only.
+ */
+ CM_IO_WRITETHROUGH,
+
+ /*
+ * A degraded mode useful for various cache coherency situations
+ * (eg, rolling back snapshots). Reads and writes always go to the
+ * origin. If a write goes to a cached oblock, then the cache
+ * block is invalidated.
+ */
+ CM_IO_PASSTHROUGH
+};
+
struct cache_features {
- enum cache_mode mode;
- bool write_through:1;
+ enum cache_metadata_mode mode;
+ enum cache_io_mode io_mode;
};
struct cache_stats {
atomic_t discard_count;
};
+/*
+ * Defines a range of cblocks, begin to (end - 1) are in the range. end is
+ * the one-past-the-end value.
+ */
+struct cblock_range {
+ dm_cblock_t begin;
+ dm_cblock_t end;
+};
+
+struct invalidation_request {
+ struct list_head list;
+ struct cblock_range *cblocks;
+
+ atomic_t complete;
+ int err;
+
+ wait_queue_head_t result_wait;
+};
+
struct cache {
struct dm_target *ti;
struct dm_target_callbacks callbacks;
bool need_tick_bio:1;
bool sized:1;
+ bool invalidate:1;
bool commit_requested:1;
bool loaded_mappings:1;
bool loaded_discards:1;
struct cache_features features;
struct cache_stats stats;
+
+ /*
+ * Invalidation fields.
+ */
+ spinlock_t invalidation_lock;
+ struct list_head invalidation_requests;
};
struct per_bio_data {
bool demote:1;
bool promote:1;
bool requeue_holder:1;
+ bool invalidate:1;
struct dm_bio_prison_cell *old_ocell;
struct dm_bio_prison_cell *new_ocell;
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))
+static bool writethrough_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+ return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
static size_t get_per_bio_data_size(struct cache *cache)
{
- return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+ return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}
static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
static void remap_to_cache(struct cache *cache, struct bio *bio,
dm_cblock_t cblock)
{
- sector_t bi_sector = bio->bi_sector;
+ sector_t bi_sector = bio->bi_iter.bi_sector;
bio->bi_bdev = cache->cache_dev->bdev;
if (!block_size_is_power_of_two(cache))
- bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
- sector_div(bi_sector, cache->sectors_per_block);
+ bio->bi_iter.bi_sector =
+ (from_cblock(cblock) * cache->sectors_per_block) +
+ sector_div(bi_sector, cache->sectors_per_block);
else
- bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
- (bi_sector & (cache->sectors_per_block - 1));
+ bio->bi_iter.bi_sector =
+ (from_cblock(cblock) << cache->sectors_per_block_shift) |
+ (bi_sector & (cache->sectors_per_block - 1));
}
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
- sector_t block_nr = bio->bi_sector;
+ sector_t block_nr = bio->bi_iter.bi_sector;
if (!block_size_is_power_of_two(cache))
(void) sector_div(block_nr, cache->sectors_per_block);
dm_unhook_bio(&pb->hook_info, bio);
+ /*
+ * Must bump bi_remaining to allow bio to complete with
+ * restored bi_end_io.
+ */
+ atomic_inc(&bio->bi_remaining);
+
if (err) {
bio_endio(bio, err);
return;
list_add_tail(&mg->list, &cache->quiesced_migrations);
spin_unlock_irqrestore(&cache->lock, flags);
- } else
+ } else {
+ if (mg->invalidate)
+ policy_remove_mapping(cache->policy, mg->old_oblock);
cleanup_migration(mg);
+ }
} else {
if (mg->requeue_holder)
static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
return (bio_data_dir(bio) == WRITE) &&
- (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
+ (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}
static void avoid_copy(struct dm_cache_migration *mg)
mg->demote = false;
mg->promote = true;
mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->new_oblock = oblock;
mg->cblock = cblock;
mg->demote = false;
mg->promote = false;
mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->old_oblock = oblock;
mg->cblock = cblock;
mg->demote = true;
mg->promote = true;
mg->requeue_holder = true;
+ mg->invalidate = false;
mg->cache = cache;
mg->old_oblock = old_oblock;
mg->new_oblock = new_oblock;
quiesce_migration(mg);
}
+/*
+ * Invalidate a cache entry. No writeback occurs; any changes in the cache
+ * block are thrown away.
+ */
+static void invalidate(struct cache *cache, struct prealloc *structs,
+ dm_oblock_t oblock, dm_cblock_t cblock,
+ struct dm_bio_prison_cell *cell)
+{
+ struct dm_cache_migration *mg = prealloc_get_migration(structs);
+
+ mg->err = false;
+ mg->writeback = false;
+ mg->demote = true;
+ mg->promote = false;
+ mg->requeue_holder = true;
+ mg->invalidate = true;
+ mg->cache = cache;
+ mg->old_oblock = oblock;
+ mg->cblock = cblock;
+ mg->old_ocell = cell;
+ mg->new_ocell = NULL;
+ mg->start_jiffies = jiffies;
+
+ inc_nr_migrations(cache);
+ quiesce_migration(mg);
+}
+
/*----------------------------------------------------------------
* bio processing
*--------------------------------------------------------------*/
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
- BUG_ON(bio->bi_size);
+ BUG_ON(bio->bi_iter.bi_size);
if (!pb->req_nr)
remap_to_origin(cache, bio);
else
*/
static void process_discard_bio(struct cache *cache, struct bio *bio)
{
- dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
+ dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
cache->discard_block_size);
- dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
+ dm_block_t end_block = bio_end_sector(bio);
dm_block_t b;
end_block = block_div(end_block, cache->discard_block_size);
return current_volume < cache->migration_threshold;
}
-static bool is_writethrough_io(struct cache *cache, struct bio *bio,
- dm_cblock_t cblock)
-{
- return bio_data_dir(bio) == WRITE &&
- cache->features.write_through && !is_dirty(cache, cblock);
-}
-
static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
atomic_inc(bio_data_dir(bio) == READ ?
&cache->stats.read_miss : &cache->stats.write_miss);
}
+static void issue_cache_bio(struct cache *cache, struct bio *bio,
+ struct per_bio_data *pb,
+ dm_oblock_t oblock, dm_cblock_t cblock)
+{
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_cache_dirty(cache, bio, oblock, cblock);
+ issue(cache, bio);
+}
+
static void process_bio(struct cache *cache, struct prealloc *structs,
struct bio *bio)
{
size_t pb_data_size = get_per_bio_data_size(cache);
struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
bool discarded_block = is_discarded_oblock(cache, block);
- bool can_migrate = discarded_block || spare_migration_bandwidth(cache);
+ bool passthrough = passthrough_mode(&cache->features);
+ bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
/*
* Check to see if that block is currently migrating.
switch (lookup_result.op) {
case POLICY_HIT:
- inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ if (passthrough) {
+ inc_miss_counter(cache, bio);
- if (is_writethrough_io(cache, bio, lookup_result.cblock))
- remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- else
- remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ /*
+ * Passthrough always maps to the origin,
+ * invalidating any cache blocks that are written
+ * to.
+ */
+
+ if (bio_data_dir(bio) == WRITE) {
+ atomic_inc(&cache->stats.demotion);
+ invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
+ release_cell = false;
+
+ } else {
+ /* FIXME: factor out issue_origin() */
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_origin_clear_discard(cache, bio, block);
+ issue(cache, bio);
+ }
+ } else {
+ inc_hit_counter(cache, bio);
+
+ if (bio_data_dir(bio) == WRITE &&
+ writethrough_mode(&cache->features) &&
+ !is_dirty(cache, lookup_result.cblock)) {
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+ issue(cache, bio);
+ } else
+ issue_cache_bio(cache, bio, pb, block, lookup_result.cblock);
+ }
- issue(cache, bio);
break;
case POLICY_MISS:
prealloc_free_structs(cache, &structs);
}
+/*----------------------------------------------------------------
+ * Invalidations.
+ * Dropping something from the cache *without* writing back.
+ *--------------------------------------------------------------*/
+
+static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
+{
+ int r = 0;
+ uint64_t begin = from_cblock(req->cblocks->begin);
+ uint64_t end = from_cblock(req->cblocks->end);
+
+ while (begin != end) {
+ r = policy_remove_cblock(cache->policy, to_cblock(begin));
+ if (!r) {
+ r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
+ if (r)
+ break;
+
+ } else if (r == -ENODATA) {
+ /* harmless, already unmapped */
+ r = 0;
+
+ } else {
+ DMERR("policy_remove_cblock failed");
+ break;
+ }
+
+ begin++;
+ }
+
+ cache->commit_requested = true;
+
+ req->err = r;
+ atomic_set(&req->complete, 1);
+
+ wake_up(&req->result_wait);
+}
+
+static void process_invalidation_requests(struct cache *cache)
+{
+ struct list_head list;
+ struct invalidation_request *req, *tmp;
+
+ INIT_LIST_HEAD(&list);
+ spin_lock(&cache->invalidation_lock);
+ list_splice_init(&cache->invalidation_requests, &list);
+ spin_unlock(&cache->invalidation_lock);
+
+ list_for_each_entry_safe (req, tmp, &list, list)
+ process_invalidation_request(cache, req);
+}
+
/*----------------------------------------------------------------
* Main worker loop
*--------------------------------------------------------------*/
!bio_list_empty(&cache->deferred_writethrough_bios) ||
!list_empty(&cache->quiesced_migrations) ||
!list_empty(&cache->completed_migrations) ||
- !list_empty(&cache->need_commit_migrations);
+ !list_empty(&cache->need_commit_migrations) ||
+ cache->invalidate;
}
static void do_worker(struct work_struct *ws)
writeback_some_dirty_blocks(cache);
process_deferred_writethrough_bios(cache);
process_deferred_bios(cache);
+ process_invalidation_requests(cache);
}
process_migrations(cache, &cache->quiesced_migrations, issue_copy);
static void init_features(struct cache_features *cf)
{
cf->mode = CM_WRITE;
- cf->write_through = false;
+ cf->io_mode = CM_IO_WRITEBACK;
}
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
arg = dm_shift_arg(as);
if (!strcasecmp(arg, "writeback"))
- cf->write_through = false;
+ cf->io_mode = CM_IO_WRITEBACK;
else if (!strcasecmp(arg, "writethrough"))
- cf->write_through = true;
+ cf->io_mode = CM_IO_WRITETHROUGH;
+
+ else if (!strcasecmp(arg, "passthrough"))
+ cf->io_mode = CM_IO_PASSTHROUGH;
else {
*error = "Unrecognised cache feature requested";
}
cache->cmd = cmd;
+ if (passthrough_mode(&cache->features)) {
+ bool all_clean;
+
+ r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
+ if (r) {
+ *error = "dm_cache_metadata_all_clean() failed";
+ goto bad;
+ }
+
+ if (!all_clean) {
+ *error = "Cannot enter passthrough mode unless all blocks are clean";
+ r = -EINVAL;
+ goto bad;
+ }
+ }
+
spin_lock_init(&cache->lock);
bio_list_init(&cache->deferred_bios);
bio_list_init(&cache->deferred_flush_bios);
cache->need_tick_bio = true;
cache->sized = false;
+ cache->invalidate = false;
cache->commit_requested = false;
cache->loaded_mappings = false;
cache->loaded_discards = false;
atomic_set(&cache->stats.commit_count, 0);
atomic_set(&cache->stats.discard_count, 0);
+ spin_lock_init(&cache->invalidation_lock);
+ INIT_LIST_HEAD(&cache->invalidation_requests);
+
*result = cache;
return 0;
return DM_MAPIO_SUBMITTED;
}
+ r = DM_MAPIO_REMAPPED;
switch (lookup_result.op) {
case POLICY_HIT:
- inc_hit_counter(cache, bio);
- pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ if (passthrough_mode(&cache->features)) {
+ if (bio_data_dir(bio) == WRITE) {
+ /*
+ * We need to invalidate this block, so
+ * defer for the worker thread.
+ */
+ cell_defer(cache, cell, true);
+ r = DM_MAPIO_SUBMITTED;
+
+ } else {
+ pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
+ inc_miss_counter(cache, bio);
+ remap_to_origin_clear_discard(cache, bio, block);
+
+ cell_defer(cache, cell, false);
+ }
- if (is_writethrough_io(cache, bio, lookup_result.cblock))
- remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
- else
- remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
+ } else {
+ inc_hit_counter(cache, bio);
+
+ if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
+ !is_dirty(cache, lookup_result.cblock))
+ remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
+ else
+ remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
- cell_defer(cache, cell, false);
+ cell_defer(cache, cell, false);
+ }
break;
case POLICY_MISS:
DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
(unsigned) lookup_result.op);
bio_io_error(bio);
- return DM_MAPIO_SUBMITTED;
+ r = DM_MAPIO_SUBMITTED;
}
- return DM_MAPIO_REMAPPED;
+ return r;
}
static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
(unsigned long long) from_cblock(residency),
cache->nr_dirty);
- if (cache->features.write_through)
+ if (writethrough_mode(&cache->features))
DMEMIT("1 writethrough ");
- else
- DMEMIT("0 ");
+
+ else if (passthrough_mode(&cache->features))
+ DMEMIT("1 passthrough ");
+
+ else if (writeback_mode(&cache->features))
+ DMEMIT("1 writeback ");
+
+ else {
+ DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
+ goto err;
+ }
DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
if (sz < maxlen) {
}
/*
- * Supports <key> <value>.
+ * A cache block range can take two forms:
+ *
+ * i) A single cblock, eg. '3456'
+ * ii) A begin and end cblock with dots between, eg. 123-234
+ */
+static int parse_cblock_range(struct cache *cache, const char *str,
+ struct cblock_range *result)
+{
+ char dummy;
+ uint64_t b, e;
+ int r;
+
+ /*
+ * Try and parse form (ii) first.
+ */
+ r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
+ if (r < 0)
+ return r;
+
+ if (r == 2) {
+ result->begin = to_cblock(b);
+ result->end = to_cblock(e);
+ return 0;
+ }
+
+ /*
+ * That didn't work, try form (i).
+ */
+ r = sscanf(str, "%llu%c", &b, &dummy);
+ if (r < 0)
+ return r;
+
+ if (r == 1) {
+ result->begin = to_cblock(b);
+ result->end = to_cblock(from_cblock(result->begin) + 1u);
+ return 0;
+ }
+
+ DMERR("invalid cblock range '%s'", str);
+ return -EINVAL;
+}
+
+static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
+{
+ uint64_t b = from_cblock(range->begin);
+ uint64_t e = from_cblock(range->end);
+ uint64_t n = from_cblock(cache->cache_size);
+
+ if (b >= n) {
+ DMERR("begin cblock out of range: %llu >= %llu", b, n);
+ return -EINVAL;
+ }
+
+ if (e > n) {
+ DMERR("end cblock out of range: %llu > %llu", e, n);
+ return -EINVAL;
+ }
+
+ if (b >= e) {
+ DMERR("invalid cblock range: %llu >= %llu", b, e);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int request_invalidation(struct cache *cache, struct cblock_range *range)
+{
+ struct invalidation_request req;
+
+ INIT_LIST_HEAD(&req.list);
+ req.cblocks = range;
+ atomic_set(&req.complete, 0);
+ req.err = 0;
+ init_waitqueue_head(&req.result_wait);
+
+ spin_lock(&cache->invalidation_lock);
+ list_add(&req.list, &cache->invalidation_requests);
+ spin_unlock(&cache->invalidation_lock);
+ wake_worker(cache);
+
+ wait_event(req.result_wait, atomic_read(&req.complete));
+ return req.err;
+}
+
+static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
+ const char **cblock_ranges)
+{
+ int r = 0;
+ unsigned i;
+ struct cblock_range range;
+
+ if (!passthrough_mode(&cache->features)) {
+ DMERR("cache has to be in passthrough mode for invalidation");
+ return -EPERM;
+ }
+
+ for (i = 0; i < count; i++) {
+ r = parse_cblock_range(cache, cblock_ranges[i], &range);
+ if (r)
+ break;
+
+ r = validate_cblock_range(cache, &range);
+ if (r)
+ break;
+
+ /*
+ * Pass begin and end origin blocks to the worker and wake it.
+ */
+ r = request_invalidation(cache, &range);
+ if (r)
+ break;
+ }
+
+ return r;
+}
+
+/*
+ * Supports
+ * "<key> <value>"
+ * and
+ * "invalidate_cblocks [(<begin>)|(<begin>-<end>)]*
*
* The key migration_threshold is supported by the cache target core.
*/
{
struct cache *cache = ti->private;
+ if (!argc)
+ return -EINVAL;
+
+ if (!strcasecmp(argv[0], "invalidate_cblocks"))
+ return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);
+
if (argc != 2)
return -EINVAL;
static struct target_type cache_target = {
.name = "cache",
- .version = {1, 1, 1},
+ .version = {1, 2, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,