#define DM_MSG_PREFIX "snapshots"
+static const char dm_snapshot_merge_target_name[] = "snapshot-merge"; /* used as merge_target.name */
+
+#define dm_target_is_snapshot_merge(ti) \
+ ((ti)->type->name == dm_snapshot_merge_target_name) /* pointer identity, not strcmp() */
+
/*
* The percentage increment we will wake up users at
*/
struct bio_list origin_bios;
struct bio_list snapshot_bios;
- /*
- * Short-term queue of pending exceptions prior to submission.
- */
- struct list_head list;
-
- /*
- * The primary pending_exception is the one that holds
- * the ref_count and the list of origin_bios for a
- * group of pending_exceptions. It is always last to get freed.
- * These fields get set up when writing to the origin.
- */
- struct dm_snap_pending_exception *primary_pe;
-
- /*
- * Number of pending_exceptions processing this chunk.
- * When this drops to zero we must complete the origin bios.
- * If incrementing or decrementing this, hold pe->snap->lock for
- * the sibling concerned and not pe->primary_pe->snap->lock unless
- * they are the same.
- */
- atomic_t ref_count;
-
/* Pointer back to snapshot context */
struct dm_snapshot *snap;
return found;
}
+/*
+ * Wait until __chunk_is_tracked() no longer reports @chunk as tracked
+ * in snapshot @s, sleeping via msleep(1) between checks. Returns
+ * nothing; it simply does not return while the chunk stays tracked.
+ *
+ * This conflicting I/O is extremely improbable in the caller,
+ * so msleep(1) is sufficient and there is no need for a wait queue.
+ */
+static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
+{
+ while (__chunk_is_tracked(s, chunk))
+ msleep(1);
+}
+
/*
* One of these per registered origin, held in the snapshot_origins hash
*/
int i;
int r = -EINVAL;
char *origin_path, *cow_path;
- unsigned args_used;
+ unsigned args_used, num_flush_requests = 1;
+ fmode_t origin_mode = FMODE_READ;
if (argc != 4) {
ti->error = "requires exactly 4 arguments";
goto bad;
}
+ if (dm_target_is_snapshot_merge(ti)) {
+ num_flush_requests = 2;
+ origin_mode = FMODE_WRITE;
+ }
+
origin_path = argv[0];
argv++;
argc--;
argv += args_used;
argc -= args_used;
- r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
+ r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin);
if (r) {
ti->error = "Cannot get origin device";
goto bad_origin;
INIT_WORK(&s->queued_bios_work, flush_queued_bios);
ti->private = s;
- ti->num_flush_requests = 1;
+ ti->num_flush_requests = num_flush_requests;
/* Add snapshot to the list of snapshots for this origin */
/* Exceptions aren't triggered till snapshot_resume() is called */
flush_bios(queued_bios);
}
+static int do_origin(struct dm_dev *origin, struct bio *bio);
+
+/*
+ * Retry a chain of origin bios (linked through bi_next).
+ *
+ * Each bio is unlinked from the chain and passed back through
+ * do_origin() against s->origin. A bio for which do_origin() returns
+ * DM_MAPIO_REMAPPED is issued straight away with
+ * generic_make_request(). For any other return value nothing more is
+ * done with the bio here (e.g. DM_MAPIO_SUBMITTED, where
+ * __origin_write() has queued it on a pending exception).
+ */
+static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
+{
+ struct bio *n;
+ int r;
+
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ r = do_origin(s->origin, bio);
+ if (r == DM_MAPIO_REMAPPED)
+ generic_make_request(bio);
+ bio = n;
+ }
+}
+
/*
* Error a list of buffers.
*/
dm_table_event(s->ti->table);
}
-static void get_pending_exception(struct dm_snap_pending_exception *pe)
-{
- atomic_inc(&pe->ref_count);
-}
-
-static struct bio *put_pending_exception(struct dm_snap_pending_exception *pe)
-{
- struct dm_snap_pending_exception *primary_pe;
- struct bio *origin_bios = NULL;
-
- primary_pe = pe->primary_pe;
-
- /*
- * If this pe is involved in a write to the origin and
- * it is the last sibling to complete then release
- * the bios for the original write to the origin.
- */
- if (primary_pe &&
- atomic_dec_and_test(&primary_pe->ref_count)) {
- origin_bios = bio_list_get(&primary_pe->origin_bios);
- free_pending_exception(primary_pe);
- }
-
- /*
- * Free the pe if it's not linked to an origin write or if
- * it's not itself a primary pe.
- */
- if (!primary_pe || primary_pe != pe)
- free_pending_exception(pe);
-
- return origin_bios;
-}
-
static void pending_complete(struct dm_snap_pending_exception *pe, int success)
{
struct dm_exception *e;
goto out;
}
- /*
- * Check for conflicting reads. This is extremely improbable,
- * so msleep(1) is sufficient and there is no need for a wait queue.
- */
- while (__chunk_is_tracked(s, pe->e.old_chunk))
- msleep(1);
+ /* Check for conflicting reads */
+ __check_for_conflicting_io(s, pe->e.old_chunk);
/*
* Add a proper exception, and remove the
out:
dm_remove_exception(&pe->e);
snapshot_bios = bio_list_get(&pe->snapshot_bios);
- origin_bios = put_pending_exception(pe);
+ origin_bios = bio_list_get(&pe->origin_bios);
+ free_pending_exception(pe);
up_write(&s->lock);
else
flush_bios(snapshot_bios);
- flush_bios(origin_bios);
+ retry_origin_bios(s, origin_bios);
}
static void commit_callback(void *context, int success)
pe->e.old_chunk = chunk;
bio_list_init(&pe->origin_bios);
bio_list_init(&pe->snapshot_bios);
- pe->primary_pe = NULL;
- atomic_set(&pe->ref_count, 0);
pe->started = 0;
if (s->store->type->prepare_exception(s->store, &pe->e)) {
return NULL;
}
- get_pending_exception(pe);
dm_insert_exception(&s->pending, &pe->e);
return pe;
return r;
}
+/*
+ * A snapshot-merge target behaves like a combination of a snapshot
+ * target and a snapshot-origin target. It only generates new
+ * exceptions in other snapshots and not in the one that is being
+ * merged.
+ *
+ * For each chunk, if there is an existing exception, it is used to
+ * redirect I/O to the cow device. Otherwise I/O is sent to the origin,
+ * which in turn might generate exceptions in other snapshots.
+ *
+ * Empty barriers are not looked up: flush request 0 is directed at the
+ * origin device and any other flush request at the cow device.
+ *
+ * Returns -EIO if the snapshot is not valid, the result of do_origin()
+ * for a write to a chunk with no completed exception (s->lock is
+ * dropped before that call), and DM_MAPIO_REMAPPED otherwise.
+ */
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct dm_exception *e;
+ struct dm_snapshot *s = ti->private;
+ int r = DM_MAPIO_REMAPPED;
+ chunk_t chunk;
+
+ if (unlikely(bio_empty_barrier(bio))) {
+ if (!map_context->flush_request)
+ bio->bi_bdev = s->origin->bdev;
+ else
+ bio->bi_bdev = s->cow->bdev;
+ map_context->ptr = NULL;
+ return DM_MAPIO_REMAPPED;
+ }
+
+ chunk = sector_to_chunk(s->store, bio->bi_sector);
+
+ down_read(&s->lock);
+
+ /* Full snapshots are not usable */
+ if (!s->valid) {
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ /* If the block is already remapped - use that */
+ e = dm_lookup_exception(&s->complete, chunk);
+ if (e) {
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
+ bio->bi_bdev = s->origin->bdev;
+
+ if (bio_rw(bio) == WRITE) {
+ up_read(&s->lock);
+ return do_origin(s->origin, bio);
+ }
+
+out_unlock:
+ up_read(&s->lock);
+
+ return r;
+}
+
static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
int error, union map_info *map_context)
{
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
-static int __origin_write(struct list_head *snapshots, struct bio *bio)
+
+/*
+ * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
+ * supplied bio was ignored. The caller may submit it immediately.
+ * (No remapping actually occurs as the origin is always a direct linear
+ * map.)
+ *
+ * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
+ * and any supplied bio is added to a list to be submitted once all
+ * the necessary exceptions exist.
+ */
+static int __origin_write(struct list_head *snapshots, sector_t sector,
+ struct bio *bio)
{
- int r = DM_MAPIO_REMAPPED, first = 0;
+ int r = DM_MAPIO_REMAPPED;
struct dm_snapshot *snap;
struct dm_exception *e;
- struct dm_snap_pending_exception *pe, *next_pe, *primary_pe = NULL;
+ struct dm_snap_pending_exception *pe;
+ struct dm_snap_pending_exception *pe_to_start_now = NULL;
+ struct dm_snap_pending_exception *pe_to_start_last = NULL;
chunk_t chunk;
- LIST_HEAD(pe_queue);
/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {
+ /*
+ * Don't make new exceptions in a merging snapshot
+ * because it has effectively been deleted
+ */
+ if (dm_target_is_snapshot_merge(snap->ti))
+ continue;
down_write(&snap->lock);
goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
- if (bio->bi_sector >= dm_table_get_size(snap->ti->table))
+ if (sector >= dm_table_get_size(snap->ti->table))
goto next_snapshot;
/*
* Remember, different snapshots can have
* different chunk sizes.
*/
- chunk = sector_to_chunk(snap->store, bio->bi_sector);
+ chunk = sector_to_chunk(snap->store, sector);
/*
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not.
- *
- * ref_count is initialised to 1 so pending_complete()
- * won't destroy the primary_pe while we're inside this loop.
*/
e = dm_lookup_exception(&snap->complete, chunk);
if (e)
}
}
- if (!primary_pe) {
- /*
- * Either every pe here has same
- * primary_pe or none has one yet.
- */
- if (pe->primary_pe)
- primary_pe = pe->primary_pe;
- else {
- primary_pe = pe;
- first = 1;
- }
-
- bio_list_add(&primary_pe->origin_bios, bio);
+ r = DM_MAPIO_SUBMITTED;
- r = DM_MAPIO_SUBMITTED;
- }
+ /*
+ * If an origin bio was supplied, queue it to wait for the
+ * completion of this exception, and start this one last,
+ * at the end of the function.
+ */
+ if (bio) {
+ bio_list_add(&pe->origin_bios, bio);
+ bio = NULL;
- if (!pe->primary_pe) {
- pe->primary_pe = primary_pe;
- get_pending_exception(primary_pe);
+ if (!pe->started) {
+ pe->started = 1;
+ pe_to_start_last = pe;
+ }
}
if (!pe->started) {
pe->started = 1;
- list_add_tail(&pe->list, &pe_queue);
+ pe_to_start_now = pe;
}
next_snapshot:
up_write(&snap->lock);
- }
- if (!primary_pe)
- return r;
-
- /*
- * If this is the first time we're processing this chunk and
- * ref_count is now 1 it means all the pending exceptions
- * got completed while we were in the loop above, so it falls to
- * us here to remove the primary_pe and submit any origin_bios.
- */
-
- if (first && atomic_dec_and_test(&primary_pe->ref_count)) {
- flush_bios(bio_list_get(&primary_pe->origin_bios));
- free_pending_exception(primary_pe);
- /* If we got here, pe_queue is necessarily empty. */
- return r;
+ if (pe_to_start_now) {
+ start_copy(pe_to_start_now);
+ pe_to_start_now = NULL;
+ }
}
/*
- * Now that we have a complete pe list we can start the copying.
+ * Submit the exception against which the bio is queued last,
+ * to give the other exceptions a head start.
*/
- list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
- start_copy(pe);
+ if (pe_to_start_last)
+ start_copy(pe_to_start_last);
return r;
}
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
if (o)
- r = __origin_write(&o->snapshots, bio);
+ r = __origin_write(&o->snapshots, bio->bi_sector, bio);
up_read(&_origins_lock);
return r;
.iterate_devices = snapshot_iterate_devices,
};
+static struct target_type merge_target = {
+ .name = dm_snapshot_merge_target_name, /* identity checked by dm_target_is_snapshot_merge() */
+ .version = {1, 0, 0},
+ .module = THIS_MODULE,
+ .ctr = snapshot_ctr,
+ .dtr = snapshot_dtr,
+ .map = snapshot_merge_map, /* the only merge-specific hook; the others are snapshot_* handlers */
+ .end_io = snapshot_end_io,
+ .postsuspend = snapshot_postsuspend,
+ .preresume = snapshot_preresume,
+ .resume = snapshot_resume,
+ .status = snapshot_status,
+ .iterate_devices = snapshot_iterate_devices,
+};
+
static int __init dm_snapshot_init(void)
{
int r;
}
r = dm_register_target(&snapshot_target);
- if (r) {
+ if (r < 0) {
DMERR("snapshot target register failed %d", r);
goto bad_register_snapshot_target;
}
r = dm_register_target(&origin_target);
if (r < 0) {
DMERR("Origin target register failed %d", r);
- goto bad1;
+ goto bad_register_origin_target;
+ }
+
+ r = dm_register_target(&merge_target);
+ if (r < 0) {
+ DMERR("Merge target register failed %d", r);
+ goto bad_register_merge_target;
}
r = init_origin_hash();
if (r) {
DMERR("init_origin_hash failed.");
- goto bad2;
+ goto bad_origin_hash;
}
exception_cache = KMEM_CACHE(dm_exception, 0);
if (!exception_cache) {
DMERR("Couldn't create exception cache.");
r = -ENOMEM;
- goto bad3;
+ goto bad_exception_cache;
}
pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
if (!pending_cache) {
DMERR("Couldn't create pending cache.");
r = -ENOMEM;
- goto bad4;
+ goto bad_pending_cache;
}
tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0);
if (!tracked_chunk_cache) {
DMERR("Couldn't create cache to track chunks in use.");
r = -ENOMEM;
- goto bad5;
+ goto bad_tracked_chunk_cache;
}
ksnapd = create_singlethread_workqueue("ksnapd");
bad_pending_pool:
kmem_cache_destroy(tracked_chunk_cache);
-bad5:
+bad_tracked_chunk_cache:
kmem_cache_destroy(pending_cache);
-bad4:
+bad_pending_cache:
kmem_cache_destroy(exception_cache);
-bad3:
+bad_exception_cache:
exit_origin_hash();
-bad2:
+bad_origin_hash:
+ dm_unregister_target(&merge_target);
+bad_register_merge_target:
dm_unregister_target(&origin_target);
-bad1:
+bad_register_origin_target:
dm_unregister_target(&snapshot_target);
-
bad_register_snapshot_target:
dm_exception_store_exit();
+
return r;
}
dm_unregister_target(&snapshot_target);
dm_unregister_target(&origin_target);
+ dm_unregister_target(&merge_target);
exit_origin_hash();
kmem_cache_destroy(pending_cache);