btrfs: Fix data checksum error caused by replace with io-load.
[deliverable/linux.git] / fs / btrfs / scrub.c
index 9f2feabe99f211f9c8eb5d489c44c85f54754c27..7555ddc5289fda675ca4a5cb1617f8a0032b9f8f 100644 (file)
@@ -332,11 +332,14 @@ static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
        }
 }
 
-static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+static void scrub_pause_on(struct btrfs_fs_info *fs_info)
 {
        atomic_inc(&fs_info->scrubs_paused);
        wake_up(&fs_info->scrub_pause_wait);
+}
 
+static void scrub_pause_off(struct btrfs_fs_info *fs_info)
+{
        mutex_lock(&fs_info->scrub_lock);
        __scrub_blocked_if_needed(fs_info);
        atomic_dec(&fs_info->scrubs_paused);
@@ -345,6 +348,12 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
        wake_up(&fs_info->scrub_pause_wait);
 }
 
+static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
+{
+       scrub_pause_on(fs_info);
+       scrub_pause_off(fs_info);
+}
+
 /*
  * used for workers that require transaction commits (i.e., for the
  * NOCOW case)
@@ -2702,7 +2711,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
                           sparity->nsectors))
                goto out;
 
-       length = sparity->logic_end - sparity->logic_start + 1;
+       length = sparity->logic_end - sparity->logic_start;
        ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
                               sparity->logic_start,
                               &length, &bbio, 0, 1);
@@ -2856,6 +2865,10 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
                        }
                        btrfs_item_key_to_cpu(l, &key, slot);
 
+                       if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+                           key.type != BTRFS_METADATA_ITEM_KEY)
+                               goto next;
+
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                bytes = root->nodesize;
                        else
@@ -2864,11 +2877,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
                        if (key.objectid + bytes <= logic_start)
                                goto next;
 
-                       if (key.type != BTRFS_EXTENT_ITEM_KEY &&
-                           key.type != BTRFS_METADATA_ITEM_KEY)
-                               goto next;
-
-                       if (key.objectid > logic_end) {
+                       if (key.objectid >= logic_end) {
                                stop_loop = 1;
                                break;
                        }
@@ -2881,11 +2890,12 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
                        flags = btrfs_extent_flags(l, extent);
                        generation = btrfs_extent_generation(l, extent);
 
-                       if (key.objectid < logic_start &&
-                           (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
-                               btrfs_err(fs_info,
-                                         "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
-                                          key.objectid, logic_start);
+                       if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+                           (key.objectid < logic_start ||
+                            key.objectid + bytes >
+                            logic_start + map->stripe_len)) {
+                               btrfs_err(fs_info, "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+                                         key.objectid, logic_start);
                                goto next;
                        }
 again:
@@ -2923,10 +2933,12 @@ again:
                                                      extent_dev, flags,
                                                      generation,
                                                      extent_mirror_num);
+
+                       scrub_free_csums(sctx);
+
                        if (ret)
                                goto out;
 
-                       scrub_free_csums(sctx);
                        if (extent_logical + extent_len <
                            key.objectid + bytes) {
                                logic_start += map->stripe_len;
@@ -2955,7 +2967,7 @@ next:
 out:
        if (ret < 0)
                scrub_parity_mark_sectors_error(sparity, logic_start,
-                                               logic_end - logic_start + 1);
+                                               logic_end - logic_start);
        scrub_parity_put(sparity);
        scrub_submit(sctx);
        mutex_lock(&sctx->wr_ctx.wr_lock);
@@ -3104,22 +3116,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         */
        ret = 0;
        while (physical < physical_end) {
-               /* for raid56, we skip parity stripe */
-               if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-                       ret = get_raid56_logic_offset(physical, num,
-                                       map, &logical, &stripe_logical);
-                       logical += base;
-                       if (ret) {
-                               stripe_logical += base;
-                               stripe_end = stripe_logical + increment - 1;
-                               ret = scrub_raid56_parity(sctx, map, scrub_dev,
-                                               ppath, stripe_logical,
-                                               stripe_end);
-                               if (ret)
-                                       goto out;
-                               goto skip;
-                       }
-               }
                /*
                 * canceled?
                 */
@@ -3144,6 +3140,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        scrub_blocked_if_needed(fs_info);
                }
 
+               /* for raid56, we skip parity stripe */
+               if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+                       ret = get_raid56_logic_offset(physical, num, map,
+                                                     &logical,
+                                                     &stripe_logical);
+                       logical += base;
+                       if (ret) {
+                               stripe_logical += base;
+                               stripe_end = stripe_logical + increment;
+                               ret = scrub_raid56_parity(sctx, map, scrub_dev,
+                                                         ppath, stripe_logical,
+                                                         stripe_end);
+                               if (ret)
+                                       goto out;
+                               goto skip;
+                       }
+               }
+
                if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
                        key.type = BTRFS_METADATA_ITEM_KEY;
                else
@@ -3188,6 +3202,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        }
                        btrfs_item_key_to_cpu(l, &key, slot);
 
+                       if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+                           key.type != BTRFS_METADATA_ITEM_KEY)
+                               goto next;
+
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                bytes = root->nodesize;
                        else
@@ -3196,10 +3214,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        if (key.objectid + bytes <= logical)
                                goto next;
 
-                       if (key.type != BTRFS_EXTENT_ITEM_KEY &&
-                           key.type != BTRFS_METADATA_ITEM_KEY)
-                               goto next;
-
                        if (key.objectid >= logical + map->stripe_len) {
                                /* out of this device extent */
                                if (key.objectid >= logic_end)
@@ -3212,8 +3226,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                        flags = btrfs_extent_flags(l, extent);
                        generation = btrfs_extent_generation(l, extent);
 
-                       if (key.objectid < logical &&
-                           (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+                       if ((flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) &&
+                           (key.objectid < logical ||
+                            key.objectid + bytes >
+                            logical + map->stripe_len)) {
                                btrfs_err(fs_info,
                                           "scrub: tree block %llu spanning "
                                           "stripes, ignored. logical=%llu",
@@ -3247,9 +3263,11 @@ again:
                                                   &extent_dev,
                                                   &extent_mirror_num);
 
-                       ret = btrfs_lookup_csums_range(csum_root, logical,
-                                               logical + map->stripe_len - 1,
-                                               &sctx->csum_list, 1);
+                       ret = btrfs_lookup_csums_range(csum_root,
+                                                      extent_logical,
+                                                      extent_logical +
+                                                      extent_len - 1,
+                                                      &sctx->csum_list, 1);
                        if (ret)
                                goto out;
 
@@ -3257,10 +3275,12 @@ again:
                                           extent_physical, extent_dev, flags,
                                           generation, extent_mirror_num,
                                           extent_logical - logical + physical);
+
+                       scrub_free_csums(sctx);
+
                        if (ret)
                                goto out;
 
-                       scrub_free_csums(sctx);
                        if (extent_logical + extent_len <
                            key.objectid + bytes) {
                                if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
@@ -3278,7 +3298,7 @@ loop:
                                        if (ret && physical < physical_end) {
                                                stripe_logical += base;
                                                stripe_end = stripe_logical +
-                                                               increment - 1;
+                                                               increment;
                                                ret = scrub_raid56_parity(sctx,
                                                        map, scrub_dev, ppath,
                                                        stripe_logical,
@@ -3387,7 +3407,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
        u64 chunk_tree;
        u64 chunk_objectid;
        u64 chunk_offset;
-       int ret;
+       int ret = 0;
        int slot;
        struct extent_buffer *l;
        struct btrfs_key key;
@@ -3415,8 +3435,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                        if (path->slots[0] >=
                            btrfs_header_nritems(path->nodes[0])) {
                                ret = btrfs_next_leaf(root, path);
-                               if (ret)
+                               if (ret < 0)
                                        break;
+                               if (ret > 0) {
+                                       ret = 0;
+                                       break;
+                               }
+                       } else {
+                               ret = 0;
                        }
                }
 
@@ -3458,6 +3484,22 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                if (!cache)
                        goto skip;
 
+               /*
+                * we need to call btrfs_inc_block_group_ro() with scrubs_paused,
+                * to avoid a deadlock caused by:
+                * btrfs_inc_block_group_ro()
+                * -> btrfs_wait_for_commit()
+                * -> btrfs_commit_transaction()
+                * -> btrfs_scrub_pause()
+                */
+               scrub_pause_on(fs_info);
+               ret = btrfs_inc_block_group_ro(root, cache);
+               scrub_pause_off(fs_info);
+               if (ret) {
+                       btrfs_put_block_group(cache);
+                       break;
+               }
+
                dev_replace->cursor_right = found_key.offset + length;
                dev_replace->cursor_left = found_key.offset;
                dev_replace->item_needs_writeback = 1;
@@ -3483,8 +3525,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
                wait_event(sctx->list_wait,
                           atomic_read(&sctx->bios_in_flight) == 0);
-               atomic_inc(&fs_info->scrubs_paused);
-               wake_up(&fs_info->scrub_pause_wait);
+
+               scrub_pause_on(fs_info);
 
                /*
                 * must be called before we decrease @scrub_paused.
@@ -3495,11 +3537,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                           atomic_read(&sctx->workers_pending) == 0);
                atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
 
-               mutex_lock(&fs_info->scrub_lock);
-               __scrub_blocked_if_needed(fs_info);
-               atomic_dec(&fs_info->scrubs_paused);
-               mutex_unlock(&fs_info->scrub_lock);
-               wake_up(&fs_info->scrub_pause_wait);
+               scrub_pause_off(fs_info);
+
+               btrfs_dec_block_group_ro(root, cache);
 
                btrfs_put_block_group(cache);
                if (ret)
@@ -3523,11 +3563,7 @@ skip:
 
        btrfs_free_path(path);
 
-       /*
-        * ret can still be 1 from search_slot or next_leaf,
-        * that's not an error
-        */
-       return ret < 0 ? ret : 0;
+       return ret;
 }
 
 static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
@@ -3571,7 +3607,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                                                int is_dev_replace)
 {
-       int ret = 0;
        unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
        int max_active = fs_info->thread_pool_size;
 
@@ -3584,34 +3619,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                        fs_info->scrub_workers =
                                btrfs_alloc_workqueue("btrfs-scrub", flags,
                                                      max_active, 4);
-               if (!fs_info->scrub_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_workers)
+                       goto fail_scrub_workers;
+
                fs_info->scrub_wr_completion_workers =
                        btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
                                              max_active, 2);
-               if (!fs_info->scrub_wr_completion_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_wr_completion_workers)
+                       goto fail_scrub_wr_completion_workers;
+
                fs_info->scrub_nocow_workers =
                        btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
-               if (!fs_info->scrub_nocow_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_nocow_workers)
+                       goto fail_scrub_nocow_workers;
                fs_info->scrub_parity_workers =
                        btrfs_alloc_workqueue("btrfs-scrubparity", flags,
                                              max_active, 2);
-               if (!fs_info->scrub_parity_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_parity_workers)
+                       goto fail_scrub_parity_workers;
        }
        ++fs_info->scrub_workers_refcnt;
-out:
-       return ret;
+       return 0;
+
+fail_scrub_parity_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+       return -ENOMEM;
 }
 
 static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
This page took 0.043296 seconds and 5 git commands to generate.