Btrfs: eliminate a use-after-free in btrfs_balance()
[deliverable/linux.git] / fs / btrfs / volumes.c
index e2e01a3271085d8417ec5197fe54acffdc831c15..d778e96665971ffc5e0915ecd9f16df3b6dbcb1c 100644 (file)
@@ -72,6 +72,19 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
        kfree(fs_devices);
 }
 
+static void btrfs_kobject_uevent(struct block_device *bdev,
+                                enum kobject_action action)
+{
+       int ret;
+
+       ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
+       if (ret)
+               pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
+                       action,
+                       kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
+                       &disk_to_dev(bdev->bd_disk)->kobj);
+}
+
 void btrfs_cleanup_fs_uuids(void)
 {
        struct btrfs_fs_devices *fs_devices;
@@ -779,26 +792,77 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
        return ret;
 }
 
+/*
+ * Look for a btrfs signature on a device. This may be called out of the
+ * mount path, and we are not allowed to call set_blocksize during the scan.
+ * The superblock is read via the pagecache.
+ */
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                          struct btrfs_fs_devices **fs_devices_ret)
 {
        struct btrfs_super_block *disk_super;
        struct block_device *bdev;
-       struct buffer_head *bh;
-       int ret;
+       struct page *page;
+       void *p;
+       int ret = -EINVAL;
        u64 devid;
        u64 transid;
        u64 total_devices;
+       u64 bytenr;
+       pgoff_t index;
 
+       /*
+        * we would like to check all the supers, but that would make
+        * a btrfs mount succeed after a mkfs from a different FS.
+        * So, we need to add a special mount option to scan for
+        * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+        */
+       bytenr = btrfs_sb_offset(0);
        flags |= FMODE_EXCL;
        mutex_lock(&uuid_mutex);
-       ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
-       if (ret)
+
+       bdev = blkdev_get_by_path(path, flags, holder);
+
+       if (IS_ERR(bdev)) {
+               ret = PTR_ERR(bdev);
+               printk(KERN_INFO "btrfs: open %s failed\n", path);
                goto error;
-       disk_super = (struct btrfs_super_block *)bh->b_data;
+       }
+
+       /* make sure our super fits in the device */
+       if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
+               goto error_bdev_put;
+
+       /* make sure our super fits in the page */
+       if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
+               goto error_bdev_put;
+
+       /* make sure our super doesn't straddle pages on disk */
+       index = bytenr >> PAGE_CACHE_SHIFT;
+       if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
+               goto error_bdev_put;
+
+       /* pull in the page with our super */
+       page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+                                  index, GFP_NOFS);
+
+       if (IS_ERR_OR_NULL(page))
+               goto error_bdev_put;
+
+       p = kmap(page);
+
+       /* align our pointer to the offset of the super block */
+       disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
+
+       if (btrfs_super_bytenr(disk_super) != bytenr ||
+           strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+                   sizeof(disk_super->magic)))
+               goto error_unmap;
+
        devid = btrfs_stack_device_id(&disk_super->dev_item);
        transid = btrfs_super_generation(disk_super);
        total_devices = btrfs_super_num_devices(disk_super);
+
        if (disk_super->label[0]) {
                if (disk_super->label[BTRFS_LABEL_SIZE - 1])
                        disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
@@ -806,12 +870,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        } else {
                printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        }
+
        printk(KERN_CONT "devid %llu transid %llu %s\n",
               (unsigned long long)devid, (unsigned long long)transid, path);
+
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
        if (!ret && fs_devices_ret)
                (*fs_devices_ret)->total_devices = total_devices;
-       brelse(bh);
+
+error_unmap:
+       kunmap(page);
+       page_cache_release(page);
+
+error_bdev_put:
        blkdev_put(bdev, flags);
 error:
        mutex_unlock(&uuid_mutex);
@@ -1359,14 +1430,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        u64 devid;
        u64 num_devices;
        u8 *dev_uuid;
+       unsigned seq;
        int ret = 0;
        bool clear_super = false;
 
        mutex_lock(&uuid_mutex);
 
-       all_avail = root->fs_info->avail_data_alloc_bits |
-               root->fs_info->avail_system_alloc_bits |
-               root->fs_info->avail_metadata_alloc_bits;
+       do {
+               seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+               all_avail = root->fs_info->avail_data_alloc_bits |
+                           root->fs_info->avail_system_alloc_bits |
+                           root->fs_info->avail_metadata_alloc_bits;
+       } while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
        num_devices = root->fs_info->fs_devices->num_devices;
        btrfs_dev_replace_lock(&root->fs_info->dev_replace);
@@ -1418,7 +1494,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                }
        } else {
                ret = btrfs_get_bdev_and_sb(device_path,
-                                           FMODE_READ | FMODE_EXCL,
+                                           FMODE_WRITE | FMODE_EXCL,
                                            root->fs_info->bdev_holder, 0,
                                            &bdev, &bh);
                if (ret)
@@ -1542,9 +1618,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
        ret = 0;
 
+       /* Notify udev that device has changed */
+       if (bdev)
+               btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
 error_brelse:
        brelse(bh);
-error_close:
        if (bdev)
                blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
 out:
@@ -2599,7 +2678,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
        cache = btrfs_lookup_block_group(fs_info, chunk_offset);
        chunk_used = btrfs_block_group_used(&cache->item);
 
-       user_thresh = div_factor_fine(cache->key.offset, bargs->usage);
+       if (bargs->usage == 0)
+               user_thresh = 0;
+       else if (bargs->usage > 100)
+               user_thresh = cache->key.offset;
+       else
+               user_thresh = div_factor_fine(cache->key.offset,
+                                             bargs->usage);
+
        if (chunk_used < user_thresh)
                ret = 0;
 
@@ -2944,6 +3030,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        unset_balance_control(fs_info);
        ret = del_balance_item(fs_info->tree_root);
        BUG_ON(ret);
+
+       atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
 
 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -2960,6 +3048,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        int mixed = 0;
        int ret;
        u64 num_devices;
+       unsigned seq;
 
        if (btrfs_fs_closing(fs_info) ||
            atomic_read(&fs_info->balance_pause_req) ||
@@ -3043,22 +3132,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        /* allow to reduce meta or sys integrity only if force set */
        allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
                        BTRFS_BLOCK_GROUP_RAID10;
-       if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-            (fs_info->avail_system_alloc_bits & allowed) &&
-            !(bctl->sys.target & allowed)) ||
-           ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-            (fs_info->avail_metadata_alloc_bits & allowed) &&
-            !(bctl->meta.target & allowed))) {
-               if (bctl->flags & BTRFS_BALANCE_FORCE) {
-                       printk(KERN_INFO "btrfs: force reducing metadata "
-                              "integrity\n");
-               } else {
-                       printk(KERN_ERR "btrfs: balance will reduce metadata "
-                              "integrity, use force if you want this\n");
-                       ret = -EINVAL;
-                       goto out;
+       do {
+               seq = read_seqbegin(&fs_info->profiles_lock);
+
+               if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                    (fs_info->avail_system_alloc_bits & allowed) &&
+                    !(bctl->sys.target & allowed)) ||
+                   ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                    (fs_info->avail_metadata_alloc_bits & allowed) &&
+                    !(bctl->meta.target & allowed))) {
+                       if (bctl->flags & BTRFS_BALANCE_FORCE) {
+                               printk(KERN_INFO "btrfs: force reducing metadata "
+                                      "integrity\n");
+                       } else {
+                               printk(KERN_ERR "btrfs: balance will reduce metadata "
+                                      "integrity, use force if you want this\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
                }
-       }
+       } while (read_seqretry(&fs_info->profiles_lock, seq));
 
        if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                int num_tolerated_disk_barrier_failures;
@@ -3102,6 +3195,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        mutex_lock(&fs_info->balance_mutex);
        atomic_dec(&fs_info->balance_running);
 
+       if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
+               fs_info->num_tolerated_disk_barrier_failures =
+                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
+       }
+
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
                update_ioctl_balance_args(fs_info, 0, bargs);
@@ -3112,19 +3210,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                __cancel_balance(fs_info);
        }
 
-       if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
-               fs_info->num_tolerated_disk_barrier_failures =
-                       btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
-       }
-
        wake_up(&fs_info->balance_wait_q);
 
        return ret;
 out:
        if (bctl->flags & BTRFS_BALANCE_RESUME)
                __cancel_balance(fs_info);
-       else
+       else {
                kfree(bctl);
+               atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
+       }
        return ret;
 }
 
@@ -3141,7 +3236,6 @@ static int balance_kthread(void *data)
                ret = btrfs_balance(fs_info->balance_ctl, NULL);
        }
 
-       atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
        mutex_unlock(&fs_info->balance_mutex);
        mutex_unlock(&fs_info->volume_mutex);
 
@@ -3164,7 +3258,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
                return 0;
        }
 
-       WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
        tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
        if (IS_ERR(tsk))
                return PTR_ERR(tsk);
@@ -3218,6 +3311,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
        btrfs_balance_sys(leaf, item, &disk_bargs);
        btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
 
+       WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
+
        mutex_lock(&fs_info->volume_mutex);
        mutex_lock(&fs_info->balance_mutex);
 
@@ -3476,6 +3571,49 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
        return 0;
 }
 
+struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = {
+               .sub_stripes    = 2,
+               .dev_stripes    = 1,
+               .devs_max       = 0,    /* 0 == as many as possible */
+               .devs_min       = 4,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID1] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 2,
+               .devs_min       = 2,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_DUP] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 2,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .devs_increment = 1,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID0] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
+       [BTRFS_RAID_SINGLE] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
+};
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                               struct btrfs_root *extent_root,
                               struct map_lookup **map_ret,
@@ -3505,43 +3643,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        int ndevs;
        int i;
        int j;
+       int index;
 
        BUG_ON(!alloc_profile_is_valid(type, 0));
 
        if (list_empty(&fs_devices->alloc_list))
                return -ENOSPC;
 
-       sub_stripes = 1;
-       dev_stripes = 1;
-       devs_increment = 1;
-       ncopies = 1;
-       devs_max = 0;   /* 0 == as many as possible */
-       devs_min = 1;
+       index = __get_raid_index(type);
 
-       /*
-        * define the properties of each RAID type.
-        * FIXME: move this to a global table and use it in all RAID
-        * calculation code
-        */
-       if (type & (BTRFS_BLOCK_GROUP_DUP)) {
-               dev_stripes = 2;
-               ncopies = 2;
-               devs_max = 1;
-       } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-               devs_min = 2;
-       } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-               devs_increment = 2;
-               ncopies = 2;
-               devs_max = 2;
-               devs_min = 2;
-       } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-               sub_stripes = 2;
-               devs_increment = 2;
-               ncopies = 2;
-               devs_min = 4;
-       } else {
-               devs_max = 1;
-       }
+       sub_stripes = btrfs_raid_array[index].sub_stripes;
+       dev_stripes = btrfs_raid_array[index].dev_stripes;
+       devs_max = btrfs_raid_array[index].devs_max;
+       devs_min = btrfs_raid_array[index].devs_min;
+       devs_increment = btrfs_raid_array[index].devs_increment;
+       ncopies = btrfs_raid_array[index].ncopies;
 
        if (type & BTRFS_BLOCK_GROUP_DATA) {
                max_stripe_size = 1024 * 1024 * 1024;
@@ -3618,12 +3734,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
                        continue;
 
+               if (ndevs == fs_devices->rw_devices) {
+                       WARN(1, "%s: found more than %llu devices\n",
+                            __func__, fs_devices->rw_devices);
+                       break;
+               }
                devices_info[ndevs].dev_offset = dev_offset;
                devices_info[ndevs].max_avail = max_avail;
                devices_info[ndevs].total_avail = total_avail;
                devices_info[ndevs].dev = device;
                ++ndevs;
-               WARN_ON(ndevs > fs_devices->rw_devices);
        }
 
        /*
@@ -3705,15 +3825,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        write_lock(&em_tree->lock);
        ret = add_extent_mapping(em_tree, em);
        write_unlock(&em_tree->lock);
-       free_extent_map(em);
-       if (ret)
-               goto error;
-
-       ret = btrfs_make_block_group(trans, extent_root, 0, type,
-                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                    start, num_bytes);
-       if (ret)
+       if (ret) {
+               free_extent_map(em);
                goto error;
+       }
 
        for (i = 0; i < map->num_stripes; ++i) {
                struct btrfs_device *device;
@@ -3726,15 +3841,42 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                                info->chunk_root->root_key.objectid,
                                BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                start, dev_offset, stripe_size);
-               if (ret) {
-                       btrfs_abort_transaction(trans, extent_root, ret);
-                       goto error;
-               }
+               if (ret)
+                       goto error_dev_extent;
        }
 
+       ret = btrfs_make_block_group(trans, extent_root, 0, type,
+                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+                                    start, num_bytes);
+       if (ret) {
+               i = map->num_stripes - 1;
+               goto error_dev_extent;
+       }
+
+       free_extent_map(em);
        kfree(devices_info);
        return 0;
 
+error_dev_extent:
+       for (; i >= 0; i--) {
+               struct btrfs_device *device;
+               int err;
+
+               device = map->stripes[i].dev;
+               err = btrfs_free_dev_extent(trans, device, start);
+               if (err) {
+                       btrfs_abort_transaction(trans, extent_root, err);
+                       break;
+               }
+       }
+       write_lock(&em_tree->lock);
+       remove_extent_mapping(em_tree, em);
+       write_unlock(&em_tree->lock);
+
+       /* One for our allocation */
+       free_extent_map(em);
+       /* One for the tree reference */
+       free_extent_map(em);
 error:
        kfree(map);
        kfree(devices_info);
@@ -3874,10 +4016,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
-                               fs_info->avail_metadata_alloc_bits;
-       alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+       alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
        ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
                                  &stripe_size, chunk_offset, alloc_profile);
        if (ret)
@@ -3885,10 +4024,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
        sys_chunk_offset = chunk_offset + chunk_size;
 
-       alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
-                               fs_info->avail_system_alloc_bits;
-       alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+       alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
        ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
                                  &sys_chunk_size, &sys_stripe_size,
                                  sys_chunk_offset, alloc_profile);
@@ -4004,6 +4140,12 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
        else
                ret = 1;
        free_extent_map(em);
+
+       btrfs_dev_replace_lock(&fs_info->dev_replace);
+       if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+               ret++;
+       btrfs_dev_replace_unlock(&fs_info->dev_replace);
+
        return ret;
 }
 
@@ -4068,6 +4210,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
        int dev_replace_is_ongoing = 0;
        int num_alloc_stripes;
+       int patch_the_first_stripe_for_dev_replace = 0;
+       u64 physical_to_patch_in_first_stripe = 0;
 
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
@@ -4084,9 +4228,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        map = (struct map_lookup *)em->bdev;
        offset = logical - em->start;
 
-       if (mirror_num > map->num_stripes)
-               mirror_num = 0;
-
        stripe_nr = offset;
        /*
         * stripe_nr counts the total number of stripes we have to stride
@@ -4118,6 +4259,88 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        if (!dev_replace_is_ongoing)
                btrfs_dev_replace_unlock(dev_replace);
 
+       if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
+           !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
+           dev_replace->tgtdev != NULL) {
+               /*
+                * in dev-replace case, for repair case (that's the only
+                * case where the mirror is selected explicitly when
+                * calling btrfs_map_block), blocks left of the left cursor
+                * can also be read from the target drive.
+                * For REQ_GET_READ_MIRRORS, the target drive is added as
+                * the last one to the array of stripes. For READ, it also
+                * needs to be supported using the same mirror number.
+                * If the requested block is not left of the left cursor,
+                * EIO is returned. This can happen because btrfs_num_copies()
+                * returns one more in the dev-replace case.
+                */
+               u64 tmp_length = *length;
+               struct btrfs_bio *tmp_bbio = NULL;
+               int tmp_num_stripes;
+               u64 srcdev_devid = dev_replace->srcdev->devid;
+               int index_srcdev = 0;
+               int found = 0;
+               u64 physical_of_found = 0;
+
+               ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
+                            logical, &tmp_length, &tmp_bbio, 0);
+               if (ret) {
+                       WARN_ON(tmp_bbio != NULL);
+                       goto out;
+               }
+
+               tmp_num_stripes = tmp_bbio->num_stripes;
+               if (mirror_num > tmp_num_stripes) {
+                       /*
+                        * REQ_GET_READ_MIRRORS does not contain this
+                        * mirror, that means that the requested area
+                        * is not left of the left cursor
+                        */
+                       ret = -EIO;
+                       kfree(tmp_bbio);
+                       goto out;
+               }
+
+               /*
+                * process the rest of the function using the mirror_num
+                * of the source drive. Therefore look it up first.
+                * At the end, patch the device pointer to the one of the
+                * target drive.
+                */
+               for (i = 0; i < tmp_num_stripes; i++) {
+                       if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
+                               /*
+                                * In case of DUP, in order to keep it
+                                * simple, only add the mirror with the
+                                * lowest physical address
+                                */
+                               if (found &&
+                                   physical_of_found <=
+                                    tmp_bbio->stripes[i].physical)
+                                       continue;
+                               index_srcdev = i;
+                               found = 1;
+                               physical_of_found =
+                                       tmp_bbio->stripes[i].physical;
+                       }
+               }
+
+               if (found) {
+                       mirror_num = index_srcdev + 1;
+                       patch_the_first_stripe_for_dev_replace = 1;
+                       physical_to_patch_in_first_stripe = physical_of_found;
+               } else {
+                       WARN_ON(1);
+                       ret = -EIO;
+                       kfree(tmp_bbio);
+                       goto out;
+               }
+
+               kfree(tmp_bbio);
+       } else if (mirror_num > map->num_stripes) {
+               mirror_num = 0;
+       }
+
        num_stripes = 1;
        stripe_index = 0;
        stripe_nr_orig = stripe_nr;
@@ -4188,8 +4411,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        BUG_ON(stripe_index >= map->num_stripes);
 
        num_alloc_stripes = num_stripes;
-       if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)))
-               num_alloc_stripes <<= 1;
+       if (dev_replace_is_ongoing) {
+               if (rw & (REQ_WRITE | REQ_DISCARD))
+                       num_alloc_stripes <<= 1;
+               if (rw & REQ_GET_READ_MIRRORS)
+                       num_alloc_stripes++;
+       }
        bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
        if (!bbio) {
                ret = -ENOMEM;
@@ -4318,12 +4545,70 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                        }
                }
                num_stripes = index_where_to_add;
+       } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
+                  dev_replace->tgtdev != NULL) {
+               u64 srcdev_devid = dev_replace->srcdev->devid;
+               int index_srcdev = 0;
+               int found = 0;
+               u64 physical_of_found = 0;
+
+               /*
+                * During the dev-replace procedure, the target drive can
+                * also be used to read data in case it is needed to repair
+                * a corrupt block elsewhere. This is possible if the
+                * requested area is left of the left cursor. In this area,
+                * the target drive is a full copy of the source drive.
+                */
+               for (i = 0; i < num_stripes; i++) {
+                       if (bbio->stripes[i].dev->devid == srcdev_devid) {
+                               /*
+                                * In case of DUP, in order to keep it
+                                * simple, only add the mirror with the
+                                * lowest physical address
+                                */
+                               if (found &&
+                                   physical_of_found <=
+                                    bbio->stripes[i].physical)
+                                       continue;
+                               index_srcdev = i;
+                               found = 1;
+                               physical_of_found = bbio->stripes[i].physical;
+                       }
+               }
+               if (found) {
+                       u64 length = map->stripe_len;
+
+                       if (physical_of_found + length <=
+                           dev_replace->cursor_left) {
+                               struct btrfs_bio_stripe *tgtdev_stripe =
+                                       bbio->stripes + num_stripes;
+
+                               tgtdev_stripe->physical = physical_of_found;
+                               tgtdev_stripe->length =
+                                       bbio->stripes[index_srcdev].length;
+                               tgtdev_stripe->dev = dev_replace->tgtdev;
+
+                               num_stripes++;
+                       }
+               }
        }
 
        *bbio_ret = bbio;
        bbio->num_stripes = num_stripes;
        bbio->max_errors = max_errors;
        bbio->mirror_num = mirror_num;
+
+       /*
+        * this is the case that REQ_READ && dev_replace_is_ongoing &&
+        * mirror_num == num_stripes + 1 && dev_replace target drive is
+        * available as a mirror
+        */
+       if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
+               WARN_ON(num_stripes > 1);
+               bbio->stripes[0].dev = dev_replace->tgtdev;
+               bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
+               bbio->mirror_num = map->num_stripes + 1;
+       }
 out:
        if (dev_replace_is_ongoing)
                btrfs_dev_replace_unlock(dev_replace);
@@ -4819,6 +5104,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
        em->bdev = (struct block_device *)map;
        em->start = logical;
        em->len = length;
+       em->orig_start = 0;
        em->block_start = 0;
        em->block_len = em->len;
 
This page took 0.036777 seconds and 5 git commands to generate.