md: fix clearing of 'blocked' flag in the presence of bad blocks.
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 463a392c0705e0602f77958dc71c7ffe2f1ea2f7..3742ce8b0acf6b70cf119bc412654c94ec0aa165 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL_GPL(bio_clone_mddev);
 
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+       /* 'bio' is a cloned bio which we need to trim to match
+        * the given offset and size.
+        * This requires adjusting bi_sector, bi_size, and bi_io_vec
+        */
+       int i;
+       struct bio_vec *bvec;
+       int sofar = 0;
+
+       size <<= 9;
+       if (offset == 0 && size == bio->bi_size)
+               return;
+
+       bio->bi_sector += offset;
+       bio->bi_size = size;
+       offset <<= 9;
+       clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+       while (bio->bi_idx < bio->bi_vcnt &&
+              bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+               /* remove this whole bio_vec */
+               offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+               bio->bi_idx++;
+       }
+       if (bio->bi_idx < bio->bi_vcnt) {
+               bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+               bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+       }
+       /* avoid any complications with bi_idx being non-zero */
+       if (bio->bi_idx) {
+               memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+                       (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+               bio->bi_vcnt -= bio->bi_idx;
+               bio->bi_idx = 0;
+       }
+       /* Make sure vcnt and last bv are not too big */
+       bio_for_each_segment(bvec, bio, i) {
+               if (sofar + bvec->bv_len > size)
+                       bvec->bv_len = size - sofar;
+               if (bvec->bv_len == 0) {
+                       bio->bi_vcnt = i;
+                       break;
+               }
+               sofar += bvec->bv_len;
+       }
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
+
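
md_trim_bio() narrows an already-cloned bio to a sub-range of the
original, which is what a personality needs when it wants to retry or
redirect only part of a request.  A minimal sketch of a hypothetical
caller (clone_and_trim() is illustrative, not part of this patch),
pairing it with bio_clone_mddev() from the context above:

    /* Clone 'bio' and trim the clone to 'size' sectors starting
     * 'offset' sectors into the original. */
    static struct bio *clone_and_trim(struct bio *bio, mddev_t *mddev,
                                      int offset, int size)
    {
            struct bio *clone = bio_clone_mddev(bio, GFP_NOIO, mddev);

            if (!clone)
                    return NULL;
            md_trim_bio(clone, offset, size);
            return clone;
    }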
 /*
  * We have a system wide 'event count' that is incremented
  * on any 'interesting' event, and readers of /proc/mdstat
@@ -757,6 +806,10 @@ static void free_disk_sb(mdk_rdev_t * rdev)
                rdev->sb_start = 0;
                rdev->sectors = 0;
        }
+       if (rdev->bb_page) {
+               put_page(rdev->bb_page);
+               rdev->bb_page = NULL;
+       }
 }
 
 
@@ -795,7 +848,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
        bio->bi_end_io = super_written;
 
        atomic_inc(&mddev->pending_writes);
-       submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
+       submit_bio(WRITE_FLUSH_FUA, bio);
 }
 
 void md_super_wait(mddev_t *mddev)
@@ -1054,6 +1107,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
        rdev->preferred_minor = sb->md_minor;
        rdev->data_offset = 0;
        rdev->sb_size = MD_SB_BYTES;
+       rdev->badblocks.shift = -1;
 
        if (sb->level == LEVEL_MULTIPATH)
                rdev->desc_nr = -1;
@@ -1395,6 +1449,8 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
        return cpu_to_le32(csum);
 }
 
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+                           int acknowledged);
 static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 {
        struct mdp_superblock_1 *sb;
@@ -1473,6 +1529,47 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
        else
                rdev->desc_nr = le32_to_cpu(sb->dev_number);
 
+       if (!rdev->bb_page) {
+               rdev->bb_page = alloc_page(GFP_KERNEL);
+               if (!rdev->bb_page)
+                       return -ENOMEM;
+       }
+       if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
+           rdev->badblocks.count == 0) {
+               /* need to load the bad block list.
+                * Currently we limit it to one page.
+                */
+               s32 offset;
+               sector_t bb_sector;
+               u64 *bbp;
+               int i;
+               int sectors = le16_to_cpu(sb->bblog_size);
+               if (sectors > (PAGE_SIZE / 512))
+                       return -EINVAL;
+               offset = le32_to_cpu(sb->bblog_offset);
+               if (offset == 0)
+                       return -EINVAL;
+               bb_sector = (long long)offset;
+               if (!sync_page_io(rdev, bb_sector, sectors << 9,
+                                 rdev->bb_page, READ, true))
+                       return -EIO;
+               bbp = (u64 *)page_address(rdev->bb_page);
+               rdev->badblocks.shift = sb->bblog_shift;
+               for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
+                       u64 bb = le64_to_cpu(*bbp);
+                       int count = bb & (0x3ff);
+                       u64 sector = bb >> 10;
+                       sector <<= sb->bblog_shift;
+                       count <<= sb->bblog_shift;
+                       if (bb + 1 == 0)
+                               break;
+                       if (md_set_badblocks(&rdev->badblocks,
+                                            sector, count, 1) == 0)
+                               return -EINVAL;
+               }
+       } else if (sb->bblog_offset == 0)
+               rdev->badblocks.shift = -1;
+
        if (!refdev) {
                ret = 1;
        } else {
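
The on-disk bad-block log decoded above packs each range into one
little-endian 64-bit word: the low 10 bits hold a raw sector count,
the upper 54 bits the start sector, and both are scaled up by
sb->bblog_shift; an all-ones word (so that bb + 1 == 0) terminates
the log.  A standalone sketch of the decode step, with illustrative
names:

    #include <stdint.h>

    struct bb_range { uint64_t sector; uint32_t count; };

    /* Returns 1 and fills *out for a valid record, 0 on the terminator. */
    static int decode_bb_record(uint64_t bb /* after le64_to_cpu() */,
                                unsigned shift, struct bb_range *out)
    {
            if (bb + 1 == 0)
                    return 0;       /* 0xffffffffffffffff ends the log */
            out->count  = (uint32_t)(bb & 0x3ff) << shift;
            out->sector = (bb >> 10) << shift;
            return 1;
    }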
@@ -1624,7 +1721,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        sb->pad0 = 0;
        sb->recovery_offset = cpu_to_le64(0);
        memset(sb->pad1, 0, sizeof(sb->pad1));
-       memset(sb->pad2, 0, sizeof(sb->pad2));
        memset(sb->pad3, 0, sizeof(sb->pad3));
 
        sb->utime = cpu_to_le64((__u64)mddev->utime);
@@ -1642,6 +1738,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        sb->level = cpu_to_le32(mddev->level);
        sb->layout = cpu_to_le32(mddev->layout);
 
+       if (test_bit(WriteMostly, &rdev->flags))
+               sb->devflags |= WriteMostly1;
+       else
+               sb->devflags &= ~WriteMostly1;
+
        if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
                sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
                sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1664,6 +1765,40 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
        }
 
+       if (rdev->badblocks.count == 0)
+               /* Nothing to do for bad blocks */ ;
+       else if (sb->bblog_offset == 0)
+               /* Cannot record bad blocks on this device */
+               md_error(mddev, rdev);
+       else {
+               struct badblocks *bb = &rdev->badblocks;
+               u64 *bbp = (u64 *)page_address(rdev->bb_page);
+               u64 *p = bb->page;
+               sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
+               if (bb->changed) {
+                       unsigned seq;
+
+retry:
+                       seq = read_seqbegin(&bb->lock);
+
+                       memset(bbp, 0xff, PAGE_SIZE);
+
+                       for (i = 0 ; i < bb->count ; i++) {
+                               u64 internal_bb = *p++;
+                               u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
+                                               | BB_LEN(internal_bb));
+                               *bbp++ = cpu_to_le64(store_bb);
+                       }
+                       if (read_seqretry(&bb->lock, seq))
+                               goto retry;
+
+                       bb->sector = (rdev->sb_start +
+                                     (int)le32_to_cpu(sb->bblog_offset));
+                       bb->size = le16_to_cpu(sb->bblog_size);
+                       bb->changed = 0;
+               }
+       }
+
        max_dev = 0;
        list_for_each_entry(rdev2, &mddev->disks, same_set)
                if (rdev2->desc_nr+1 > max_dev)
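
The retry loop above is the reader side of a standard seqlock: the
writer (md_set_badblocks() takes bb->lock for writing) bumps the
sequence count, so a reader snapshots the table and starts over if a
write raced with it.  The bare pattern looks like this:

    unsigned seq;

    do {
            seq = read_seqbegin(&bb->lock);
            /* ... copy bb->page[0 .. bb->count-1] to a private buffer ... */
    } while (read_seqretry(&bb->lock, seq));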
@@ -2196,6 +2331,7 @@ static void md_update_sb(mddev_t * mddev, int force_change)
        mdk_rdev_t *rdev;
        int sync_req;
        int nospares = 0;
+       int any_badblocks_changed = 0;
 
 repeat:
        /* First make sure individual recovery_offsets are correct */
@@ -2210,8 +2346,18 @@ repeat:
        if (!mddev->persistent) {
                clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
                clear_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (!mddev->external)
+               if (!mddev->external) {
                        clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+                       list_for_each_entry(rdev, &mddev->disks, same_set) {
+                               if (rdev->badblocks.changed) {
+                                       md_ack_all_badblocks(&rdev->badblocks);
+                                       md_error(mddev, rdev);
+                               }
+                               clear_bit(Blocked, &rdev->flags);
+                               clear_bit(BlockedBadBlocks, &rdev->flags);
+                               wake_up(&rdev->blocked_wait);
+                       }
+               }
                wake_up(&mddev->sb_wait);
                return;
        }
@@ -2267,6 +2413,14 @@ repeat:
                MD_BUG();
                mddev->events --;
        }
+
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (rdev->badblocks.changed)
+                       any_badblocks_changed++;
+               if (test_bit(Faulty, &rdev->flags))
+                       set_bit(FaultRecorded, &rdev->flags);
+       }
+
        sync_sbs(mddev, nospares);
        spin_unlock_irq(&mddev->write_lock);
 
@@ -2292,6 +2446,13 @@ repeat:
                                bdevname(rdev->bdev,b),
                                (unsigned long long)rdev->sb_start);
                        rdev->sb_events = mddev->events;
+                       if (rdev->badblocks.size) {
+                               md_super_write(mddev, rdev,
+                                              rdev->badblocks.sector,
+                                              rdev->badblocks.size << 9,
+                                              rdev->bb_page);
+                               rdev->badblocks.size = 0;
+                       }
 
                } else
                        dprintk(")\n");
@@ -2315,6 +2476,15 @@ repeat:
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 
+       list_for_each_entry(rdev, &mddev->disks, same_set) {
+               if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+                       clear_bit(Blocked, &rdev->flags);
+
+               if (any_badblocks_changed)
+                       md_ack_all_badblocks(&rdev->badblocks);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
+               wake_up(&rdev->blocked_wait);
+       }
 }
 
 /* words written to sysfs files may, or may not, be \n terminated.
@@ -2349,7 +2519,8 @@ state_show(mdk_rdev_t *rdev, char *page)
        char *sep = "";
        size_t len = 0;
 
-       if (test_bit(Faulty, &rdev->flags)) {
+       if (test_bit(Faulty, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len+= sprintf(page+len, "%sfaulty",sep);
                sep = ",";
        }
@@ -2361,7 +2532,8 @@ state_show(mdk_rdev_t *rdev, char *page)
                len += sprintf(page+len, "%swrite_mostly",sep);
                sep = ",";
        }
-       if (test_bit(Blocked, &rdev->flags)) {
+       if (test_bit(Blocked, &rdev->flags) ||
+           rdev->badblocks.unacked_exist) {
                len += sprintf(page+len, "%sblocked", sep);
                sep = ",";
        }
@@ -2370,6 +2542,10 @@ state_show(mdk_rdev_t *rdev, char *page)
                len += sprintf(page+len, "%sspare", sep);
                sep = ",";
        }
+       if (test_bit(WriteErrorSeen, &rdev->flags)) {
+               len += sprintf(page+len, "%swrite_error", sep);
+               sep = ",";
+       }
        return len+sprintf(page+len, "\n");
 }
 
@@ -2377,18 +2553,23 @@ static ssize_t
 state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
 {
        /* can write
-        *  faulty  - simulates and error
+        *  faulty  - simulates an error
         *  remove  - disconnects the device
         *  writemostly - sets write_mostly
         *  -writemostly - clears write_mostly
-        *  blocked - sets the Blocked flag
-        *  -blocked - clears the Blocked flag
+        *  blocked - sets the Blocked flag
+        *  -blocked - clears the Blocked flag and possibly simulates an error
         *  insync - sets Insync providing device isn't active
+        *  write_error - sets WriteErrorSeen
+        *  -write_error - clears WriteErrorSeen
         */
        int err = -EINVAL;
        if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
                md_error(rdev->mddev, rdev);
-               err = 0;
+               if (test_bit(Faulty, &rdev->flags))
+                       err = 0;
+               else
+                       err = -EBUSY;
        } else if (cmd_match(buf, "remove")) {
                if (rdev->raid_disk >= 0)
                        err = -EBUSY;
@@ -2410,7 +2591,15 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
                set_bit(Blocked, &rdev->flags);
                err = 0;
        } else if (cmd_match(buf, "-blocked")) {
+               if (!test_bit(Faulty, &rdev->flags) &&
+                   rdev->badblocks.unacked_exist) {
+                       /* metadata handler doesn't understand badblocks,
+                        * so we need to fail the device
+                        */
+                       md_error(rdev->mddev, rdev);
+               }
                clear_bit(Blocked, &rdev->flags);
+               clear_bit(BlockedBadBlocks, &rdev->flags);
                wake_up(&rdev->blocked_wait);
                set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
                md_wakeup_thread(rdev->mddev->thread);
@@ -2419,6 +2608,12 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
        } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
                set_bit(In_sync, &rdev->flags);
                err = 0;
+       } else if (cmd_match(buf, "write_error")) {
+               set_bit(WriteErrorSeen, &rdev->flags);
+               err = 0;
+       } else if (cmd_match(buf, "-write_error")) {
+               clear_bit(WriteErrorSeen, &rdev->flags);
+               err = 0;
        }
        if (!err)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -2711,6 +2906,39 @@ static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t le
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack);
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
+
+static ssize_t bb_show(mdk_rdev_t *rdev, char *page)
+{
+       return badblocks_show(&rdev->badblocks, page, 0);
+}
+static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+       int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+       /* Maybe that ack was all we needed */
+       if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+               wake_up(&rdev->blocked_wait);
+       return rv;
+}
+static struct rdev_sysfs_entry rdev_bad_blocks =
+__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
+
+
+static ssize_t ubb_show(mdk_rdev_t *rdev, char *page)
+{
+       return badblocks_show(&rdev->badblocks, page, 1);
+}
+static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len)
+{
+       return badblocks_store(&rdev->badblocks, page, len, 1);
+}
+static struct rdev_sysfs_entry rdev_unack_bad_blocks =
+__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
+
 static struct attribute *rdev_default_attrs[] = {
        &rdev_state.attr,
        &rdev_errors.attr,
@@ -2718,6 +2946,8 @@ static struct attribute *rdev_default_attrs[] = {
        &rdev_offset.attr,
        &rdev_size.attr,
        &rdev_recovery_start.attr,
+       &rdev_bad_blocks.attr,
+       &rdev_unack_bad_blocks.attr,
        NULL,
 };
 static ssize_t
@@ -2791,6 +3021,8 @@ int md_rdev_init(mdk_rdev_t *rdev)
        rdev->sb_events = 0;
        rdev->last_read_error.tv_sec  = 0;
        rdev->last_read_error.tv_nsec = 0;
+       rdev->sb_loaded = 0;
+       rdev->bb_page = NULL;
        atomic_set(&rdev->nr_pending, 0);
        atomic_set(&rdev->read_errors, 0);
        atomic_set(&rdev->corrected_errors, 0);
@@ -2875,15 +3107,16 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
                        goto abort_free;
                }
        }
+       if (super_format == -1)
+               /* hot-add for 0.90, or non-persistent: so no badblocks */
+               rdev->badblocks.shift = -1;
 
        return rdev;
 
 abort_free:
-       if (rdev->sb_page) {
-               if (rdev->bdev)
-                       unlock_rdev(rdev);
-               free_disk_sb(rdev);
-       }
+       if (rdev->bdev)
+               unlock_rdev(rdev);
+       free_disk_sb(rdev);
        kfree(rdev->badblocks.page);
        kfree(rdev);
        return ERR_PTR(err);
@@ -5758,6 +5991,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
                return -ENODEV;
 
        md_error(mddev, rdev);
+       if (!test_bit(Faulty, &rdev->flags))
+               return -EBUSY;
        return 0;
 }
 
@@ -6206,18 +6441,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
        if (!rdev || test_bit(Faulty, &rdev->flags))
                return;
 
-       if (mddev->external)
-               set_bit(Blocked, &rdev->flags);
-/*
-       dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
-               mdname(mddev),
-               MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
-               __builtin_return_address(0),__builtin_return_address(1),
-               __builtin_return_address(2),__builtin_return_address(3));
-*/
-       if (!mddev->pers)
-               return;
-       if (!mddev->pers->error_handler)
+       if (!mddev->pers || !mddev->pers->error_handler)
                return;
        mddev->pers->error_handler(mddev,rdev);
        if (mddev->degraded)
@@ -6402,16 +6626,11 @@ static void md_seq_stop(struct seq_file *seq, void *v)
                mddev_put(mddev);
 }
 
-struct mdstat_info {
-       int event;
-};
-
 static int md_seq_show(struct seq_file *seq, void *v)
 {
        mddev_t *mddev = v;
        sector_t sectors;
        mdk_rdev_t *rdev;
-       struct mdstat_info *mi = seq->private;
        struct bitmap *bitmap;
 
        if (v == (void*)1) {
@@ -6423,7 +6642,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
 
                spin_unlock(&pers_lock);
                seq_printf(seq, "\n");
-               mi->event = atomic_read(&md_event_count);
+               seq->poll_event = atomic_read(&md_event_count);
                return 0;
        }
        if (v == (void*)2) {
@@ -6535,26 +6754,21 @@ static const struct seq_operations md_seq_ops = {
 
 static int md_seq_open(struct inode *inode, struct file *file)
 {
+       struct seq_file *seq;
        int error;
-       struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
-       if (mi == NULL)
-               return -ENOMEM;
 
        error = seq_open(file, &md_seq_ops);
        if (error)
-               kfree(mi);
-       else {
-               struct seq_file *p = file->private_data;
-               p->private = mi;
-               mi->event = atomic_read(&md_event_count);
-       }
+               return error;
+
+       seq = file->private_data;
+       seq->poll_event = atomic_read(&md_event_count);
        return error;
 }
 
 static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
 {
-       struct seq_file *m = filp->private_data;
-       struct mdstat_info *mi = m->private;
+       struct seq_file *seq = filp->private_data;
        int mask;
 
        poll_wait(filp, &md_event_waiters, wait);
@@ -6562,7 +6776,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
        /* always allow read */
        mask = POLLIN | POLLRDNORM;
 
-       if (mi->event != atomic_read(&md_event_count))
+       if (seq->poll_event != atomic_read(&md_event_count))
                mask |= POLLERR | POLLPRI;
        return mask;
 }
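
With the event count now held in seq_file::poll_event, the poll
semantics are unchanged: a reader gets POLLPRI once md_event_count
has moved past the value sampled when the file was read.  A userspace
sketch of the usual consumer, using only standard poll(2) behaviour:

    #include <fcntl.h>
    #include <poll.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[4096];
            int fd = open("/proc/mdstat", O_RDONLY);

            if (fd < 0)
                    return 1;
            read(fd, buf, sizeof(buf));     /* consume the current state */
            struct pollfd pfd = { .fd = fd, .events = POLLPRI };
            if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
                    lseek(fd, 0, SEEK_SET); /* an md event occurred */
                    read(fd, buf, sizeof(buf));
            }
            close(fd);
            return 0;
    }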
@@ -6951,11 +7165,14 @@ void md_do_sync(mddev_t *mddev)
                        atomic_add(sectors, &mddev->recovery_active);
                }
 
+               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+                       break;
+
                j += sectors;
                if (j>1) mddev->curr_resync = j;
                mddev->curr_mark_cnt = io_sectors;
                if (last_check == 0)
-                       /* this is the earliers that rebuilt will be
+                       /* this is the earliest that rebuild will be
                         * visible in /proc/mdstat
                         */
                        md_new_event(mddev);
@@ -6964,10 +7181,6 @@ void md_do_sync(mddev_t *mddev)
                        continue;
 
                last_check = io_sectors;
-
-               if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
-                       break;
-
        repeat:
                if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
                        /* step marks */
@@ -7094,8 +7307,7 @@ static int remove_and_add_spares(mddev_t *mddev)
                list_for_each_entry(rdev, &mddev->disks, same_set) {
                        if (rdev->raid_disk >= 0 &&
                            !test_bit(In_sync, &rdev->flags) &&
-                           !test_bit(Faulty, &rdev->flags) &&
-                           !test_bit(Blocked, &rdev->flags))
+                           !test_bit(Faulty, &rdev->flags))
                                spares++;
                        if (rdev->raid_disk < 0
                            && !test_bit(Faulty, &rdev->flags)) {
@@ -7341,7 +7553,8 @@ void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 {
        sysfs_notify_dirent_safe(rdev->sysfs_state);
        wait_event_timeout(rdev->blocked_wait,
-                          !test_bit(Blocked, &rdev->flags),
+                          !test_bit(Blocked, &rdev->flags) &&
+                          !test_bit(BlockedBadBlocks, &rdev->flags),
                           msecs_to_jiffies(5000));
        rdev_dec_pending(rdev, mddev);
 }
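
md_wait_for_blocked_rdev() now also waits for BlockedBadBlocks, so a
personality can park a request until the bad-block record has been
acknowledged by the metadata handler.  A hedged sketch of such a
caller (the surrounding logic is illustrative; note the function
drops the nr_pending reference via rdev_dec_pending()):

    if (test_bit(Blocked, &rdev->flags) ||
        test_bit(BlockedBadBlocks, &rdev->flags)) {
            atomic_inc(&rdev->nr_pending);
            md_wait_for_blocked_rdev(rdev, mddev);
    }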
@@ -7587,6 +7800,8 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
        }
 
        bb->changed = 1;
+       if (!acknowledged)
+               bb->unacked_exist = 1;
        write_sequnlock_irq(&bb->lock);
 
        return rv;
@@ -7731,11 +7946,106 @@ void md_ack_all_badblocks(struct badblocks *bb)
                                p[i] = BB_MAKE(start, len, 1);
                        }
                }
+               bb->unacked_exist = 0;
        }
        write_sequnlock_irq(&bb->lock);
 }
 EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
 
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad_blocks' lists sector numbers and lengths of ranges that
+ *    are recorded as bad.  The list is truncated to fit within
+ *    the one-page limit of sysfs.
+ *    Writing "sector length" to this file adds an acknowledged
+ *    bad block to the list.
+ * 'unacknowledged_bad_blocks' lists bad blocks that have not yet
+ *    been acknowledged.  Writing to this file adds bad blocks
+ *    without acknowledging them.  This is largely for testing.
+ */
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+       size_t len;
+       int i;
+       u64 *p = bb->page;
+       unsigned seq;
+
+       if (bb->shift < 0)
+               return 0;
+
+retry:
+       seq = read_seqbegin(&bb->lock);
+
+       len = 0;
+       i = 0;
+
+       while (len < PAGE_SIZE && i < bb->count) {
+               sector_t s = BB_OFFSET(p[i]);
+               unsigned int length = BB_LEN(p[i]);
+               int ack = BB_ACK(p[i]);
+               i++;
+
+               if (unack && ack)
+                       continue;
+
+               len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+                               (unsigned long long)s << bb->shift,
+                               length << bb->shift);
+       }
+       if (unack && len == 0)
+               bb->unacked_exist = 0;
+
+       if (read_seqretry(&bb->lock, seq))
+               goto retry;
+
+       return len;
+}
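
badblocks_show() unpacks in-memory entries with the BB_OFFSET/BB_LEN/
BB_ACK helpers.  They are defined in the companion md.h change, not in
this diff; the encoding there is along these lines (54 bits of offset,
9 bits of length-minus-one, one ack bit):

    #define BB_LEN_MASK    (0x00000000000001FFULL)
    #define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL)
    #define BB_ACK_MASK    (0x8000000000000000ULL)
    #define BB_MAX_LEN     512
    #define BB_OFFSET(x)   (((x) & BB_OFFSET_MASK) >> 9)
    #define BB_LEN(x)      (((x) & BB_LEN_MASK) + 1)
    #define BB_ACK(x)      (!!((x) & BB_ACK_MASK))
    #define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))

Note the asymmetry with the on-disk log: in memory a 9-bit field stores
length - 1 (ranges of 1 to 512 sectors), while the on-disk words read
back in super_1_load() carry a raw 10-bit count.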
+
+#define DO_DEBUG 1
+
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
+{
+       unsigned long long sector;
+       int length;
+       char newline;
+#ifdef DO_DEBUG
+       /* Allow clearing via sysfs *only* for testing/debugging.
+        * Normally only a successful write may clear a badblock
+        */
+       int clear = 0;
+       if (page[0] == '-') {
+               clear = 1;
+               page++;
+       }
+#endif /* DO_DEBUG */
+
+       switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+       case 3:
+               if (newline != '\n')
+                       return -EINVAL;
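+               /* fall through to the length check */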
+       case 2:
+               if (length <= 0)
+                       return -EINVAL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+#ifdef DO_DEBUG
+       if (clear) {
+               md_clear_badblocks(bb, sector, length);
+               return len;
+       }
+#endif /* DO_DEBUG */
+       if (md_set_badblocks(bb, sector, length, !unack))
+               return len;
+       else
+               return -ENOSPC;
+}
+
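
Usage note: a range is added by writing "sector length" to the
bad_blocks file under the rdev's sysfs directory (for example,
echo "1048576 16" > bad_blocks marks 16 sectors bad starting at sector
1048576); with DO_DEBUG set, a leading '-' clears the range instead of
setting it.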
 static int md_notify_reboot(struct notifier_block *this,
                            unsigned long code, void *x)
 {