Commit | Line | Data |
---|---|---|
047fc8a1 RZ |
1 | /* |
2 | * NVDIMM Block Window Driver | |
3 | * Copyright (c) 2014, Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | */ | |
14 | ||
15 | #include <linux/blkdev.h> | |
16 | #include <linux/fs.h> | |
17 | #include <linux/genhd.h> | |
18 | #include <linux/module.h> | |
19 | #include <linux/moduleparam.h> | |
20 | #include <linux/nd.h> | |
21 | #include <linux/sizes.h> | |
22 | #include "nd.h" | |
23 | ||
24 | struct nd_blk_device { | |
25 | struct request_queue *queue; | |
26 | struct gendisk *disk; | |
27 | struct nd_namespace_blk *nsblk; | |
28 | struct nd_blk_region *ndbr; | |
29 | size_t disk_size; | |
fcae6957 VV |
30 | u32 sector_size; |
31 | u32 internal_lbasize; | |
047fc8a1 RZ |
32 | }; |
33 | ||
fcae6957 VV |
34 | static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) |
35 | { | |
36 | return blk_dev->nsblk->lbasize - blk_dev->sector_size; | |
37 | } | |
38 | ||
047fc8a1 RZ |
39 | static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, |
40 | resource_size_t ns_offset, unsigned int len) | |
41 | { | |
42 | int i; | |
43 | ||
44 | for (i = 0; i < nsblk->num_resources; i++) { | |
45 | if (ns_offset < resource_size(nsblk->res[i])) { | |
46 | if (ns_offset + len > resource_size(nsblk->res[i])) { | |
47 | dev_WARN_ONCE(&nsblk->common.dev, 1, | |
48 | "illegal request\n"); | |
49 | return SIZE_MAX; | |
50 | } | |
51 | return nsblk->res[i]->start + ns_offset; | |
52 | } | |
53 | ns_offset -= resource_size(nsblk->res[i]); | |
54 | } | |
55 | ||
56 | dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); | |
57 | return SIZE_MAX; | |
58 | } | |
59 | ||
fcae6957 VV |
60 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
61 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | |
62 | struct bio_integrity_payload *bip, u64 lba, | |
63 | int rw) | |
64 | { | |
65 | unsigned int len = nd_blk_meta_size(blk_dev); | |
66 | resource_size_t dev_offset, ns_offset; | |
67 | struct nd_namespace_blk *nsblk; | |
68 | struct nd_blk_region *ndbr; | |
69 | int err = 0; | |
70 | ||
71 | nsblk = blk_dev->nsblk; | |
72 | ndbr = blk_dev->ndbr; | |
73 | ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; | |
74 | dev_offset = to_dev_offset(nsblk, ns_offset, len); | |
75 | if (dev_offset == SIZE_MAX) | |
76 | return -EIO; | |
77 | ||
78 | while (len) { | |
79 | unsigned int cur_len; | |
80 | struct bio_vec bv; | |
81 | void *iobuf; | |
82 | ||
83 | bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); | |
84 | /* | |
85 | * The 'bv' obtained from bvec_iter_bvec has its .bv_len and | |
86 | * .bv_offset already adjusted for iter->bi_bvec_done, and we | |
87 | * can use those directly | |
88 | */ | |
89 | ||
90 | cur_len = min(len, bv.bv_len); | |
91 | iobuf = kmap_atomic(bv.bv_page); | |
92 | err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, | |
93 | cur_len, rw); | |
94 | kunmap_atomic(iobuf); | |
95 | if (err) | |
96 | return err; | |
97 | ||
98 | len -= cur_len; | |
99 | dev_offset += cur_len; | |
100 | bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); | |
101 | } | |
102 | ||
103 | return err; | |
104 | } | |
105 | ||
106 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | |
107 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | |
108 | struct bio_integrity_payload *bip, u64 lba, | |
109 | int rw) | |
110 | { | |
111 | return 0; | |
112 | } | |
113 | #endif | |
114 | ||
115 | static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, | |
116 | struct bio_integrity_payload *bip, struct page *page, | |
117 | unsigned int len, unsigned int off, int rw, | |
118 | sector_t sector) | |
119 | { | |
120 | struct nd_blk_region *ndbr = blk_dev->ndbr; | |
121 | resource_size_t dev_offset, ns_offset; | |
122 | int err = 0; | |
123 | void *iobuf; | |
124 | u64 lba; | |
125 | ||
126 | while (len) { | |
127 | unsigned int cur_len; | |
128 | ||
129 | /* | |
130 | * If we don't have an integrity payload, we don't have to | |
131 | * split the bvec into sectors, as this would cause unnecessary | |
132 | * Block Window setup/move steps. the do_io routine is capable | |
133 | * of handling len <= PAGE_SIZE. | |
134 | */ | |
135 | cur_len = bip ? min(len, blk_dev->sector_size) : len; | |
136 | ||
137 | lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); | |
138 | ns_offset = lba * blk_dev->internal_lbasize; | |
139 | dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); | |
140 | if (dev_offset == SIZE_MAX) | |
141 | return -EIO; | |
142 | ||
143 | iobuf = kmap_atomic(page); | |
144 | err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); | |
145 | kunmap_atomic(iobuf); | |
146 | if (err) | |
147 | return err; | |
148 | ||
149 | if (bip) { | |
150 | err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); | |
151 | if (err) | |
152 | return err; | |
153 | } | |
154 | len -= cur_len; | |
155 | off += cur_len; | |
156 | sector += blk_dev->sector_size >> SECTOR_SHIFT; | |
157 | } | |
158 | ||
159 | return err; | |
160 | } | |
161 | ||
dece1635 | 162 | static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) |
047fc8a1 RZ |
163 | { |
164 | struct block_device *bdev = bio->bi_bdev; | |
165 | struct gendisk *disk = bdev->bd_disk; | |
fcae6957 | 166 | struct bio_integrity_payload *bip; |
047fc8a1 | 167 | struct nd_blk_device *blk_dev; |
047fc8a1 | 168 | struct bvec_iter iter; |
f0dc089c | 169 | unsigned long start; |
047fc8a1 RZ |
170 | struct bio_vec bvec; |
171 | int err = 0, rw; | |
f0dc089c | 172 | bool do_acct; |
047fc8a1 | 173 | |
fcae6957 VV |
174 | /* |
175 | * bio_integrity_enabled also checks if the bio already has an | |
176 | * integrity payload attached. If it does, we *don't* do a | |
177 | * bio_integrity_prep here - the payload has been generated by | |
178 | * another kernel subsystem, and we just pass it through. | |
179 | */ | |
180 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | |
4246a0b6 | 181 | bio->bi_error = -EIO; |
fcae6957 VV |
182 | goto out; |
183 | } | |
184 | ||
185 | bip = bio_integrity(bio); | |
047fc8a1 | 186 | blk_dev = disk->private_data; |
047fc8a1 | 187 | rw = bio_data_dir(bio); |
f0dc089c | 188 | do_acct = nd_iostat_start(bio, &start); |
047fc8a1 RZ |
189 | bio_for_each_segment(bvec, bio, iter) { |
190 | unsigned int len = bvec.bv_len; | |
047fc8a1 RZ |
191 | |
192 | BUG_ON(len > PAGE_SIZE); | |
fcae6957 VV |
193 | err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, |
194 | bvec.bv_offset, rw, iter.bi_sector); | |
195 | if (err) { | |
196 | dev_info(&blk_dev->nsblk->common.dev, | |
197 | "io error in %s sector %lld, len %d,\n", | |
198 | (rw == READ) ? "READ" : "WRITE", | |
199 | (unsigned long long) iter.bi_sector, len); | |
4246a0b6 | 200 | bio->bi_error = err; |
f0dc089c | 201 | break; |
047fc8a1 | 202 | } |
047fc8a1 | 203 | } |
f0dc089c DW |
204 | if (do_acct) |
205 | nd_iostat_end(bio, start); | |
047fc8a1 RZ |
206 | |
207 | out: | |
4246a0b6 | 208 | bio_endio(bio); |
dece1635 | 209 | return BLK_QC_T_NONE; |
047fc8a1 RZ |
210 | } |
211 | ||
212 | static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, | |
213 | resource_size_t offset, void *iobuf, size_t n, int rw) | |
214 | { | |
215 | struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); | |
216 | struct nd_namespace_blk *nsblk = blk_dev->nsblk; | |
217 | struct nd_blk_region *ndbr = blk_dev->ndbr; | |
218 | resource_size_t dev_offset; | |
219 | ||
220 | dev_offset = to_dev_offset(nsblk, offset, n); | |
221 | ||
222 | if (unlikely(offset + n > blk_dev->disk_size)) { | |
223 | dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); | |
224 | return -EFAULT; | |
225 | } | |
226 | ||
227 | if (dev_offset == SIZE_MAX) | |
228 | return -EIO; | |
229 | ||
230 | return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); | |
231 | } | |
232 | ||
233 | static const struct block_device_operations nd_blk_fops = { | |
234 | .owner = THIS_MODULE, | |
58138820 | 235 | .revalidate_disk = nvdimm_revalidate_disk, |
047fc8a1 RZ |
236 | }; |
237 | ||
238 | static int nd_blk_attach_disk(struct nd_namespace_common *ndns, | |
239 | struct nd_blk_device *blk_dev) | |
240 | { | |
fcae6957 | 241 | resource_size_t available_disk_size; |
047fc8a1 | 242 | struct gendisk *disk; |
fcae6957 VV |
243 | u64 internal_nlba; |
244 | ||
245 | internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); | |
246 | available_disk_size = internal_nlba * blk_dev->sector_size; | |
047fc8a1 RZ |
247 | |
248 | blk_dev->queue = blk_alloc_queue(GFP_KERNEL); | |
249 | if (!blk_dev->queue) | |
250 | return -ENOMEM; | |
251 | ||
252 | blk_queue_make_request(blk_dev->queue, nd_blk_make_request); | |
253 | blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); | |
254 | blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); | |
fcae6957 | 255 | blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); |
047fc8a1 RZ |
256 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); |
257 | ||
258 | disk = blk_dev->disk = alloc_disk(0); | |
259 | if (!disk) { | |
260 | blk_cleanup_queue(blk_dev->queue); | |
261 | return -ENOMEM; | |
262 | } | |
263 | ||
264 | disk->driverfs_dev = &ndns->dev; | |
047fc8a1 RZ |
265 | disk->first_minor = 0; |
266 | disk->fops = &nd_blk_fops; | |
267 | disk->private_data = blk_dev; | |
268 | disk->queue = blk_dev->queue; | |
269 | disk->flags = GENHD_FL_EXT_DEVT; | |
270 | nvdimm_namespace_disk_name(ndns, disk->disk_name); | |
fcae6957 | 271 | set_capacity(disk, 0); |
047fc8a1 RZ |
272 | add_disk(disk); |
273 | ||
fcae6957 VV |
274 | if (nd_blk_meta_size(blk_dev)) { |
275 | int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); | |
276 | ||
277 | if (rc) { | |
278 | del_gendisk(disk); | |
279 | put_disk(disk); | |
280 | blk_cleanup_queue(blk_dev->queue); | |
281 | return rc; | |
282 | } | |
283 | } | |
284 | ||
285 | set_capacity(disk, available_disk_size >> SECTOR_SHIFT); | |
58138820 | 286 | revalidate_disk(disk); |
047fc8a1 RZ |
287 | return 0; |
288 | } | |
289 | ||
290 | static int nd_blk_probe(struct device *dev) | |
291 | { | |
292 | struct nd_namespace_common *ndns; | |
fcae6957 | 293 | struct nd_namespace_blk *nsblk; |
047fc8a1 RZ |
294 | struct nd_blk_device *blk_dev; |
295 | int rc; | |
296 | ||
297 | ndns = nvdimm_namespace_common_probe(dev); | |
298 | if (IS_ERR(ndns)) | |
299 | return PTR_ERR(ndns); | |
300 | ||
301 | blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); | |
302 | if (!blk_dev) | |
303 | return -ENOMEM; | |
304 | ||
fcae6957 | 305 | nsblk = to_nd_namespace_blk(&ndns->dev); |
047fc8a1 RZ |
306 | blk_dev->disk_size = nvdimm_namespace_capacity(ndns); |
307 | blk_dev->ndbr = to_nd_blk_region(dev->parent); | |
308 | blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); | |
fcae6957 VV |
309 | blk_dev->internal_lbasize = roundup(nsblk->lbasize, |
310 | INT_LBASIZE_ALIGNMENT); | |
311 | blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); | |
047fc8a1 RZ |
312 | dev_set_drvdata(dev, blk_dev); |
313 | ||
314 | ndns->rw_bytes = nd_blk_rw_bytes; | |
315 | if (is_nd_btt(dev)) | |
316 | rc = nvdimm_namespace_attach_btt(ndns); | |
317 | else if (nd_btt_probe(ndns, blk_dev) == 0) { | |
318 | /* we'll come back as btt-blk */ | |
319 | rc = -ENXIO; | |
320 | } else | |
321 | rc = nd_blk_attach_disk(ndns, blk_dev); | |
322 | if (rc) | |
323 | kfree(blk_dev); | |
324 | return rc; | |
325 | } | |
326 | ||
327 | static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) | |
328 | { | |
329 | del_gendisk(blk_dev->disk); | |
330 | put_disk(blk_dev->disk); | |
331 | blk_cleanup_queue(blk_dev->queue); | |
332 | } | |
333 | ||
334 | static int nd_blk_remove(struct device *dev) | |
335 | { | |
336 | struct nd_blk_device *blk_dev = dev_get_drvdata(dev); | |
337 | ||
338 | if (is_nd_btt(dev)) | |
339 | nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); | |
340 | else | |
341 | nd_blk_detach_disk(blk_dev); | |
342 | kfree(blk_dev); | |
343 | ||
344 | return 0; | |
345 | } | |
346 | ||
347 | static struct nd_device_driver nd_blk_driver = { | |
348 | .probe = nd_blk_probe, | |
349 | .remove = nd_blk_remove, | |
350 | .drv = { | |
351 | .name = "nd_blk", | |
352 | }, | |
353 | .type = ND_DRIVER_NAMESPACE_BLK, | |
354 | }; | |
355 | ||
356 | static int __init nd_blk_init(void) | |
357 | { | |
ec56151d | 358 | return nd_driver_register(&nd_blk_driver); |
047fc8a1 RZ |
359 | } |
360 | ||
361 | static void __exit nd_blk_exit(void) | |
362 | { | |
363 | driver_unregister(&nd_blk_driver.drv); | |
047fc8a1 RZ |
364 | } |
365 | ||
366 | MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); | |
367 | MODULE_LICENSE("GPL v2"); | |
368 | MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); | |
369 | module_init(nd_blk_init); | |
370 | module_exit(nd_blk_exit); |