Commit | Line | Data |
---|---|---|
047fc8a1 RZ |
1 | /* |
2 | * NVDIMM Block Window Driver | |
3 | * Copyright (c) 2014, Intel Corporation. | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms and conditions of the GNU General Public License, | |
7 | * version 2, as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope it will be useful, but WITHOUT | |
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
12 | * more details. | |
13 | */ | |
14 | ||
15 | #include <linux/blkdev.h> | |
16 | #include <linux/fs.h> | |
17 | #include <linux/genhd.h> | |
18 | #include <linux/module.h> | |
19 | #include <linux/moduleparam.h> | |
20 | #include <linux/nd.h> | |
21 | #include <linux/sizes.h> | |
22 | #include "nd.h" | |
23 | ||
24 | struct nd_blk_device { | |
25 | struct request_queue *queue; | |
26 | struct gendisk *disk; | |
27 | struct nd_namespace_blk *nsblk; | |
28 | struct nd_blk_region *ndbr; | |
29 | size_t disk_size; | |
fcae6957 VV |
30 | u32 sector_size; |
31 | u32 internal_lbasize; | |
047fc8a1 RZ |
32 | }; |
33 | ||
/* Block major number; assigned in nd_blk_init() from register_blkdev(). */
static int nd_blk_major;
35 | ||
fcae6957 VV |
36 | static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) |
37 | { | |
38 | return blk_dev->nsblk->lbasize - blk_dev->sector_size; | |
39 | } | |
40 | ||
047fc8a1 RZ |
41 | static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, |
42 | resource_size_t ns_offset, unsigned int len) | |
43 | { | |
44 | int i; | |
45 | ||
46 | for (i = 0; i < nsblk->num_resources; i++) { | |
47 | if (ns_offset < resource_size(nsblk->res[i])) { | |
48 | if (ns_offset + len > resource_size(nsblk->res[i])) { | |
49 | dev_WARN_ONCE(&nsblk->common.dev, 1, | |
50 | "illegal request\n"); | |
51 | return SIZE_MAX; | |
52 | } | |
53 | return nsblk->res[i]->start + ns_offset; | |
54 | } | |
55 | ns_offset -= resource_size(nsblk->res[i]); | |
56 | } | |
57 | ||
58 | dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n"); | |
59 | return SIZE_MAX; | |
60 | } | |
61 | ||
fcae6957 VV |
62 | #ifdef CONFIG_BLK_DEV_INTEGRITY |
63 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | |
64 | struct bio_integrity_payload *bip, u64 lba, | |
65 | int rw) | |
66 | { | |
67 | unsigned int len = nd_blk_meta_size(blk_dev); | |
68 | resource_size_t dev_offset, ns_offset; | |
69 | struct nd_namespace_blk *nsblk; | |
70 | struct nd_blk_region *ndbr; | |
71 | int err = 0; | |
72 | ||
73 | nsblk = blk_dev->nsblk; | |
74 | ndbr = blk_dev->ndbr; | |
75 | ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; | |
76 | dev_offset = to_dev_offset(nsblk, ns_offset, len); | |
77 | if (dev_offset == SIZE_MAX) | |
78 | return -EIO; | |
79 | ||
80 | while (len) { | |
81 | unsigned int cur_len; | |
82 | struct bio_vec bv; | |
83 | void *iobuf; | |
84 | ||
85 | bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter); | |
86 | /* | |
87 | * The 'bv' obtained from bvec_iter_bvec has its .bv_len and | |
88 | * .bv_offset already adjusted for iter->bi_bvec_done, and we | |
89 | * can use those directly | |
90 | */ | |
91 | ||
92 | cur_len = min(len, bv.bv_len); | |
93 | iobuf = kmap_atomic(bv.bv_page); | |
94 | err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset, | |
95 | cur_len, rw); | |
96 | kunmap_atomic(iobuf); | |
97 | if (err) | |
98 | return err; | |
99 | ||
100 | len -= cur_len; | |
101 | dev_offset += cur_len; | |
102 | bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len); | |
103 | } | |
104 | ||
105 | return err; | |
106 | } | |
107 | ||
108 | #else /* CONFIG_BLK_DEV_INTEGRITY */ | |
109 | static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, | |
110 | struct bio_integrity_payload *bip, u64 lba, | |
111 | int rw) | |
112 | { | |
113 | return 0; | |
114 | } | |
115 | #endif | |
116 | ||
117 | static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, | |
118 | struct bio_integrity_payload *bip, struct page *page, | |
119 | unsigned int len, unsigned int off, int rw, | |
120 | sector_t sector) | |
121 | { | |
122 | struct nd_blk_region *ndbr = blk_dev->ndbr; | |
123 | resource_size_t dev_offset, ns_offset; | |
124 | int err = 0; | |
125 | void *iobuf; | |
126 | u64 lba; | |
127 | ||
128 | while (len) { | |
129 | unsigned int cur_len; | |
130 | ||
131 | /* | |
132 | * If we don't have an integrity payload, we don't have to | |
133 | * split the bvec into sectors, as this would cause unnecessary | |
134 | * Block Window setup/move steps. the do_io routine is capable | |
135 | * of handling len <= PAGE_SIZE. | |
136 | */ | |
137 | cur_len = bip ? min(len, blk_dev->sector_size) : len; | |
138 | ||
139 | lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); | |
140 | ns_offset = lba * blk_dev->internal_lbasize; | |
141 | dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); | |
142 | if (dev_offset == SIZE_MAX) | |
143 | return -EIO; | |
144 | ||
145 | iobuf = kmap_atomic(page); | |
146 | err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw); | |
147 | kunmap_atomic(iobuf); | |
148 | if (err) | |
149 | return err; | |
150 | ||
151 | if (bip) { | |
152 | err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); | |
153 | if (err) | |
154 | return err; | |
155 | } | |
156 | len -= cur_len; | |
157 | off += cur_len; | |
158 | sector += blk_dev->sector_size >> SECTOR_SHIFT; | |
159 | } | |
160 | ||
161 | return err; | |
162 | } | |
163 | ||
047fc8a1 RZ |
164 | static void nd_blk_make_request(struct request_queue *q, struct bio *bio) |
165 | { | |
166 | struct block_device *bdev = bio->bi_bdev; | |
167 | struct gendisk *disk = bdev->bd_disk; | |
fcae6957 | 168 | struct bio_integrity_payload *bip; |
047fc8a1 | 169 | struct nd_blk_device *blk_dev; |
047fc8a1 | 170 | struct bvec_iter iter; |
f0dc089c | 171 | unsigned long start; |
047fc8a1 RZ |
172 | struct bio_vec bvec; |
173 | int err = 0, rw; | |
f0dc089c | 174 | bool do_acct; |
047fc8a1 | 175 | |
fcae6957 VV |
176 | /* |
177 | * bio_integrity_enabled also checks if the bio already has an | |
178 | * integrity payload attached. If it does, we *don't* do a | |
179 | * bio_integrity_prep here - the payload has been generated by | |
180 | * another kernel subsystem, and we just pass it through. | |
181 | */ | |
182 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | |
183 | err = -EIO; | |
184 | goto out; | |
185 | } | |
186 | ||
187 | bip = bio_integrity(bio); | |
047fc8a1 | 188 | blk_dev = disk->private_data; |
047fc8a1 | 189 | rw = bio_data_dir(bio); |
f0dc089c | 190 | do_acct = nd_iostat_start(bio, &start); |
047fc8a1 RZ |
191 | bio_for_each_segment(bvec, bio, iter) { |
192 | unsigned int len = bvec.bv_len; | |
047fc8a1 RZ |
193 | |
194 | BUG_ON(len > PAGE_SIZE); | |
fcae6957 VV |
195 | err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, |
196 | bvec.bv_offset, rw, iter.bi_sector); | |
197 | if (err) { | |
198 | dev_info(&blk_dev->nsblk->common.dev, | |
199 | "io error in %s sector %lld, len %d,\n", | |
200 | (rw == READ) ? "READ" : "WRITE", | |
201 | (unsigned long long) iter.bi_sector, len); | |
f0dc089c | 202 | break; |
047fc8a1 | 203 | } |
047fc8a1 | 204 | } |
f0dc089c DW |
205 | if (do_acct) |
206 | nd_iostat_end(bio, start); | |
047fc8a1 RZ |
207 | |
208 | out: | |
209 | bio_endio(bio, err); | |
210 | } | |
211 | ||
212 | static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, | |
213 | resource_size_t offset, void *iobuf, size_t n, int rw) | |
214 | { | |
215 | struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); | |
216 | struct nd_namespace_blk *nsblk = blk_dev->nsblk; | |
217 | struct nd_blk_region *ndbr = blk_dev->ndbr; | |
218 | resource_size_t dev_offset; | |
219 | ||
220 | dev_offset = to_dev_offset(nsblk, offset, n); | |
221 | ||
222 | if (unlikely(offset + n > blk_dev->disk_size)) { | |
223 | dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); | |
224 | return -EFAULT; | |
225 | } | |
226 | ||
227 | if (dev_offset == SIZE_MAX) | |
228 | return -EIO; | |
229 | ||
230 | return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw); | |
231 | } | |
232 | ||
233 | static const struct block_device_operations nd_blk_fops = { | |
234 | .owner = THIS_MODULE, | |
235 | }; | |
236 | ||
237 | static int nd_blk_attach_disk(struct nd_namespace_common *ndns, | |
238 | struct nd_blk_device *blk_dev) | |
239 | { | |
fcae6957 | 240 | resource_size_t available_disk_size; |
047fc8a1 | 241 | struct gendisk *disk; |
fcae6957 VV |
242 | u64 internal_nlba; |
243 | ||
244 | internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); | |
245 | available_disk_size = internal_nlba * blk_dev->sector_size; | |
047fc8a1 RZ |
246 | |
247 | blk_dev->queue = blk_alloc_queue(GFP_KERNEL); | |
248 | if (!blk_dev->queue) | |
249 | return -ENOMEM; | |
250 | ||
251 | blk_queue_make_request(blk_dev->queue, nd_blk_make_request); | |
252 | blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX); | |
253 | blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY); | |
fcae6957 | 254 | blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size); |
047fc8a1 RZ |
255 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue); |
256 | ||
257 | disk = blk_dev->disk = alloc_disk(0); | |
258 | if (!disk) { | |
259 | blk_cleanup_queue(blk_dev->queue); | |
260 | return -ENOMEM; | |
261 | } | |
262 | ||
263 | disk->driverfs_dev = &ndns->dev; | |
264 | disk->major = nd_blk_major; | |
265 | disk->first_minor = 0; | |
266 | disk->fops = &nd_blk_fops; | |
267 | disk->private_data = blk_dev; | |
268 | disk->queue = blk_dev->queue; | |
269 | disk->flags = GENHD_FL_EXT_DEVT; | |
270 | nvdimm_namespace_disk_name(ndns, disk->disk_name); | |
fcae6957 | 271 | set_capacity(disk, 0); |
047fc8a1 RZ |
272 | add_disk(disk); |
273 | ||
fcae6957 VV |
274 | if (nd_blk_meta_size(blk_dev)) { |
275 | int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); | |
276 | ||
277 | if (rc) { | |
278 | del_gendisk(disk); | |
279 | put_disk(disk); | |
280 | blk_cleanup_queue(blk_dev->queue); | |
281 | return rc; | |
282 | } | |
283 | } | |
284 | ||
285 | set_capacity(disk, available_disk_size >> SECTOR_SHIFT); | |
047fc8a1 RZ |
286 | return 0; |
287 | } | |
288 | ||
289 | static int nd_blk_probe(struct device *dev) | |
290 | { | |
291 | struct nd_namespace_common *ndns; | |
fcae6957 | 292 | struct nd_namespace_blk *nsblk; |
047fc8a1 RZ |
293 | struct nd_blk_device *blk_dev; |
294 | int rc; | |
295 | ||
296 | ndns = nvdimm_namespace_common_probe(dev); | |
297 | if (IS_ERR(ndns)) | |
298 | return PTR_ERR(ndns); | |
299 | ||
300 | blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL); | |
301 | if (!blk_dev) | |
302 | return -ENOMEM; | |
303 | ||
fcae6957 | 304 | nsblk = to_nd_namespace_blk(&ndns->dev); |
047fc8a1 RZ |
305 | blk_dev->disk_size = nvdimm_namespace_capacity(ndns); |
306 | blk_dev->ndbr = to_nd_blk_region(dev->parent); | |
307 | blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); | |
fcae6957 VV |
308 | blk_dev->internal_lbasize = roundup(nsblk->lbasize, |
309 | INT_LBASIZE_ALIGNMENT); | |
310 | blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512); | |
047fc8a1 RZ |
311 | dev_set_drvdata(dev, blk_dev); |
312 | ||
313 | ndns->rw_bytes = nd_blk_rw_bytes; | |
314 | if (is_nd_btt(dev)) | |
315 | rc = nvdimm_namespace_attach_btt(ndns); | |
316 | else if (nd_btt_probe(ndns, blk_dev) == 0) { | |
317 | /* we'll come back as btt-blk */ | |
318 | rc = -ENXIO; | |
319 | } else | |
320 | rc = nd_blk_attach_disk(ndns, blk_dev); | |
321 | if (rc) | |
322 | kfree(blk_dev); | |
323 | return rc; | |
324 | } | |
325 | ||
326 | static void nd_blk_detach_disk(struct nd_blk_device *blk_dev) | |
327 | { | |
328 | del_gendisk(blk_dev->disk); | |
329 | put_disk(blk_dev->disk); | |
330 | blk_cleanup_queue(blk_dev->queue); | |
331 | } | |
332 | ||
333 | static int nd_blk_remove(struct device *dev) | |
334 | { | |
335 | struct nd_blk_device *blk_dev = dev_get_drvdata(dev); | |
336 | ||
337 | if (is_nd_btt(dev)) | |
338 | nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); | |
339 | else | |
340 | nd_blk_detach_disk(blk_dev); | |
341 | kfree(blk_dev); | |
342 | ||
343 | return 0; | |
344 | } | |
345 | ||
346 | static struct nd_device_driver nd_blk_driver = { | |
347 | .probe = nd_blk_probe, | |
348 | .remove = nd_blk_remove, | |
349 | .drv = { | |
350 | .name = "nd_blk", | |
351 | }, | |
352 | .type = ND_DRIVER_NAMESPACE_BLK, | |
353 | }; | |
354 | ||
355 | static int __init nd_blk_init(void) | |
356 | { | |
357 | int rc; | |
358 | ||
359 | rc = register_blkdev(0, "nd_blk"); | |
360 | if (rc < 0) | |
361 | return rc; | |
362 | ||
363 | nd_blk_major = rc; | |
364 | rc = nd_driver_register(&nd_blk_driver); | |
365 | ||
366 | if (rc < 0) | |
367 | unregister_blkdev(nd_blk_major, "nd_blk"); | |
368 | ||
369 | return rc; | |
370 | } | |
371 | ||
372 | static void __exit nd_blk_exit(void) | |
373 | { | |
374 | driver_unregister(&nd_blk_driver.drv); | |
375 | unregister_blkdev(nd_blk_major, "nd_blk"); | |
376 | } | |
377 | ||
378 | MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); | |
379 | MODULE_LICENSE("GPL v2"); | |
380 | MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK); | |
381 | module_init(nd_blk_init); | |
382 | module_exit(nd_blk_exit); |