Commit | Line | Data |
---|---|---|
5c83746a | 1 | /* |
d9186c03 | 2 | * Copyright (c) 2014-2016 Christoph Hellwig. |
5c83746a CH |
3 | */ |
4 | #include <linux/sunrpc/svc.h> | |
5 | #include <linux/blkdev.h> | |
6 | #include <linux/nfs4.h> | |
7 | #include <linux/nfs_fs.h> | |
8 | #include <linux/nfs_xdr.h> | |
d9186c03 | 9 | #include <linux/pr.h> |
5c83746a CH |
10 | |
11 | #include "blocklayout.h" | |
12 | ||
13 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | |
14 | ||
15 | static void | |
16 | bl_free_device(struct pnfs_block_dev *dev) | |
17 | { | |
18 | if (dev->nr_children) { | |
19 | int i; | |
20 | ||
21 | for (i = 0; i < dev->nr_children; i++) | |
22 | bl_free_device(&dev->children[i]); | |
23 | kfree(dev->children); | |
24 | } else { | |
d9186c03 CH |
25 | if (dev->pr_registered) { |
26 | const struct pr_ops *ops = | |
27 | dev->bdev->bd_disk->fops->pr_ops; | |
28 | int error; | |
29 | ||
30 | error = ops->pr_register(dev->bdev, dev->pr_key, 0, | |
31 | false); | |
32 | if (error) | |
33 | pr_err("failed to unregister PR key.\n"); | |
34 | } | |
35 | ||
5c83746a | 36 | if (dev->bdev) |
513d6d7a | 37 | blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE); |
5c83746a CH |
38 | } |
39 | } | |
40 | ||
41 | void | |
42 | bl_free_deviceid_node(struct nfs4_deviceid_node *d) | |
43 | { | |
44 | struct pnfs_block_dev *dev = | |
45 | container_of(d, struct pnfs_block_dev, node); | |
46 | ||
47 | bl_free_device(dev); | |
84a80f62 | 48 | kfree_rcu(dev, node.rcu); |
5c83746a CH |
49 | } |
50 | ||
51 | static int | |
52 | nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) | |
53 | { | |
54 | __be32 *p; | |
55 | int i; | |
56 | ||
57 | p = xdr_inline_decode(xdr, 4); | |
58 | if (!p) | |
59 | return -EIO; | |
60 | b->type = be32_to_cpup(p++); | |
61 | ||
62 | switch (b->type) { | |
63 | case PNFS_BLOCK_VOLUME_SIMPLE: | |
64 | p = xdr_inline_decode(xdr, 4); | |
65 | if (!p) | |
66 | return -EIO; | |
67 | b->simple.nr_sigs = be32_to_cpup(p++); | |
68 | if (!b->simple.nr_sigs) { | |
69 | dprintk("no signature\n"); | |
70 | return -EIO; | |
71 | } | |
72 | ||
73 | b->simple.len = 4 + 4; | |
74 | for (i = 0; i < b->simple.nr_sigs; i++) { | |
75 | p = xdr_inline_decode(xdr, 8 + 4); | |
76 | if (!p) | |
77 | return -EIO; | |
78 | p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); | |
79 | b->simple.sigs[i].sig_len = be32_to_cpup(p++); | |
2bd3c63a CH |
80 | if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) { |
81 | pr_info("signature too long: %d\n", | |
82 | b->simple.sigs[i].sig_len); | |
83 | return -EIO; | |
84 | } | |
5c83746a CH |
85 | |
86 | p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); | |
87 | if (!p) | |
88 | return -EIO; | |
89 | memcpy(&b->simple.sigs[i].sig, p, | |
90 | b->simple.sigs[i].sig_len); | |
91 | ||
92 | b->simple.len += 8 + 4 + b->simple.sigs[i].sig_len; | |
93 | } | |
94 | break; | |
95 | case PNFS_BLOCK_VOLUME_SLICE: | |
96 | p = xdr_inline_decode(xdr, 8 + 8 + 4); | |
97 | if (!p) | |
98 | return -EIO; | |
99 | p = xdr_decode_hyper(p, &b->slice.start); | |
100 | p = xdr_decode_hyper(p, &b->slice.len); | |
101 | b->slice.volume = be32_to_cpup(p++); | |
102 | break; | |
103 | case PNFS_BLOCK_VOLUME_CONCAT: | |
104 | p = xdr_inline_decode(xdr, 4); | |
105 | if (!p) | |
106 | return -EIO; | |
107 | b->concat.volumes_count = be32_to_cpup(p++); | |
108 | ||
109 | p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); | |
110 | if (!p) | |
111 | return -EIO; | |
112 | for (i = 0; i < b->concat.volumes_count; i++) | |
113 | b->concat.volumes[i] = be32_to_cpup(p++); | |
114 | break; | |
115 | case PNFS_BLOCK_VOLUME_STRIPE: | |
116 | p = xdr_inline_decode(xdr, 8 + 4); | |
117 | if (!p) | |
118 | return -EIO; | |
119 | p = xdr_decode_hyper(p, &b->stripe.chunk_size); | |
120 | b->stripe.volumes_count = be32_to_cpup(p++); | |
121 | ||
122 | p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); | |
123 | if (!p) | |
124 | return -EIO; | |
125 | for (i = 0; i < b->stripe.volumes_count; i++) | |
126 | b->stripe.volumes[i] = be32_to_cpup(p++); | |
127 | break; | |
d9186c03 CH |
128 | case PNFS_BLOCK_VOLUME_SCSI: |
129 | p = xdr_inline_decode(xdr, 4 + 4 + 4); | |
130 | if (!p) | |
131 | return -EIO; | |
132 | b->scsi.code_set = be32_to_cpup(p++); | |
133 | b->scsi.designator_type = be32_to_cpup(p++); | |
134 | b->scsi.designator_len = be32_to_cpup(p++); | |
135 | p = xdr_inline_decode(xdr, b->scsi.designator_len); | |
136 | if (!p) | |
137 | return -EIO; | |
138 | if (b->scsi.designator_len > 256) | |
139 | return -EIO; | |
140 | memcpy(&b->scsi.designator, p, b->scsi.designator_len); | |
141 | p = xdr_inline_decode(xdr, 8); | |
142 | if (!p) | |
143 | return -EIO; | |
144 | p = xdr_decode_hyper(p, &b->scsi.pr_key); | |
145 | break; | |
5c83746a CH |
146 | default: |
147 | dprintk("unknown volume type!\n"); | |
148 | return -EIO; | |
149 | } | |
150 | ||
151 | return 0; | |
152 | } | |
153 | ||
154 | static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, | |
155 | struct pnfs_block_dev_map *map) | |
156 | { | |
157 | map->start = dev->start; | |
158 | map->len = dev->len; | |
159 | map->disk_offset = dev->disk_offset; | |
160 | map->bdev = dev->bdev; | |
161 | return true; | |
162 | } | |
163 | ||
164 | static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, | |
165 | struct pnfs_block_dev_map *map) | |
166 | { | |
167 | int i; | |
168 | ||
169 | for (i = 0; i < dev->nr_children; i++) { | |
170 | struct pnfs_block_dev *child = &dev->children[i]; | |
171 | ||
172 | if (child->start > offset || | |
173 | child->start + child->len <= offset) | |
174 | continue; | |
175 | ||
176 | child->map(child, offset - child->start, map); | |
177 | return true; | |
178 | } | |
179 | ||
180 | dprintk("%s: ran off loop!\n", __func__); | |
181 | return false; | |
182 | } | |
183 | ||
184 | static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, | |
185 | struct pnfs_block_dev_map *map) | |
186 | { | |
187 | struct pnfs_block_dev *child; | |
5466112f TM |
188 | u64 chunk; |
189 | u32 chunk_idx; | |
5c83746a CH |
190 | u64 disk_offset; |
191 | ||
5466112f TM |
192 | chunk = div_u64(offset, dev->chunk_size); |
193 | div_u64_rem(chunk, dev->nr_children, &chunk_idx); | |
194 | ||
5c83746a CH |
195 | if (chunk_idx > dev->nr_children) { |
196 | dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", | |
197 | __func__, chunk_idx, offset, dev->chunk_size); | |
198 | /* error, should not happen */ | |
199 | return false; | |
200 | } | |
201 | ||
202 | /* truncate offset to the beginning of the stripe */ | |
203 | offset = chunk * dev->chunk_size; | |
204 | ||
205 | /* disk offset of the stripe */ | |
5466112f | 206 | disk_offset = div_u64(offset, dev->nr_children); |
5c83746a CH |
207 | |
208 | child = &dev->children[chunk_idx]; | |
209 | child->map(child, disk_offset, map); | |
210 | ||
211 | map->start += offset; | |
212 | map->disk_offset += disk_offset; | |
213 | map->len = dev->chunk_size; | |
214 | return true; | |
215 | } | |
216 | ||
217 | static int | |
218 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | |
219 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); | |
220 | ||
221 | ||
222 | static int | |
223 | bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, | |
224 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
225 | { | |
226 | struct pnfs_block_volume *v = &volumes[idx]; | |
227 | dev_t dev; | |
228 | ||
229 | dev = bl_resolve_deviceid(server, v, gfp_mask); | |
230 | if (!dev) | |
231 | return -EIO; | |
232 | ||
513d6d7a | 233 | d->bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_WRITE, NULL); |
5c83746a CH |
234 | if (IS_ERR(d->bdev)) { |
235 | printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", | |
236 | MAJOR(dev), MINOR(dev), PTR_ERR(d->bdev)); | |
237 | return PTR_ERR(d->bdev); | |
238 | } | |
239 | ||
240 | ||
241 | d->len = i_size_read(d->bdev->bd_inode); | |
242 | d->map = bl_map_simple; | |
243 | ||
244 | printk(KERN_INFO "pNFS: using block device %s\n", | |
245 | d->bdev->bd_disk->disk_name); | |
246 | return 0; | |
247 | } | |
248 | ||
d9186c03 CH |
249 | static bool |
250 | bl_validate_designator(struct pnfs_block_volume *v) | |
251 | { | |
252 | switch (v->scsi.designator_type) { | |
253 | case PS_DESIGNATOR_EUI64: | |
254 | if (v->scsi.code_set != PS_CODE_SET_BINARY) | |
255 | return false; | |
256 | ||
257 | if (v->scsi.designator_len != 8 && | |
258 | v->scsi.designator_len != 10 && | |
259 | v->scsi.designator_len != 16) | |
260 | return false; | |
261 | ||
262 | return true; | |
263 | case PS_DESIGNATOR_NAA: | |
264 | if (v->scsi.code_set != PS_CODE_SET_BINARY) | |
265 | return false; | |
266 | ||
267 | if (v->scsi.designator_len != 8 && | |
268 | v->scsi.designator_len != 16) | |
269 | return false; | |
270 | ||
271 | return true; | |
272 | case PS_DESIGNATOR_T10: | |
273 | case PS_DESIGNATOR_NAME: | |
274 | pr_err("pNFS: unsupported designator " | |
275 | "(code set %d, type %d, len %d.\n", | |
276 | v->scsi.code_set, | |
277 | v->scsi.designator_type, | |
278 | v->scsi.designator_len); | |
279 | return false; | |
280 | default: | |
281 | pr_err("pNFS: invalid designator " | |
282 | "(code set %d, type %d, len %d.\n", | |
283 | v->scsi.code_set, | |
284 | v->scsi.designator_type, | |
285 | v->scsi.designator_len); | |
286 | return false; | |
287 | } | |
288 | } | |
289 | ||
290 | static int | |
291 | bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, | |
292 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
293 | { | |
294 | struct pnfs_block_volume *v = &volumes[idx]; | |
295 | const struct pr_ops *ops; | |
296 | const char *devname; | |
297 | int error; | |
298 | ||
299 | if (!bl_validate_designator(v)) | |
300 | return -EINVAL; | |
301 | ||
302 | switch (v->scsi.designator_len) { | |
303 | case 8: | |
304 | devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN", | |
305 | v->scsi.designator); | |
306 | break; | |
307 | case 12: | |
308 | devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN", | |
309 | v->scsi.designator); | |
310 | break; | |
311 | case 16: | |
312 | devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN", | |
313 | v->scsi.designator); | |
314 | break; | |
315 | default: | |
316 | return -EINVAL; | |
317 | } | |
318 | ||
319 | d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL); | |
320 | if (IS_ERR(d->bdev)) { | |
321 | pr_warn("pNFS: failed to open device %s (%ld)\n", | |
322 | devname, PTR_ERR(d->bdev)); | |
323 | kfree(devname); | |
324 | return PTR_ERR(d->bdev); | |
325 | } | |
326 | ||
327 | kfree(devname); | |
328 | ||
329 | d->len = i_size_read(d->bdev->bd_inode); | |
330 | d->map = bl_map_simple; | |
331 | d->pr_key = v->scsi.pr_key; | |
332 | ||
333 | pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", | |
334 | d->bdev->bd_disk->disk_name, d->pr_key); | |
335 | ||
336 | ops = d->bdev->bd_disk->fops->pr_ops; | |
337 | if (!ops) { | |
338 | pr_err("pNFS: block device %s does not support reservations.", | |
339 | d->bdev->bd_disk->disk_name); | |
340 | error = -EINVAL; | |
341 | goto out_blkdev_put; | |
342 | } | |
343 | ||
344 | error = ops->pr_register(d->bdev, 0, d->pr_key, true); | |
345 | if (error) { | |
346 | pr_err("pNFS: failed to register key for block device %s.", | |
347 | d->bdev->bd_disk->disk_name); | |
348 | goto out_blkdev_put; | |
349 | } | |
350 | ||
351 | d->pr_registered = true; | |
352 | return 0; | |
353 | ||
354 | out_blkdev_put: | |
355 | blkdev_put(d->bdev, FMODE_READ); | |
356 | return error; | |
357 | } | |
358 | ||
5c83746a CH |
359 | static int |
360 | bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, | |
361 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
362 | { | |
363 | struct pnfs_block_volume *v = &volumes[idx]; | |
364 | int ret; | |
365 | ||
366 | ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); | |
367 | if (ret) | |
368 | return ret; | |
369 | ||
370 | d->disk_offset = v->slice.start; | |
371 | d->len = v->slice.len; | |
372 | return 0; | |
373 | } | |
374 | ||
375 | static int | |
376 | bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, | |
377 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
378 | { | |
379 | struct pnfs_block_volume *v = &volumes[idx]; | |
380 | u64 len = 0; | |
381 | int ret, i; | |
382 | ||
383 | d->children = kcalloc(v->concat.volumes_count, | |
384 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | |
385 | if (!d->children) | |
386 | return -ENOMEM; | |
387 | ||
388 | for (i = 0; i < v->concat.volumes_count; i++) { | |
389 | ret = bl_parse_deviceid(server, &d->children[i], | |
390 | volumes, v->concat.volumes[i], gfp_mask); | |
391 | if (ret) | |
392 | return ret; | |
393 | ||
394 | d->nr_children++; | |
395 | d->children[i].start += len; | |
396 | len += d->children[i].len; | |
397 | } | |
398 | ||
399 | d->len = len; | |
400 | d->map = bl_map_concat; | |
401 | return 0; | |
402 | } | |
403 | ||
404 | static int | |
405 | bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, | |
406 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
407 | { | |
408 | struct pnfs_block_volume *v = &volumes[idx]; | |
409 | u64 len = 0; | |
410 | int ret, i; | |
411 | ||
412 | d->children = kcalloc(v->stripe.volumes_count, | |
413 | sizeof(struct pnfs_block_dev), GFP_KERNEL); | |
414 | if (!d->children) | |
415 | return -ENOMEM; | |
416 | ||
417 | for (i = 0; i < v->stripe.volumes_count; i++) { | |
418 | ret = bl_parse_deviceid(server, &d->children[i], | |
419 | volumes, v->stripe.volumes[i], gfp_mask); | |
420 | if (ret) | |
421 | return ret; | |
422 | ||
423 | d->nr_children++; | |
424 | len += d->children[i].len; | |
425 | } | |
426 | ||
427 | d->len = len; | |
428 | d->chunk_size = v->stripe.chunk_size; | |
429 | d->map = bl_map_stripe; | |
430 | return 0; | |
431 | } | |
432 | ||
433 | static int | |
434 | bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, | |
435 | struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) | |
436 | { | |
437 | switch (volumes[idx].type) { | |
438 | case PNFS_BLOCK_VOLUME_SIMPLE: | |
439 | return bl_parse_simple(server, d, volumes, idx, gfp_mask); | |
440 | case PNFS_BLOCK_VOLUME_SLICE: | |
441 | return bl_parse_slice(server, d, volumes, idx, gfp_mask); | |
442 | case PNFS_BLOCK_VOLUME_CONCAT: | |
443 | return bl_parse_concat(server, d, volumes, idx, gfp_mask); | |
444 | case PNFS_BLOCK_VOLUME_STRIPE: | |
445 | return bl_parse_stripe(server, d, volumes, idx, gfp_mask); | |
d9186c03 CH |
446 | case PNFS_BLOCK_VOLUME_SCSI: |
447 | return bl_parse_scsi(server, d, volumes, idx, gfp_mask); | |
5c83746a CH |
448 | default: |
449 | dprintk("unsupported volume type: %d\n", volumes[idx].type); | |
450 | return -EIO; | |
451 | } | |
452 | } | |
453 | ||
454 | struct nfs4_deviceid_node * | |
455 | bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | |
456 | gfp_t gfp_mask) | |
457 | { | |
458 | struct nfs4_deviceid_node *node = NULL; | |
459 | struct pnfs_block_volume *volumes; | |
460 | struct pnfs_block_dev *top; | |
461 | struct xdr_stream xdr; | |
462 | struct xdr_buf buf; | |
463 | struct page *scratch; | |
464 | int nr_volumes, ret, i; | |
465 | __be32 *p; | |
466 | ||
467 | scratch = alloc_page(gfp_mask); | |
468 | if (!scratch) | |
469 | goto out; | |
470 | ||
471 | xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); | |
472 | xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE); | |
473 | ||
474 | p = xdr_inline_decode(&xdr, sizeof(__be32)); | |
475 | if (!p) | |
476 | goto out_free_scratch; | |
477 | nr_volumes = be32_to_cpup(p++); | |
478 | ||
479 | volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), | |
480 | gfp_mask); | |
481 | if (!volumes) | |
482 | goto out_free_scratch; | |
483 | ||
484 | for (i = 0; i < nr_volumes; i++) { | |
485 | ret = nfs4_block_decode_volume(&xdr, &volumes[i]); | |
486 | if (ret < 0) | |
487 | goto out_free_volumes; | |
488 | } | |
489 | ||
490 | top = kzalloc(sizeof(*top), gfp_mask); | |
491 | if (!top) | |
492 | goto out_free_volumes; | |
493 | ||
494 | ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); | |
495 | if (ret) { | |
496 | bl_free_device(top); | |
497 | kfree(top); | |
498 | goto out_free_volumes; | |
499 | } | |
500 | ||
501 | node = &top->node; | |
502 | nfs4_init_deviceid_node(node, server, &pdev->dev_id); | |
503 | ||
504 | out_free_volumes: | |
505 | kfree(volumes); | |
506 | out_free_scratch: | |
507 | __free_page(scratch); | |
508 | out: | |
509 | return node; | |
510 | } |