2 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 #include <linux/memremap.h>
14 #include <linux/blkdev.h>
15 #include <linux/device.h>
16 #include <linux/genhd.h>
17 #include <linux/sizes.h>
18 #include <linux/slab.h>
25 static void nd_pfn_release(struct device
*dev
)
27 struct nd_region
*nd_region
= to_nd_region(dev
->parent
);
28 struct nd_pfn
*nd_pfn
= to_nd_pfn(dev
);
30 dev_dbg(dev
, "%s\n", __func__
);
31 nd_detach_ndns(&nd_pfn
->dev
, &nd_pfn
->ndns
);
32 ida_simple_remove(&nd_region
->pfn_ida
, nd_pfn
->id
);
37 static struct device_type nd_pfn_device_type
= {
39 .release
= nd_pfn_release
,
42 bool is_nd_pfn(struct device
*dev
)
44 return dev
? dev
->type
== &nd_pfn_device_type
: false;
46 EXPORT_SYMBOL(is_nd_pfn
);
48 struct nd_pfn
*to_nd_pfn(struct device
*dev
)
50 struct nd_pfn
*nd_pfn
= container_of(dev
, struct nd_pfn
, dev
);
52 WARN_ON(!is_nd_pfn(dev
));
55 EXPORT_SYMBOL(to_nd_pfn
);
57 static ssize_t
mode_show(struct device
*dev
,
58 struct device_attribute
*attr
, char *buf
)
60 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
62 switch (nd_pfn
->mode
) {
64 return sprintf(buf
, "ram\n");
66 return sprintf(buf
, "pmem\n");
68 return sprintf(buf
, "none\n");
72 static ssize_t
mode_store(struct device
*dev
,
73 struct device_attribute
*attr
, const char *buf
, size_t len
)
75 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
85 if (strncmp(buf
, "pmem\n", n
) == 0
86 || strncmp(buf
, "pmem", n
) == 0) {
87 nd_pfn
->mode
= PFN_MODE_PMEM
;
88 } else if (strncmp(buf
, "ram\n", n
) == 0
89 || strncmp(buf
, "ram", n
) == 0)
90 nd_pfn
->mode
= PFN_MODE_RAM
;
91 else if (strncmp(buf
, "none\n", n
) == 0
92 || strncmp(buf
, "none", n
) == 0)
93 nd_pfn
->mode
= PFN_MODE_NONE
;
97 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
98 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
99 nvdimm_bus_unlock(dev
);
102 return rc
? rc
: len
;
104 static DEVICE_ATTR_RW(mode
);
106 static ssize_t
align_show(struct device
*dev
,
107 struct device_attribute
*attr
, char *buf
)
109 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
111 return sprintf(buf
, "%lx\n", nd_pfn
->align
);
114 static ssize_t
__align_store(struct nd_pfn
*nd_pfn
, const char *buf
)
119 rc
= kstrtoul(buf
, 0, &val
);
123 if (!is_power_of_2(val
) || val
< PAGE_SIZE
|| val
> SZ_1G
)
126 if (nd_pfn
->dev
.driver
)
134 static ssize_t
align_store(struct device
*dev
,
135 struct device_attribute
*attr
, const char *buf
, size_t len
)
137 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
141 nvdimm_bus_lock(dev
);
142 rc
= __align_store(nd_pfn
, buf
);
143 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
144 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
145 nvdimm_bus_unlock(dev
);
148 return rc
? rc
: len
;
150 static DEVICE_ATTR_RW(align
);
152 static ssize_t
uuid_show(struct device
*dev
,
153 struct device_attribute
*attr
, char *buf
)
155 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
158 return sprintf(buf
, "%pUb\n", nd_pfn
->uuid
);
159 return sprintf(buf
, "\n");
162 static ssize_t
uuid_store(struct device
*dev
,
163 struct device_attribute
*attr
, const char *buf
, size_t len
)
165 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
169 rc
= nd_uuid_store(dev
, &nd_pfn
->uuid
, buf
, len
);
170 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
171 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
174 return rc
? rc
: len
;
176 static DEVICE_ATTR_RW(uuid
);
178 static ssize_t
namespace_show(struct device
*dev
,
179 struct device_attribute
*attr
, char *buf
)
181 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
184 nvdimm_bus_lock(dev
);
185 rc
= sprintf(buf
, "%s\n", nd_pfn
->ndns
186 ? dev_name(&nd_pfn
->ndns
->dev
) : "");
187 nvdimm_bus_unlock(dev
);
191 static ssize_t
namespace_store(struct device
*dev
,
192 struct device_attribute
*attr
, const char *buf
, size_t len
)
194 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
198 nvdimm_bus_lock(dev
);
199 rc
= nd_namespace_store(dev
, &nd_pfn
->ndns
, buf
, len
);
200 dev_dbg(dev
, "%s: result: %zd wrote: %s%s", __func__
,
201 rc
, buf
, buf
[len
- 1] == '\n' ? "" : "\n");
202 nvdimm_bus_unlock(dev
);
207 static DEVICE_ATTR_RW(namespace);
209 static ssize_t
resource_show(struct device
*dev
,
210 struct device_attribute
*attr
, char *buf
)
212 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
217 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
218 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
219 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
220 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
221 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
223 rc
= sprintf(buf
, "%#llx\n", (unsigned long long) nsio
->res
.start
224 + start_pad
+ offset
);
226 /* no address to convey if the pfn instance is disabled */
233 static DEVICE_ATTR_RO(resource
);
235 static ssize_t
size_show(struct device
*dev
,
236 struct device_attribute
*attr
, char *buf
)
238 struct nd_pfn
*nd_pfn
= to_nd_pfn_safe(dev
);
243 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
244 u64 offset
= __le64_to_cpu(pfn_sb
->dataoff
);
245 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
246 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
247 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
248 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
250 rc
= sprintf(buf
, "%llu\n", (unsigned long long)
251 resource_size(&nsio
->res
) - start_pad
252 - end_trunc
- offset
);
254 /* no size to convey if the pfn instance is disabled */
261 static DEVICE_ATTR_RO(size
);
263 static struct attribute
*nd_pfn_attributes
[] = {
265 &dev_attr_namespace
.attr
,
267 &dev_attr_align
.attr
,
268 &dev_attr_resource
.attr
,
273 struct attribute_group nd_pfn_attribute_group
= {
274 .attrs
= nd_pfn_attributes
,
277 static const struct attribute_group
*nd_pfn_attribute_groups
[] = {
278 &nd_pfn_attribute_group
,
279 &nd_device_attribute_group
,
280 &nd_numa_attribute_group
,
284 struct device
*nd_pfn_devinit(struct nd_pfn
*nd_pfn
,
285 struct nd_namespace_common
*ndns
)
287 struct device
*dev
= &nd_pfn
->dev
;
292 nd_pfn
->mode
= PFN_MODE_NONE
;
293 nd_pfn
->align
= HPAGE_SIZE
;
295 device_initialize(&nd_pfn
->dev
);
296 if (ndns
&& !__nd_attach_ndns(&nd_pfn
->dev
, ndns
, &nd_pfn
->ndns
)) {
297 dev_dbg(&ndns
->dev
, "%s failed, already claimed by %s\n",
298 __func__
, dev_name(ndns
->claim
));
305 static struct nd_pfn
*nd_pfn_alloc(struct nd_region
*nd_region
)
307 struct nd_pfn
*nd_pfn
;
310 nd_pfn
= kzalloc(sizeof(*nd_pfn
), GFP_KERNEL
);
314 nd_pfn
->id
= ida_simple_get(&nd_region
->pfn_ida
, 0, 0, GFP_KERNEL
);
315 if (nd_pfn
->id
< 0) {
321 dev_set_name(dev
, "pfn%d.%d", nd_region
->id
, nd_pfn
->id
);
322 dev
->groups
= nd_pfn_attribute_groups
;
323 dev
->type
= &nd_pfn_device_type
;
324 dev
->parent
= &nd_region
->dev
;
329 struct device
*nd_pfn_create(struct nd_region
*nd_region
)
331 struct nd_pfn
*nd_pfn
;
334 if (!is_nd_pmem(&nd_region
->dev
))
337 nd_pfn
= nd_pfn_alloc(nd_region
);
338 dev
= nd_pfn_devinit(nd_pfn
, NULL
);
340 __nd_device_register(dev
);
344 int nd_pfn_validate(struct nd_pfn
*nd_pfn
, const char *sig
)
346 u64 checksum
, offset
;
347 struct nd_namespace_io
*nsio
;
348 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
349 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
350 const u8
*parent_uuid
= nd_dev_to_uuid(&ndns
->dev
);
352 if (!pfn_sb
|| !ndns
)
355 if (!is_nd_pmem(nd_pfn
->dev
.parent
))
358 if (nvdimm_read_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
)))
361 if (memcmp(pfn_sb
->signature
, sig
, PFN_SIG_LEN
) != 0)
364 checksum
= le64_to_cpu(pfn_sb
->checksum
);
365 pfn_sb
->checksum
= 0;
366 if (checksum
!= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
))
368 pfn_sb
->checksum
= cpu_to_le64(checksum
);
370 if (memcmp(pfn_sb
->parent_uuid
, parent_uuid
, 16) != 0)
373 if (__le16_to_cpu(pfn_sb
->version_minor
) < 1) {
374 pfn_sb
->start_pad
= 0;
375 pfn_sb
->end_trunc
= 0;
378 if (__le16_to_cpu(pfn_sb
->version_minor
) < 2)
381 switch (le32_to_cpu(pfn_sb
->mode
)) {
390 /* from probe we allocate */
391 nd_pfn
->uuid
= kmemdup(pfn_sb
->uuid
, 16, GFP_KERNEL
);
395 /* from init we validate */
396 if (memcmp(nd_pfn
->uuid
, pfn_sb
->uuid
, 16) != 0)
400 if (nd_pfn
->align
== 0)
401 nd_pfn
->align
= le32_to_cpu(pfn_sb
->align
);
402 if (nd_pfn
->align
> nvdimm_namespace_capacity(ndns
)) {
403 dev_err(&nd_pfn
->dev
, "alignment: %lx exceeds capacity %llx\n",
404 nd_pfn
->align
, nvdimm_namespace_capacity(ndns
));
409 * These warnings are verbose because they can only trigger in
410 * the case where the physical address alignment of the
411 * namespace has changed since the pfn superblock was
414 offset
= le64_to_cpu(pfn_sb
->dataoff
);
415 nsio
= to_nd_namespace_io(&ndns
->dev
);
416 if (offset
>= resource_size(&nsio
->res
)) {
417 dev_err(&nd_pfn
->dev
, "pfn array size exceeds capacity of %s\n",
418 dev_name(&ndns
->dev
));
422 if ((nd_pfn
->align
&& !IS_ALIGNED(offset
, nd_pfn
->align
))
423 || !IS_ALIGNED(offset
, PAGE_SIZE
)) {
424 dev_err(&nd_pfn
->dev
, "bad offset: %#llx dax disabled\n",
431 EXPORT_SYMBOL(nd_pfn_validate
);
433 int nd_pfn_probe(struct device
*dev
, struct nd_namespace_common
*ndns
)
436 struct nd_pfn
*nd_pfn
;
437 struct device
*pfn_dev
;
438 struct nd_pfn_sb
*pfn_sb
;
439 struct nd_region
*nd_region
= to_nd_region(ndns
->dev
.parent
);
444 nvdimm_bus_lock(&ndns
->dev
);
445 nd_pfn
= nd_pfn_alloc(nd_region
);
446 pfn_dev
= nd_pfn_devinit(nd_pfn
, ndns
);
447 nvdimm_bus_unlock(&ndns
->dev
);
450 pfn_sb
= devm_kzalloc(dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
451 nd_pfn
= to_nd_pfn(pfn_dev
);
452 nd_pfn
->pfn_sb
= pfn_sb
;
453 rc
= nd_pfn_validate(nd_pfn
, PFN_SIG
);
454 dev_dbg(dev
, "%s: pfn: %s\n", __func__
,
455 rc
== 0 ? dev_name(pfn_dev
) : "<none>");
457 __nd_detach_ndns(pfn_dev
, &nd_pfn
->ndns
);
460 __nd_device_register(pfn_dev
);
464 EXPORT_SYMBOL(nd_pfn_probe
);
467 * We hotplug memory at section granularity, pad the reserved area from
468 * the previous section base to the namespace base address.
470 static unsigned long init_altmap_base(resource_size_t base
)
472 unsigned long base_pfn
= PHYS_PFN(base
);
474 return PFN_SECTION_ALIGN_DOWN(base_pfn
);
477 static unsigned long init_altmap_reserve(resource_size_t base
)
479 unsigned long reserve
= PHYS_PFN(SZ_8K
);
480 unsigned long base_pfn
= PHYS_PFN(base
);
482 reserve
+= base_pfn
- PFN_SECTION_ALIGN_DOWN(base_pfn
);
486 static struct vmem_altmap
*__nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
487 struct resource
*res
, struct vmem_altmap
*altmap
)
489 struct nd_pfn_sb
*pfn_sb
= nd_pfn
->pfn_sb
;
490 u64 offset
= le64_to_cpu(pfn_sb
->dataoff
);
491 u32 start_pad
= __le32_to_cpu(pfn_sb
->start_pad
);
492 u32 end_trunc
= __le32_to_cpu(pfn_sb
->end_trunc
);
493 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
494 struct nd_namespace_io
*nsio
= to_nd_namespace_io(&ndns
->dev
);
495 resource_size_t base
= nsio
->res
.start
+ start_pad
;
496 struct vmem_altmap __altmap
= {
497 .base_pfn
= init_altmap_base(base
),
498 .reserve
= init_altmap_reserve(base
),
501 memcpy(res
, &nsio
->res
, sizeof(*res
));
502 res
->start
+= start_pad
;
503 res
->end
-= end_trunc
;
505 nd_pfn
->mode
= le32_to_cpu(nd_pfn
->pfn_sb
->mode
);
506 if (nd_pfn
->mode
== PFN_MODE_RAM
) {
508 return ERR_PTR(-EINVAL
);
509 nd_pfn
->npfns
= le64_to_cpu(pfn_sb
->npfns
);
511 } else if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
512 nd_pfn
->npfns
= (resource_size(res
) - offset
) / PAGE_SIZE
;
513 if (le64_to_cpu(nd_pfn
->pfn_sb
->npfns
) > nd_pfn
->npfns
)
514 dev_info(&nd_pfn
->dev
,
515 "number of pfns truncated from %lld to %ld\n",
516 le64_to_cpu(nd_pfn
->pfn_sb
->npfns
),
518 memcpy(altmap
, &__altmap
, sizeof(*altmap
));
519 altmap
->free
= PHYS_PFN(offset
- SZ_8K
);
522 return ERR_PTR(-ENXIO
);
527 static int nd_pfn_init(struct nd_pfn
*nd_pfn
)
529 u32 dax_label_reserve
= is_nd_dax(&nd_pfn
->dev
) ? SZ_128K
: 0;
530 struct nd_namespace_common
*ndns
= nd_pfn
->ndns
;
531 u32 start_pad
= 0, end_trunc
= 0;
532 resource_size_t start
, size
;
533 struct nd_namespace_io
*nsio
;
534 struct nd_region
*nd_region
;
535 struct nd_pfn_sb
*pfn_sb
;
542 pfn_sb
= devm_kzalloc(&nd_pfn
->dev
, sizeof(*pfn_sb
), GFP_KERNEL
);
546 nd_pfn
->pfn_sb
= pfn_sb
;
547 if (is_nd_dax(&nd_pfn
->dev
))
551 rc
= nd_pfn_validate(nd_pfn
, sig
);
555 /* no info block, do init */;
556 nd_region
= to_nd_region(nd_pfn
->dev
.parent
);
558 dev_info(&nd_pfn
->dev
,
559 "%s is read-only, unable to init metadata\n",
560 dev_name(&nd_region
->dev
));
564 memset(pfn_sb
, 0, sizeof(*pfn_sb
));
567 * Check if pmem collides with 'System RAM' when section aligned and
568 * trim it accordingly
570 nsio
= to_nd_namespace_io(&ndns
->dev
);
571 start
= PHYS_SECTION_ALIGN_DOWN(nsio
->res
.start
);
572 size
= resource_size(&nsio
->res
);
573 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
574 IORES_DESC_NONE
) == REGION_MIXED
) {
575 start
= nsio
->res
.start
;
576 start_pad
= PHYS_SECTION_ALIGN_UP(start
) - start
;
579 start
= nsio
->res
.start
;
580 size
= PHYS_SECTION_ALIGN_UP(start
+ size
) - start
;
581 if (region_intersects(start
, size
, IORESOURCE_SYSTEM_RAM
,
582 IORES_DESC_NONE
) == REGION_MIXED
) {
583 size
= resource_size(&nsio
->res
);
584 end_trunc
= start
+ size
- PHYS_SECTION_ALIGN_DOWN(start
+ size
);
587 if (start_pad
+ end_trunc
)
588 dev_info(&nd_pfn
->dev
, "%s section collision, truncate %d bytes\n",
589 dev_name(&ndns
->dev
), start_pad
+ end_trunc
);
592 * Note, we use 64 here for the standard size of struct page,
593 * debugging options may cause it to be larger in which case the
594 * implementation will limit the pfns advertised through
595 * ->direct_access() to those that are included in the memmap.
598 size
= resource_size(&nsio
->res
);
599 npfns
= (size
- start_pad
- end_trunc
- SZ_8K
) / SZ_4K
;
600 if (nd_pfn
->mode
== PFN_MODE_PMEM
) {
601 unsigned long memmap_size
;
604 * vmemmap_populate_hugepages() allocates the memmap array in
607 memmap_size
= ALIGN(64 * npfns
, HPAGE_SIZE
);
608 offset
= ALIGN(start
+ SZ_8K
+ memmap_size
+ dax_label_reserve
,
609 nd_pfn
->align
) - start
;
610 } else if (nd_pfn
->mode
== PFN_MODE_RAM
)
611 offset
= ALIGN(start
+ SZ_8K
+ dax_label_reserve
,
612 nd_pfn
->align
) - start
;
616 if (offset
+ start_pad
+ end_trunc
>= size
) {
617 dev_err(&nd_pfn
->dev
, "%s unable to satisfy requested alignment\n",
618 dev_name(&ndns
->dev
));
622 npfns
= (size
- offset
- start_pad
- end_trunc
) / SZ_4K
;
623 pfn_sb
->mode
= cpu_to_le32(nd_pfn
->mode
);
624 pfn_sb
->dataoff
= cpu_to_le64(offset
);
625 pfn_sb
->npfns
= cpu_to_le64(npfns
);
626 memcpy(pfn_sb
->signature
, sig
, PFN_SIG_LEN
);
627 memcpy(pfn_sb
->uuid
, nd_pfn
->uuid
, 16);
628 memcpy(pfn_sb
->parent_uuid
, nd_dev_to_uuid(&ndns
->dev
), 16);
629 pfn_sb
->version_major
= cpu_to_le16(1);
630 pfn_sb
->version_minor
= cpu_to_le16(2);
631 pfn_sb
->start_pad
= cpu_to_le32(start_pad
);
632 pfn_sb
->end_trunc
= cpu_to_le32(end_trunc
);
633 pfn_sb
->align
= cpu_to_le32(nd_pfn
->align
);
634 checksum
= nd_sb_checksum((struct nd_gen_sb
*) pfn_sb
);
635 pfn_sb
->checksum
= cpu_to_le64(checksum
);
637 return nvdimm_write_bytes(ndns
, SZ_4K
, pfn_sb
, sizeof(*pfn_sb
));
641 * Determine the effective resource range and vmem_altmap from an nd_pfn
644 struct vmem_altmap
*nvdimm_setup_pfn(struct nd_pfn
*nd_pfn
,
645 struct resource
*res
, struct vmem_altmap
*altmap
)
649 if (!nd_pfn
->uuid
|| !nd_pfn
->ndns
)
650 return ERR_PTR(-ENODEV
);
652 rc
= nd_pfn_init(nd_pfn
);
656 /* we need a valid pfn_sb before we can init a vmem_altmap */
657 return __nvdimm_setup_pfn(nd_pfn
, res
, altmap
);
659 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn
);