#include <linux/suspend.h>
#include <linux/mm_inline.h>
#include <linux/firmware-map.h>
+#include <linux/stop_machine.h>
#include <asm/tlbflush.h>
mutex_lock(&ppb_lock);
__free_pages_bootmem(page, 0);
mutex_unlock(&ppb_lock);
+ totalram_pages++;
}
}
}
pfn = pgdat->node_start_pfn;
- end_pfn = pfn + pgdat->node_spanned_pages;
+ end_pfn = pgdat_end_pfn(pgdat);
/* register_section info */
for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
set_page_links(pfn_to_page(pfn), zid, nid, pfn);
}
+/* Can fail with -ENOMEM from allocating a wait table with vmalloc() or
+ * alloc_bootmem_node_nopanic() */
+static int __ref ensure_zone_is_initialized(struct zone *zone,
+ unsigned long start_pfn, unsigned long num_pages)
+{
+ if (!zone_is_initialized(zone))
+ return init_currently_empty_zone(zone, start_pfn, num_pages,
+ MEMMAP_HOTPLUG);
+ return 0;
+}
+
static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long flags;
unsigned long z1_start_pfn;
- if (!z1->wait_table) {
- ret = init_currently_empty_zone(z1, start_pfn,
- end_pfn - start_pfn, MEMMAP_HOTPLUG);
- if (ret)
- return ret;
- }
+ ret = ensure_zone_is_initialized(z1, start_pfn, end_pfn - start_pfn);
+ if (ret)
+ return ret;
pgdat_resize_lock(z1->zone_pgdat, &flags);
/* can't move pfns which are higher than @z2 */
- if (end_pfn > z2->zone_start_pfn + z2->spanned_pages)
+ if (end_pfn > zone_end_pfn(z2))
goto out_fail;
/* the move out part mast at the left most of @z2 */
if (start_pfn > z2->zone_start_pfn)
z1_start_pfn = start_pfn;
resize_zone(z1, z1_start_pfn, end_pfn);
- resize_zone(z2, end_pfn, z2->zone_start_pfn + z2->spanned_pages);
+ resize_zone(z2, end_pfn, zone_end_pfn(z2));
pgdat_resize_unlock(z1->zone_pgdat, &flags);
unsigned long flags;
unsigned long z2_end_pfn;
- if (!z2->wait_table) {
- ret = init_currently_empty_zone(z2, start_pfn,
- end_pfn - start_pfn, MEMMAP_HOTPLUG);
- if (ret)
- return ret;
- }
+ ret = ensure_zone_is_initialized(z2, start_pfn, end_pfn - start_pfn);
+ if (ret)
+ return ret;
pgdat_resize_lock(z1->zone_pgdat, &flags);
if (z1->zone_start_pfn > start_pfn)
goto out_fail;
/* the move out part mast at the right most of @z1 */
- if (z1->zone_start_pfn + z1->spanned_pages > end_pfn)
+ if (zone_end_pfn(z1) > end_pfn)
goto out_fail;
/* must included/overlap */
- if (start_pfn >= z1->zone_start_pfn + z1->spanned_pages)
+ if (start_pfn >= zone_end_pfn(z1))
goto out_fail;
/* use end_pfn for z2's end_pfn if z2 is empty */
if (z2->spanned_pages)
- z2_end_pfn = z2->zone_start_pfn + z2->spanned_pages;
+ z2_end_pfn = zone_end_pfn(z2);
else
z2_end_pfn = end_pfn;
int nid = pgdat->node_id;
int zone_type;
unsigned long flags;
+ int ret;
zone_type = zone - pgdat->node_zones;
- if (!zone->wait_table) {
- int ret;
+ ret = ensure_zone_is_initialized(zone, phys_start_pfn, nr_pages);
+ if (ret)
+ return ret;
- ret = init_currently_empty_zone(zone, phys_start_pfn,
- nr_pages, MEMMAP_HOTPLUG);
- if (ret)
- return ret;
- }
pgdat_resize_lock(zone->zone_pgdat, &flags);
grow_zone_span(zone, phys_start_pfn, phys_start_pfn + nr_pages);
grow_pgdat_span(zone->zone_pgdat, phys_start_pfn,
unsigned long zholes_size[MAX_NR_ZONES] = {0};
unsigned long start_pfn = start >> PAGE_SHIFT;
- pgdat = arch_alloc_nodedata(nid);
- if (!pgdat)
- return NULL;
+ pgdat = NODE_DATA(nid);
+ if (!pgdat) {
+ pgdat = arch_alloc_nodedata(nid);
+ if (!pgdat)
+ return NULL;
- arch_refresh_nodedata(nid, pgdat);
+ arch_refresh_nodedata(nid, pgdat);
+ }
/* we can use NODE_DATA(nid) from here */
int __ref add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat = NULL;
- int new_pgdat = 0;
+ bool new_pgdat;
+ bool new_node;
struct resource *res;
int ret;
if (!res)
goto out;
- if (!node_online(nid)) {
+ { /* Stupid hack to suppress address-never-null warning */
+ void *p = NODE_DATA(nid);
+ new_pgdat = !p;
+ }
+ new_node = !node_online(nid);
+ if (new_node) {
pgdat = hotadd_new_pgdat(nid, start);
ret = -ENOMEM;
if (!pgdat)
goto error;
- new_pgdat = 1;
}
/* call arch's memory hotadd */
/* we online node here. we can't roll back from here. */
node_set_online(nid);
- if (new_pgdat) {
+ if (new_node) {
ret = register_one_node(nid);
/*
* If sysfs file of new node can't create, cpu on the node
* migrate_pages returns # of failed pages.
*/
ret = migrate_pages(&source, alloc_migrate_target, 0,
- true, MIGRATE_SYNC,
- MR_MEMORY_HOTPLUG);
+ MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
if (ret)
putback_lru_pages(&source);
}
return ret;
}
-int __ref remove_memory(u64 start, u64 size)
+static int check_cpu_on_node(void *data)
+{
+ struct pglist_data *pgdat = data;
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ if (cpu_to_node(cpu) == pgdat->node_id)
+ /*
+ * the cpu on this node isn't removed, and we can't
+ * offline this node.
+ */
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static void unmap_cpu_on_node(void *data)
+{
+#ifdef CONFIG_ACPI_NUMA
+ struct pglist_data *pgdat = data;
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ if (cpu_to_node(cpu) == pgdat->node_id)
+ numa_clear_node(cpu);
+#endif
+}
+
+static int check_and_unmap_cpu_on_node(void *data)
+{
+ int ret = check_cpu_on_node(data);
+
+ if (ret)
+ return ret;
+
+ /*
+ * the node will be offlined when we come here, so we can clear
+ * the cpu_to_node() now.
+ */
+
+ unmap_cpu_on_node(data);
+ return 0;
+}
+
+/* offline the node if all memory sections of this node are removed */
+void try_offline_node(int nid)
+{
+ pg_data_t *pgdat = NODE_DATA(nid);
+ unsigned long start_pfn = pgdat->node_start_pfn;
+ unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
+ unsigned long pfn;
+ struct page *pgdat_page = virt_to_page(pgdat);
+ int i;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ unsigned long section_nr = pfn_to_section_nr(pfn);
+
+ if (!present_section_nr(section_nr))
+ continue;
+
+ if (pfn_to_nid(pfn) != nid)
+ continue;
+
+ /*
+ * some memory sections of this node are not removed, and we
+ * can't offline node now.
+ */
+ return;
+ }
+
+ if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL))
+ return;
+
+ /*
+ * all memory/cpu of this node are removed, we can offline this
+ * node now.
+ */
+ node_set_offline(nid);
+ unregister_one_node(nid);
+
+ if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
+ /* node data is allocated from boot memory */
+ return;
+
+ /* free waittable in each zone */
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+
+ if (zone->wait_table)
+ vfree(zone->wait_table);
+ }
+
+ /*
+ * Since there is no way to guarentee the address of pgdat/zone is not
+ * on stack of any kernel threads or used by other kernel objects
+ * without reference counting or other symchronizing method, do not
+ * reset node_data and free pgdat here. Just reset it to 0 and reuse
+ * the memory when the node is online again.
+ */
+ memset(pgdat, 0, sizeof(*pgdat));
+}
+EXPORT_SYMBOL(try_offline_node);
+
+int __ref remove_memory(int nid, u64 start, u64 size)
{
unsigned long start_pfn, end_pfn;
int ret = 0;
arch_remove_memory(start, size);
+ try_offline_node(nid);
+
unlock_memory_hotplug();
return 0;
{
return -EINVAL;
}
-int remove_memory(u64 start, u64 size)
+int remove_memory(int nid, u64 start, u64 size)
{
return -EINVAL;
}