X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=mm%2Fmempolicy.c;h=0f1d2b8a952b900f899ea19233c84b862909323d;hb=8ad4b1fb8205340dba16b63467bb23efc27264d6;hp=2076b1542b8ac9ce32beba96e051bd70a62ebf96;hpb=9f75e1eff3edb2bb07349b94c28f4f2a6c66ca43;p=deliverable%2Flinux.git

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 2076b1542b8a..0f1d2b8a952b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,7 +93,7 @@ static kmem_cache_t *sn_cache;
 
 /* Highest zone. A specific allocation for a zone below that is not
    policed. */
-static int policy_zone;
+int policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -131,17 +131,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	if (!zl)
 		return NULL;
 	num = 0;
-	for_each_node_mask(nd, *nodes) {
-		int k;
-		for (k = MAX_NR_ZONES-1; k >= 0; k--) {
-			struct zone *z = &NODE_DATA(nd)->node_zones[k];
-			if (!z->present_pages)
-				continue;
-			zl->zones[num++] = z;
-			if (k > policy_zone)
-				policy_zone = k;
-		}
-	}
+	for_each_node_mask(nd, *nodes)
+		zl->zones[num++] = &NODE_DATA(nd)->node_zones[policy_zone];
 	zl->zones[num] = NULL;
 	return zl;
 }
@@ -161,6 +152,10 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 	switch (mode) {
 	case MPOL_INTERLEAVE:
 		policy->v.nodes = *nodes;
+		if (nodes_weight(*nodes) == 0) {
+			kmem_cache_free(policy_cache, policy);
+			return ERR_PTR(-EINVAL);
+		}
 		break;
 	case MPOL_PREFERRED:
 		policy->v.preferred_node = first_node(*nodes);
@@ -189,17 +184,15 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
 	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
-		unsigned long pfn;
+		struct page *page;
 		unsigned int nid;
 
 		if (!pte_present(*pte))
 			continue;
-		pfn = pte_pfn(*pte);
-		if (!pfn_valid(pfn)) {
-			print_bad_pte(vma, *pte, addr);
+		page = vm_normal_page(vma, addr, *pte);
+		if (!page)
 			continue;
-		}
-		nid = pfn_to_nid(pfn);
+		nid = page_to_nid(page);
 		if (!node_isset(nid, *nodes))
 			break;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -269,8 +262,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 	first = find_vma(mm, start);
 	if (!first)
 		return ERR_PTR(-EFAULT);
-	if (first->vm_flags & VM_RESERVED)
-		return ERR_PTR(-EACCES);
 	prev = NULL;
 	for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
 		if (!vma->vm_next && vma->vm_end < end)
@@ -457,6 +448,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	struct vm_area_struct *vma = NULL;
 	struct mempolicy *pol = current->mempolicy;
 
+	cpuset_update_current_mems_allowed();
 	if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
 		return -EINVAL;
 	if (flags & MPOL_F_ADDR) {
@@ -784,6 +776,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
 	return nid;
 }
 
+/* Determine a node number for interleave */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+		struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+	if (vma) {
+		unsigned long off;
+
+		off = vma->vm_pgoff;
+		off += (addr - vma->vm_start) >> shift;
+		return offset_il_node(pol, vma, off);
+	} else
+		return interleave_nodes(pol);
+}
+
+/* Return a zonelist suitable for a huge page allocation. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+	if (pol->policy == MPOL_INTERLEAVE) {
+		unsigned nid;
+
+		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+	}
+	return zonelist_policy(GFP_HIGHUSER, pol);
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -832,15 +852,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
-		if (vma) {
-			unsigned long off;
-			off = vma->vm_pgoff;
-			off += (addr - vma->vm_start) >> PAGE_SHIFT;
-			nid = offset_il_node(pol, vma, off);
-		} else {
-			/* fall back to process interleaving */
-			nid = interleave_nodes(pol);
-		}
+
+		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
 	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
@@ -938,54 +951,6 @@ void __mpol_free(struct mempolicy *p)
 	kmem_cache_free(policy_cache, p);
 }
 
-/*
- * Hugetlb policy. Same as above, just works with node numbers instead of
- * zonelists.
- */
-
-/* Find first node suitable for an allocation */
-int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
-{
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
-	switch (pol->policy) {
-	case MPOL_DEFAULT:
-		return numa_node_id();
-	case MPOL_BIND:
-		return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
-	case MPOL_INTERLEAVE:
-		return interleave_nodes(pol);
-	case MPOL_PREFERRED:
-		return pol->v.preferred_node >= 0 ?
-				pol->v.preferred_node : numa_node_id();
-	}
-	BUG();
-	return 0;
-}
-
-/* Find secondary valid nodes for an allocation */
-int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
-{
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
-	switch (pol->policy) {
-	case MPOL_PREFERRED:
-	case MPOL_DEFAULT:
-	case MPOL_INTERLEAVE:
-		return 1;
-	case MPOL_BIND: {
-		struct zone **z;
-		for (z = pol->v.zonelist->zones; *z; z++)
-			if ((*z)->zone_pgdat->node_id == nid)
-				return 1;
-		return 0;
-	}
-	default:
-		BUG();
-		return 0;
-	}
-}
-
 /*
  * Shared memory backing store policy support.
  *
@@ -1206,3 +1171,66 @@ void numa_default_policy(void)
 {
 	do_set_mempolicy(MPOL_DEFAULT, NULL);
 }
+
+/* Migrate a policy to a different set of nodes */
+static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
+			  const nodemask_t *new)
+{
+	nodemask_t tmp;
+
+	if (!pol)
+		return;
+
+	switch (pol->policy) {
+	case MPOL_DEFAULT:
+		break;
+	case MPOL_INTERLEAVE:
+		nodes_remap(tmp, pol->v.nodes, *old, *new);
+		pol->v.nodes = tmp;
+		current->il_next = node_remap(current->il_next, *old, *new);
+		break;
+	case MPOL_PREFERRED:
+		pol->v.preferred_node = node_remap(pol->v.preferred_node,
+						*old, *new);
+		break;
+	case MPOL_BIND: {
+		nodemask_t nodes;
+		struct zone **z;
+		struct zonelist *zonelist;
+
+		nodes_clear(nodes);
+		for (z = pol->v.zonelist->zones; *z; z++)
+			node_set((*z)->zone_pgdat->node_id, nodes);
+		nodes_remap(tmp, nodes, *old, *new);
+		nodes = tmp;
+
+		zonelist = bind_zonelist(&nodes);
+
+		/* If no mem, then zonelist is NULL and we keep old zonelist.
+		 * If that old zonelist has no remaining mems_allowed nodes,
+		 * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
+ */ + + if (zonelist) { + /* Good - got mem - substitute new zonelist */ + kfree(pol->v.zonelist); + pol->v.zonelist = zonelist; + } + break; + } + default: + BUG(); + break; + } +} + +/* + * Someone moved this task to different nodes. Fixup mempolicies. + * + * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well, + * once we have a cpuset mechanism to mark which cpuset subtree is migrating. + */ +void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new) +{ + rebind_policy(current->mempolicy, old, new); +}