/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
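
/*
 * Check whether the huge-page pmd page already used by svma can also serve
 * vma at addr: both mappings must hit the same pmd index, carry identical
 * vm_flags, and svma must span the whole PUD-aligned region.  Returns the
 * corresponding address in svma, or 0 if sharing is not possible.
 */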
static unsigned long page_table_shareable(struct vm_area_struct *svma,
				struct vm_area_struct *vma,
				unsigned long addr, pgoff_t idx)
{
	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
				svma->vm_start;
	unsigned long sbase = saddr & PUD_MASK;
	unsigned long s_end = sbase + PUD_SIZE;

	/*
	 * match the virtual addresses, permission and the alignment of the
	 * page table page.
	 */
	if (pmd_index(addr) != pmd_index(saddr) ||
	    vma->vm_flags != svma->vm_flags ||
	    sbase < svma->vm_start || svma->vm_end < s_end)
		return 0;

	return saddr;
}
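
/*
 * A pmd page can only be shared for VM_MAYSHARE mappings that cover the
 * entire PUD-aligned region containing addr.
 */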
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long end = base + PUD_SIZE;

	/*
	 * check on proper vm_flags and page table alignment
	 */
	if (vma->vm_flags & VM_MAYSHARE &&
	    vma->vm_start <= base && end <= vma->vm_end)
		return 1;
	return 0;
}

/*
 * search for a shareable pmd page for hugetlb.
 */
static void huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
	struct vm_area_struct *vma = find_vma(mm, addr);
	struct address_space *mapping = vma->vm_file->f_mapping;
	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
			vma->vm_pgoff;
	struct prio_tree_iter iter;
	struct vm_area_struct *svma;
	unsigned long saddr;
	pte_t *spte = NULL;

	if (!vma_shareable(vma, addr))
		return;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spte = huge_pte_offset(svma->vm_mm, saddr);
			if (spte) {
				get_page(virt_to_page(spte));
				break;
			}
		}
	}

	if (!spte)
		goto out;

	spin_lock(&mm->page_table_lock);
	if (pud_none(*pud))
		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
	else
		put_page(virt_to_page(spte));
	spin_unlock(&mm->page_table_lock);
out:
	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * unmap huge page backed by shared pte.
 *
 * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
 * indicated by page_count > 1, unmap is achieved by clearing pud and
 * decrementing the ref count. If count == 1, the pte page is not shared.
 *
 * called with vma->vm_mm->page_table_lock held.
 *
 * returns: 1 successfully unmapped a shared pte page
 *	    0 the underlying pte page is not shared, or it is the last user
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	pgd_t *pgd = pgd_offset(mm, *addr);
	pud_t *pud = pud_offset(pgd, *addr);

	BUG_ON(page_count(virt_to_page(ptep)) == 0);
	if (page_count(virt_to_page(ptep)) == 1)
		return 0;

	pud_clear(pud);
	put_page(virt_to_page(ptep));
	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
	return 1;
}
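
/*
 * Allocate (or look up) the page-table slot that will hold the huge pte for
 * addr: walk the pgd, allocate a pud if needed, try to reuse an existing
 * pmd page via huge_pmd_share(), then allocate the pmd.  The pmd entry
 * itself serves as the huge pte.
 */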
pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		if (pud_none(*pud))
			huge_pmd_share(mm, addr, pud);
		pte = (pte_t *) pmd_alloc(mm, pud, addr);
	}
	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));

	return pte;
}
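
/*
 * Look up the huge pte for addr without allocating; returns NULL when the
 * intermediate page-table levels are not present.
 */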
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd = NULL;

	pgd = pgd_offset(mm, addr);
	if (pgd_present(*pgd)) {
		pud = pud_offset(pgd, addr);
		if (pud_present(*pud))
			pmd = pmd_offset(pud, addr);
	}
	return (pte_t *) pmd;
}
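
/*
 * Two sets of follow_huge_*() and p?d_huge() helpers follow: the "#if 0"
 * block is disabled test code, while the live versions detect a huge
 * mapping via the PSE bit of the pmd entry and return the corresponding
 * struct page.
 */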
#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	unsigned long start = address;
	int length = 1;
	int nr;
	struct page *page;
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address);

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

	WARN_ON(!PageHead(page));

	return page;
}

int pmd_huge(pmd_t pmd)
{
	return 0;
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	return NULL;
}

#else

struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}

int pmd_huge(pmd_t pmd)
{
	return !!(pmd_val(pmd) & _PAGE_PSE);
}

int pud_huge(pud_t pud)
{
	return 0;
}

struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pmd);
	if (page)
		page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
	return page;
}

struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int write)
{
	struct page *page;

	page = pte_page(*(pte_t *)pud);
	if (page)
		page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
	return page;
}

#endif

/* x86_64 also uses this file */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len > mm->cached_hole_size) {
		start_addr = mm->free_area_cache;
	} else {
		start_addr = TASK_UNMAPPED_BASE;
		mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point: (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		if (!vma || addr + len <= vma->vm_start) {
			mm->free_area_cache = addr + len;
			return addr;
		}
		if (addr + mm->cached_hole_size < vma->vm_start)
			mm->cached_hole_size = vma->vm_start - addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
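
/*
 * Top-down variant: start just below mm->mmap_base and walk downwards,
 * tracking the largest hole seen so the free_area_cache hint can be reused;
 * if nothing fits, fall back to the bottom-up search.
 */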
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma, *prev_vma;
	unsigned long base = mm->mmap_base, addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	int first_time = 1;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	if (len <= largest_hole) {
		largest_hole = 0;
		mm->free_area_cache = base;
	}
try_again:
	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & HPAGE_MASK;
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
			return addr;

		/*
		 * new region fits between prev_vma->vm_end and
		 * vma->vm_start, use it:
		 */
		if (addr + len <= vma->vm_start &&
		    (!prev_vma || (addr >= prev_vma->vm_end))) {
			/* remember the address as a hint for next time */
			mm->cached_hole_size = largest_hole;
			return (mm->free_area_cache = addr);
		} else {
			/* pull free_area_cache down to the first hole */
			if (mm->free_area_cache == vma->vm_end) {
				mm->free_area_cache = vma->vm_start;
				mm->cached_hole_size = largest_hole;
			}
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
			largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & HPAGE_MASK;
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (first_time) {
		mm->free_area_cache = base;
		largest_hole = 0;
		first_time = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}
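
/*
 * Common entry point: validate the requested length and hint, then use the
 * bottom-up or top-down allocator to match the layout this mm already uses
 * for regular mappings.
 */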
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}
	if (mm->get_unmapped_area == arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
				pgoff, flags);
	else
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);
}

#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/