Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * PPC64 (POWER4) Huge TLB Page Support for Kernel. | |
3 | * | |
4 | * Copyright (C) 2003 David Gibson, IBM Corporation. | |
5 | * | |
6 | * Based on the IA-32 version: | |
7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | |
8 | */ | |
9 | ||
10 | #include <linux/init.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/mm.h> | |
13 | #include <linux/hugetlb.h> | |
14 | #include <linux/pagemap.h> | |
1da177e4 LT |
15 | #include <linux/slab.h> |
16 | #include <linux/err.h> | |
17 | #include <linux/sysctl.h> | |
18 | #include <asm/mman.h> | |
19 | #include <asm/pgalloc.h> | |
20 | #include <asm/tlb.h> | |
21 | #include <asm/tlbflush.h> | |
22 | #include <asm/mmu_context.h> | |
23 | #include <asm/machdep.h> | |
24 | #include <asm/cputable.h> | |
25 | #include <asm/tlb.h> | |
94b2a439 | 26 | #include <asm/spu.h> |
1da177e4 LT |
27 | |
28 | #include <linux/sysctl.h> | |
29 | ||
c594adad DG |
/* Number of bits in the low and high area bitmaps.  The BUILD_BUG_ON()s
 * in the segment flushing / area opening code below require each of
 * these to equal the bit width of the u16 area masks. */
#define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
#define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)

/* log2 of the number of entries in one hugepte table.  With 64K base
 * pages the hugepte table hangs off a PMD entry, otherwise off a PUD
 * entry (see huge_pte_offset() / huge_pte_alloc()). */
#ifdef CONFIG_PPC_64K_PAGES
#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
#else
#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
#endif
#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)

/* Address range covered by a single hugepte table */
#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))

/* kmem cache the hugepte tables are allocated from */
#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])

/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
 * will choke on pointers to hugepte tables, which is handy for
 * catching screwups early. */
#define HUGEPD_OK	0x1

/* A huge page directory entry: address of a hugepte table with
 * HUGEPD_OK or'ed in, or 0 when empty. */
typedef struct { unsigned long pd; } hugepd_t;

/* True when the huge PD entry holds no table */
#define hugepd_none(hpd)	((hpd).pd == 0)
55 | ||
56 | static inline pte_t *hugepd_page(hugepd_t hpd) | |
57 | { | |
58 | BUG_ON(!(hpd.pd & HUGEPD_OK)); | |
59 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); | |
60 | } | |
61 | ||
62 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) | |
63 | { | |
64 | unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); | |
65 | pte_t *dir = hugepd_page(*hpdp); | |
66 | ||
67 | return dir + idx; | |
68 | } | |
69 | ||
70 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | |
71 | unsigned long address) | |
72 | { | |
73 | pte_t *new = kmem_cache_alloc(huge_pgtable_cache, | |
74 | GFP_KERNEL|__GFP_REPEAT); | |
75 | ||
76 | if (! new) | |
77 | return -ENOMEM; | |
78 | ||
79 | spin_lock(&mm->page_table_lock); | |
80 | if (!hugepd_none(*hpdp)) | |
81 | kmem_cache_free(huge_pgtable_cache, new); | |
82 | else | |
83 | hpdp->pd = (unsigned long)new | HUGEPD_OK; | |
84 | spin_unlock(&mm->page_table_lock); | |
85 | return 0; | |
86 | } | |
87 | ||
e28f7faf DG |
88 | /* Modelled after find_linux_pte() */ |
89 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |
1da177e4 | 90 | { |
e28f7faf DG |
91 | pgd_t *pg; |
92 | pud_t *pu; | |
1da177e4 | 93 | |
e28f7faf | 94 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
1da177e4 | 95 | |
e28f7faf DG |
96 | addr &= HPAGE_MASK; |
97 | ||
98 | pg = pgd_offset(mm, addr); | |
99 | if (!pgd_none(*pg)) { | |
100 | pu = pud_offset(pg, addr); | |
101 | if (!pud_none(*pu)) { | |
3c726f8d | 102 | #ifdef CONFIG_PPC_64K_PAGES |
f10a04c0 DG |
103 | pmd_t *pm; |
104 | pm = pmd_offset(pu, addr); | |
105 | if (!pmd_none(*pm)) | |
106 | return hugepte_offset((hugepd_t *)pm, addr); | |
107 | #else | |
108 | return hugepte_offset((hugepd_t *)pu, addr); | |
109 | #endif | |
e28f7faf DG |
110 | } |
111 | } | |
1da177e4 | 112 | |
e28f7faf | 113 | return NULL; |
1da177e4 LT |
114 | } |
115 | ||
e28f7faf | 116 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
1da177e4 | 117 | { |
e28f7faf DG |
118 | pgd_t *pg; |
119 | pud_t *pu; | |
f10a04c0 | 120 | hugepd_t *hpdp = NULL; |
1da177e4 | 121 | |
1da177e4 LT |
122 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
123 | ||
e28f7faf | 124 | addr &= HPAGE_MASK; |
1da177e4 | 125 | |
e28f7faf DG |
126 | pg = pgd_offset(mm, addr); |
127 | pu = pud_alloc(mm, pg, addr); | |
1da177e4 | 128 | |
e28f7faf | 129 | if (pu) { |
f10a04c0 DG |
130 | #ifdef CONFIG_PPC_64K_PAGES |
131 | pmd_t *pm; | |
e28f7faf | 132 | pm = pmd_alloc(mm, pu, addr); |
f10a04c0 DG |
133 | if (pm) |
134 | hpdp = (hugepd_t *)pm; | |
135 | #else | |
136 | hpdp = (hugepd_t *)pu; | |
137 | #endif | |
138 | } | |
139 | ||
140 | if (! hpdp) | |
141 | return NULL; | |
142 | ||
143 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) | |
144 | return NULL; | |
145 | ||
146 | return hugepte_offset(hpdp, addr); | |
147 | } | |
148 | ||
39dde65c CK |
149 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) |
150 | { | |
151 | return 0; | |
152 | } | |
153 | ||
f10a04c0 DG |
154 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) |
155 | { | |
156 | pte_t *hugepte = hugepd_page(*hpdp); | |
157 | ||
158 | hpdp->pd = 0; | |
159 | tlb->need_flush = 1; | |
160 | pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, | |
c9169f87 | 161 | PGF_CACHENUM_MASK)); |
f10a04c0 DG |
162 | } |
163 | ||
#ifdef CONFIG_PPC_64K_PAGES
/*
 * Free every hugepte table hanging off the PMD entries covering
 * [addr, end), then free the PMD table itself unless the floor/ceiling
 * tests below say the range it spans may extend outside what we were
 * allowed to free.
 */
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
				   unsigned long addr, unsigned long end,
				   unsigned long floor, unsigned long ceiling)
{
	/* Start of the region spanned by this PMD table */
	unsigned long table_start = addr & PUD_MASK;
	unsigned long next;
	pmd_t *pmd = pmd_offset(pud, addr);

	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_none(*pmd))
			free_hugepte_range(tlb, (hugepd_t *)pmd);
	} while (pmd++, addr = next, addr != end);

	if (table_start < floor)
		return;
	if (ceiling) {
		ceiling &= PUD_MASK;
		if (!ceiling)
			return;
	}
	/* "- 1" so that an end/ceiling of 0 compares as the top */
	if (end - 1 > ceiling - 1)
		return;

	pmd = pmd_offset(pud, table_start);
	pud_clear(pud);
	pmd_free_tlb(tlb, pmd);
}
#endif
198 | ||
199 | static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |
200 | unsigned long addr, unsigned long end, | |
201 | unsigned long floor, unsigned long ceiling) | |
202 | { | |
203 | pud_t *pud; | |
204 | unsigned long next; | |
205 | unsigned long start; | |
206 | ||
207 | start = addr; | |
208 | pud = pud_offset(pgd, addr); | |
209 | do { | |
210 | next = pud_addr_end(addr, end); | |
211 | #ifdef CONFIG_PPC_64K_PAGES | |
212 | if (pud_none_or_clear_bad(pud)) | |
213 | continue; | |
214 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); | |
215 | #else | |
216 | if (pud_none(*pud)) | |
217 | continue; | |
218 | free_hugepte_range(tlb, (hugepd_t *)pud); | |
219 | #endif | |
220 | } while (pud++, addr = next, addr != end); | |
221 | ||
222 | start &= PGDIR_MASK; | |
223 | if (start < floor) | |
224 | return; | |
225 | if (ceiling) { | |
226 | ceiling &= PGDIR_MASK; | |
227 | if (!ceiling) | |
228 | return; | |
229 | } | |
230 | if (end - 1 > ceiling - 1) | |
231 | return; | |
232 | ||
233 | pud = pud_offset(pgd, start); | |
234 | pgd_clear(pgd); | |
235 | pud_free_tlb(tlb, pud); | |
236 | } | |
237 | ||
238 | /* | |
239 | * This function frees user-level page tables of a process. | |
240 | * | |
241 | * Must be called with pagetable lock held. | |
242 | */ | |
243 | void hugetlb_free_pgd_range(struct mmu_gather **tlb, | |
244 | unsigned long addr, unsigned long end, | |
245 | unsigned long floor, unsigned long ceiling) | |
246 | { | |
247 | pgd_t *pgd; | |
248 | unsigned long next; | |
249 | unsigned long start; | |
250 | ||
251 | /* | |
252 | * Comments below take from the normal free_pgd_range(). They | |
253 | * apply here too. The tests against HUGEPD_MASK below are | |
254 | * essential, because we *don't* test for this at the bottom | |
255 | * level. Without them we'll attempt to free a hugepte table | |
256 | * when we unmap just part of it, even if there are other | |
257 | * active mappings using it. | |
258 | * | |
259 | * The next few lines have given us lots of grief... | |
260 | * | |
261 | * Why are we testing HUGEPD* at this top level? Because | |
262 | * often there will be no work to do at all, and we'd prefer | |
263 | * not to go all the way down to the bottom just to discover | |
264 | * that. | |
265 | * | |
266 | * Why all these "- 1"s? Because 0 represents both the bottom | |
267 | * of the address space and the top of it (using -1 for the | |
268 | * top wouldn't help much: the masks would do the wrong thing). | |
269 | * The rule is that addr 0 and floor 0 refer to the bottom of | |
270 | * the address space, but end 0 and ceiling 0 refer to the top | |
271 | * Comparisons need to use "end - 1" and "ceiling - 1" (though | |
272 | * that end 0 case should be mythical). | |
273 | * | |
274 | * Wherever addr is brought up or ceiling brought down, we | |
275 | * must be careful to reject "the opposite 0" before it | |
276 | * confuses the subsequent tests. But what about where end is | |
277 | * brought down by HUGEPD_SIZE below? no, end can't go down to | |
278 | * 0 there. | |
279 | * | |
280 | * Whereas we round start (addr) and ceiling down, by different | |
281 | * masks at different levels, in order to test whether a table | |
282 | * now has no other vmas using it, so can be freed, we don't | |
283 | * bother to round floor or end up - the tests don't need that. | |
284 | */ | |
285 | ||
286 | addr &= HUGEPD_MASK; | |
287 | if (addr < floor) { | |
288 | addr += HUGEPD_SIZE; | |
289 | if (!addr) | |
290 | return; | |
291 | } | |
292 | if (ceiling) { | |
293 | ceiling &= HUGEPD_MASK; | |
294 | if (!ceiling) | |
295 | return; | |
296 | } | |
297 | if (end - 1 > ceiling - 1) | |
298 | end -= HUGEPD_SIZE; | |
299 | if (addr > end - 1) | |
300 | return; | |
301 | ||
302 | start = addr; | |
303 | pgd = pgd_offset((*tlb)->mm, addr); | |
304 | do { | |
305 | BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr)); | |
306 | next = pgd_addr_end(addr, end); | |
307 | if (pgd_none_or_clear_bad(pgd)) | |
308 | continue; | |
309 | hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); | |
310 | } while (pgd++, addr = next, addr != end); | |
1da177e4 LT |
311 | } |
312 | ||
e28f7faf DG |
313 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
314 | pte_t *ptep, pte_t pte) | |
315 | { | |
e28f7faf | 316 | if (pte_present(*ptep)) { |
3c726f8d | 317 | /* We open-code pte_clear because we need to pass the right |
a741e679 BH |
318 | * argument to hpte_need_flush (huge / !huge). Might not be |
319 | * necessary anymore if we make hpte_need_flush() get the | |
320 | * page size from the slices | |
3c726f8d | 321 | */ |
a741e679 | 322 | pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); |
e28f7faf | 323 | } |
3c726f8d | 324 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); |
1da177e4 LT |
325 | } |
326 | ||
e28f7faf DG |
327 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
328 | pte_t *ptep) | |
1da177e4 | 329 | { |
a741e679 | 330 | unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); |
e28f7faf | 331 | return __pte(old); |
1da177e4 LT |
332 | } |
333 | ||
23ed6cb9 DG |
334 | struct slb_flush_info { |
335 | struct mm_struct *mm; | |
336 | u16 newareas; | |
337 | }; | |
338 | ||
c594adad | 339 | static void flush_low_segments(void *parm) |
1da177e4 | 340 | { |
23ed6cb9 | 341 | struct slb_flush_info *fi = parm; |
1da177e4 LT |
342 | unsigned long i; |
343 | ||
23ed6cb9 DG |
344 | BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_LOW_AREAS); |
345 | ||
346 | if (current->active_mm != fi->mm) | |
347 | return; | |
1da177e4 | 348 | |
23ed6cb9 DG |
349 | /* Only need to do anything if this CPU is working in the same |
350 | * mm as the one which has changed */ | |
351 | ||
352 | /* update the paca copy of the context struct */ | |
353 | get_paca()->context = current->active_mm->context; | |
c594adad | 354 | |
23ed6cb9 | 355 | asm volatile("isync" : : : "memory"); |
c594adad | 356 | for (i = 0; i < NUM_LOW_AREAS; i++) { |
23ed6cb9 | 357 | if (! (fi->newareas & (1U << i))) |
1da177e4 | 358 | continue; |
14b34661 DG |
359 | asm volatile("slbie %0" |
360 | : : "r" ((i << SID_SHIFT) | SLBIE_C)); | |
1da177e4 | 361 | } |
1da177e4 LT |
362 | asm volatile("isync" : : : "memory"); |
363 | } | |
364 | ||
c594adad DG |
365 | static void flush_high_segments(void *parm) |
366 | { | |
23ed6cb9 | 367 | struct slb_flush_info *fi = parm; |
c594adad DG |
368 | unsigned long i, j; |
369 | ||
c594adad | 370 | |
23ed6cb9 DG |
371 | BUILD_BUG_ON((sizeof(fi->newareas)*8) != NUM_HIGH_AREAS); |
372 | ||
373 | if (current->active_mm != fi->mm) | |
374 | return; | |
375 | ||
376 | /* Only need to do anything if this CPU is working in the same | |
377 | * mm as the one which has changed */ | |
c594adad | 378 | |
23ed6cb9 DG |
379 | /* update the paca copy of the context struct */ |
380 | get_paca()->context = current->active_mm->context; | |
381 | ||
382 | asm volatile("isync" : : : "memory"); | |
c594adad | 383 | for (i = 0; i < NUM_HIGH_AREAS; i++) { |
23ed6cb9 | 384 | if (! (fi->newareas & (1U << i))) |
c594adad DG |
385 | continue; |
386 | for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) | |
387 | asm volatile("slbie %0" | |
14b34661 | 388 | :: "r" (((i << HTLB_AREA_SHIFT) |
23ed6cb9 | 389 | + (j << SID_SHIFT)) | SLBIE_C)); |
c594adad | 390 | } |
c594adad DG |
391 | asm volatile("isync" : : : "memory"); |
392 | } | |
393 | ||
394 | static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) | |
1da177e4 | 395 | { |
c594adad DG |
396 | unsigned long start = area << SID_SHIFT; |
397 | unsigned long end = (area+1) << SID_SHIFT; | |
1da177e4 | 398 | struct vm_area_struct *vma; |
1da177e4 | 399 | |
c594adad | 400 | BUG_ON(area >= NUM_LOW_AREAS); |
1da177e4 LT |
401 | |
402 | /* Check no VMAs are in the region */ | |
403 | vma = find_vma(mm, start); | |
404 | if (vma && (vma->vm_start < end)) | |
405 | return -EBUSY; | |
406 | ||
1da177e4 LT |
407 | return 0; |
408 | } | |
409 | ||
c594adad DG |
410 | static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) |
411 | { | |
412 | unsigned long start = area << HTLB_AREA_SHIFT; | |
413 | unsigned long end = (area+1) << HTLB_AREA_SHIFT; | |
414 | struct vm_area_struct *vma; | |
415 | ||
416 | BUG_ON(area >= NUM_HIGH_AREAS); | |
417 | ||
7d24f0b8 DG |
418 | /* Hack, so that each addresses is controlled by exactly one |
419 | * of the high or low area bitmaps, the first high area starts | |
420 | * at 4GB, not 0 */ | |
421 | if (start == 0) | |
422 | start = 0x100000000UL; | |
423 | ||
c594adad DG |
424 | /* Check no VMAs are in the region */ |
425 | vma = find_vma(mm, start); | |
426 | if (vma && (vma->vm_start < end)) | |
427 | return -EBUSY; | |
428 | ||
429 | return 0; | |
430 | } | |
431 | ||
432 | static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) | |
1da177e4 LT |
433 | { |
434 | unsigned long i; | |
23ed6cb9 | 435 | struct slb_flush_info fi; |
1da177e4 | 436 | |
c594adad DG |
437 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); |
438 | BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); | |
439 | ||
440 | newareas &= ~(mm->context.low_htlb_areas); | |
441 | if (! newareas) | |
1da177e4 LT |
442 | return 0; /* The segments we want are already open */ |
443 | ||
c594adad DG |
444 | for (i = 0; i < NUM_LOW_AREAS; i++) |
445 | if ((1 << i) & newareas) | |
446 | if (prepare_low_area_for_htlb(mm, i) != 0) | |
447 | return -EBUSY; | |
448 | ||
449 | mm->context.low_htlb_areas |= newareas; | |
450 | ||
c594adad DG |
451 | /* the context change must make it to memory before the flush, |
452 | * so that further SLB misses do the right thing. */ | |
453 | mb(); | |
23ed6cb9 DG |
454 | |
455 | fi.mm = mm; | |
456 | fi.newareas = newareas; | |
457 | on_each_cpu(flush_low_segments, &fi, 0, 1); | |
c594adad DG |
458 | |
459 | return 0; | |
460 | } | |
461 | ||
462 | static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) | |
463 | { | |
23ed6cb9 | 464 | struct slb_flush_info fi; |
c594adad DG |
465 | unsigned long i; |
466 | ||
467 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); | |
468 | BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) | |
469 | != NUM_HIGH_AREAS); | |
470 | ||
471 | newareas &= ~(mm->context.high_htlb_areas); | |
472 | if (! newareas) | |
473 | return 0; /* The areas we want are already open */ | |
474 | ||
475 | for (i = 0; i < NUM_HIGH_AREAS; i++) | |
476 | if ((1 << i) & newareas) | |
477 | if (prepare_high_area_for_htlb(mm, i) != 0) | |
1da177e4 LT |
478 | return -EBUSY; |
479 | ||
c594adad | 480 | mm->context.high_htlb_areas |= newareas; |
1da177e4 | 481 | |
1da177e4 LT |
482 | /* the context change must make it to memory before the flush, |
483 | * so that further SLB misses do the right thing. */ | |
484 | mb(); | |
23ed6cb9 DG |
485 | |
486 | fi.mm = mm; | |
487 | fi.newareas = newareas; | |
488 | on_each_cpu(flush_high_segments, &fi, 0, 1); | |
1da177e4 LT |
489 | |
490 | return 0; | |
491 | } | |
492 | ||
68589bc3 | 493 | int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) |
1da177e4 | 494 | { |
5e391dc9 | 495 | int err = 0; |
c594adad | 496 | |
68589bc3 HD |
497 | if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) |
498 | return -EINVAL; | |
499 | if (len & ~HPAGE_MASK) | |
500 | return -EINVAL; | |
501 | if (addr & ~HPAGE_MASK) | |
c594adad DG |
502 | return -EINVAL; |
503 | ||
5e391dc9 | 504 | if (addr < 0x100000000UL) |
c594adad | 505 | err = open_low_hpage_areas(current->mm, |
1da177e4 | 506 | LOW_ESID_MASK(addr, len)); |
9a94c579 | 507 | if ((addr + len) > 0x100000000UL) |
c594adad DG |
508 | err = open_high_hpage_areas(current->mm, |
509 | HTLB_AREA_MASK(addr, len)); | |
94b2a439 BH |
510 | #ifdef CONFIG_SPE_BASE |
511 | spu_flush_all_slbs(current->mm); | |
512 | #endif | |
c594adad DG |
513 | if (err) { |
514 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | |
515 | " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", | |
516 | addr, len, | |
517 | LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); | |
1da177e4 LT |
518 | return err; |
519 | } | |
520 | ||
c594adad | 521 | return 0; |
1da177e4 LT |
522 | } |
523 | ||
1da177e4 LT |
524 | struct page * |
525 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |
526 | { | |
527 | pte_t *ptep; | |
528 | struct page *page; | |
529 | ||
530 | if (! in_hugepage_area(mm->context, address)) | |
531 | return ERR_PTR(-EINVAL); | |
532 | ||
533 | ptep = huge_pte_offset(mm, address); | |
534 | page = pte_page(*ptep); | |
535 | if (page) | |
536 | page += (address % HPAGE_SIZE) / PAGE_SIZE; | |
537 | ||
538 | return page; | |
539 | } | |
540 | ||
541 | int pmd_huge(pmd_t pmd) | |
542 | { | |
543 | return 0; | |
544 | } | |
545 | ||
546 | struct page * | |
547 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |
548 | pmd_t *pmd, int write) | |
549 | { | |
550 | BUG(); | |
551 | return NULL; | |
552 | } | |
553 | ||
1da177e4 LT |
554 | /* Because we have an exclusive hugepage region which lies within the |
555 | * normal user address space, we have to take special measures to make | |
556 | * non-huge mmap()s evade the hugepage reserved regions. */ | |
557 | unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | |
558 | unsigned long len, unsigned long pgoff, | |
559 | unsigned long flags) | |
560 | { | |
561 | struct mm_struct *mm = current->mm; | |
562 | struct vm_area_struct *vma; | |
563 | unsigned long start_addr; | |
564 | ||
565 | if (len > TASK_SIZE) | |
566 | return -ENOMEM; | |
567 | ||
d506a772 BH |
568 | /* handle fixed mapping: prevent overlap with huge pages */ |
569 | if (flags & MAP_FIXED) { | |
570 | if (is_hugepage_only_range(mm, addr, len)) | |
571 | return -EINVAL; | |
572 | return addr; | |
573 | } | |
574 | ||
1da177e4 LT |
575 | if (addr) { |
576 | addr = PAGE_ALIGN(addr); | |
577 | vma = find_vma(mm, addr); | |
578 | if (((TASK_SIZE - len) >= addr) | |
579 | && (!vma || (addr+len) <= vma->vm_start) | |
580 | && !is_hugepage_only_range(mm, addr,len)) | |
581 | return addr; | |
582 | } | |
1363c3cd WW |
583 | if (len > mm->cached_hole_size) { |
584 | start_addr = addr = mm->free_area_cache; | |
585 | } else { | |
586 | start_addr = addr = TASK_UNMAPPED_BASE; | |
587 | mm->cached_hole_size = 0; | |
588 | } | |
1da177e4 LT |
589 | |
590 | full_search: | |
591 | vma = find_vma(mm, addr); | |
592 | while (TASK_SIZE - len >= addr) { | |
593 | BUG_ON(vma && (addr >= vma->vm_end)); | |
594 | ||
595 | if (touches_hugepage_low_range(mm, addr, len)) { | |
596 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
597 | vma = find_vma(mm, addr); | |
598 | continue; | |
599 | } | |
c594adad DG |
600 | if (touches_hugepage_high_range(mm, addr, len)) { |
601 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | |
1da177e4 LT |
602 | vma = find_vma(mm, addr); |
603 | continue; | |
604 | } | |
605 | if (!vma || addr + len <= vma->vm_start) { | |
606 | /* | |
607 | * Remember the place where we stopped the search: | |
608 | */ | |
609 | mm->free_area_cache = addr + len; | |
610 | return addr; | |
611 | } | |
1363c3cd WW |
612 | if (addr + mm->cached_hole_size < vma->vm_start) |
613 | mm->cached_hole_size = vma->vm_start - addr; | |
1da177e4 LT |
614 | addr = vma->vm_end; |
615 | vma = vma->vm_next; | |
616 | } | |
617 | ||
618 | /* Make sure we didn't miss any holes */ | |
619 | if (start_addr != TASK_UNMAPPED_BASE) { | |
620 | start_addr = addr = TASK_UNMAPPED_BASE; | |
1363c3cd | 621 | mm->cached_hole_size = 0; |
1da177e4 LT |
622 | goto full_search; |
623 | } | |
624 | return -ENOMEM; | |
625 | } | |
626 | ||
627 | /* | |
628 | * This mmap-allocator allocates new areas top-down from below the | |
629 | * stack's low limit (the base): | |
630 | * | |
631 | * Because we have an exclusive hugepage region which lies within the | |
632 | * normal user address space, we have to take special measures to make | |
633 | * non-huge mmap()s evade the hugepage reserved regions. | |
634 | */ | |
635 | unsigned long | |
636 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |
637 | const unsigned long len, const unsigned long pgoff, | |
638 | const unsigned long flags) | |
639 | { | |
640 | struct vm_area_struct *vma, *prev_vma; | |
641 | struct mm_struct *mm = current->mm; | |
642 | unsigned long base = mm->mmap_base, addr = addr0; | |
1363c3cd | 643 | unsigned long largest_hole = mm->cached_hole_size; |
1da177e4 LT |
644 | int first_time = 1; |
645 | ||
646 | /* requested length too big for entire address space */ | |
647 | if (len > TASK_SIZE) | |
648 | return -ENOMEM; | |
649 | ||
d506a772 BH |
650 | /* handle fixed mapping: prevent overlap with huge pages */ |
651 | if (flags & MAP_FIXED) { | |
652 | if (is_hugepage_only_range(mm, addr, len)) | |
653 | return -EINVAL; | |
654 | return addr; | |
655 | } | |
656 | ||
1da177e4 LT |
657 | /* dont allow allocations above current base */ |
658 | if (mm->free_area_cache > base) | |
659 | mm->free_area_cache = base; | |
660 | ||
661 | /* requesting a specific address */ | |
662 | if (addr) { | |
663 | addr = PAGE_ALIGN(addr); | |
664 | vma = find_vma(mm, addr); | |
665 | if (TASK_SIZE - len >= addr && | |
666 | (!vma || addr + len <= vma->vm_start) | |
667 | && !is_hugepage_only_range(mm, addr,len)) | |
668 | return addr; | |
669 | } | |
670 | ||
1363c3cd WW |
671 | if (len <= largest_hole) { |
672 | largest_hole = 0; | |
673 | mm->free_area_cache = base; | |
674 | } | |
1da177e4 LT |
675 | try_again: |
676 | /* make sure it can fit in the remaining address space */ | |
677 | if (mm->free_area_cache < len) | |
678 | goto fail; | |
679 | ||
680 | /* either no address requested or cant fit in requested address hole */ | |
681 | addr = (mm->free_area_cache - len) & PAGE_MASK; | |
682 | do { | |
683 | hugepage_recheck: | |
684 | if (touches_hugepage_low_range(mm, addr, len)) { | |
685 | addr = (addr & ((~0) << SID_SHIFT)) - len; | |
686 | goto hugepage_recheck; | |
c594adad DG |
687 | } else if (touches_hugepage_high_range(mm, addr, len)) { |
688 | addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; | |
689 | goto hugepage_recheck; | |
1da177e4 LT |
690 | } |
691 | ||
692 | /* | |
693 | * Lookup failure means no vma is above this address, | |
694 | * i.e. return with success: | |
695 | */ | |
696 | if (!(vma = find_vma_prev(mm, addr, &prev_vma))) | |
697 | return addr; | |
698 | ||
699 | /* | |
700 | * new region fits between prev_vma->vm_end and | |
701 | * vma->vm_start, use it: | |
702 | */ | |
703 | if (addr+len <= vma->vm_start && | |
1363c3cd | 704 | (!prev_vma || (addr >= prev_vma->vm_end))) { |
1da177e4 | 705 | /* remember the address as a hint for next time */ |
1363c3cd WW |
706 | mm->cached_hole_size = largest_hole; |
707 | return (mm->free_area_cache = addr); | |
708 | } else { | |
1da177e4 | 709 | /* pull free_area_cache down to the first hole */ |
1363c3cd | 710 | if (mm->free_area_cache == vma->vm_end) { |
1da177e4 | 711 | mm->free_area_cache = vma->vm_start; |
1363c3cd WW |
712 | mm->cached_hole_size = largest_hole; |
713 | } | |
714 | } | |
715 | ||
716 | /* remember the largest hole we saw so far */ | |
717 | if (addr + largest_hole < vma->vm_start) | |
718 | largest_hole = vma->vm_start - addr; | |
1da177e4 LT |
719 | |
720 | /* try just below the current vma->vm_start */ | |
721 | addr = vma->vm_start-len; | |
722 | } while (len <= vma->vm_start); | |
723 | ||
724 | fail: | |
725 | /* | |
726 | * if hint left us with no space for the requested | |
727 | * mapping then try again: | |
728 | */ | |
729 | if (first_time) { | |
730 | mm->free_area_cache = base; | |
1363c3cd | 731 | largest_hole = 0; |
1da177e4 LT |
732 | first_time = 0; |
733 | goto try_again; | |
734 | } | |
735 | /* | |
736 | * A failed mmap() very likely causes application failure, | |
737 | * so fall back to the bottom-up function here. This scenario | |
738 | * can happen with large stack limits and large mmap() | |
739 | * allocations. | |
740 | */ | |
741 | mm->free_area_cache = TASK_UNMAPPED_BASE; | |
1363c3cd | 742 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
743 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
744 | /* | |
745 | * Restore the topdown base: | |
746 | */ | |
747 | mm->free_area_cache = base; | |
1363c3cd | 748 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
749 | |
750 | return addr; | |
751 | } | |
752 | ||
456752f7 DG |
753 | static int htlb_check_hinted_area(unsigned long addr, unsigned long len) |
754 | { | |
755 | struct vm_area_struct *vma; | |
756 | ||
757 | vma = find_vma(current->mm, addr); | |
6aa3e1e9 DG |
758 | if (TASK_SIZE - len >= addr && |
759 | (!vma || ((addr + len) <= vma->vm_start))) | |
456752f7 DG |
760 | return 0; |
761 | ||
762 | return -ENOMEM; | |
763 | } | |
764 | ||
1da177e4 LT |
765 | static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) |
766 | { | |
767 | unsigned long addr = 0; | |
768 | struct vm_area_struct *vma; | |
769 | ||
770 | vma = find_vma(current->mm, addr); | |
771 | while (addr + len <= 0x100000000UL) { | |
772 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | |
773 | ||
774 | if (! __within_hugepage_low_range(addr, len, segmask)) { | |
775 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
776 | vma = find_vma(current->mm, addr); | |
777 | continue; | |
778 | } | |
779 | ||
780 | if (!vma || (addr + len) <= vma->vm_start) | |
781 | return addr; | |
782 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
783 | /* Depending on segmask this might not be a confirmed | |
784 | * hugepage region, so the ALIGN could have skipped | |
785 | * some VMAs */ | |
786 | vma = find_vma(current->mm, addr); | |
787 | } | |
788 | ||
789 | return -ENOMEM; | |
790 | } | |
791 | ||
c594adad | 792 | static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) |
1da177e4 | 793 | { |
c594adad | 794 | unsigned long addr = 0x100000000UL; |
1da177e4 LT |
795 | struct vm_area_struct *vma; |
796 | ||
797 | vma = find_vma(current->mm, addr); | |
c594adad | 798 | while (addr + len <= TASK_SIZE_USER64) { |
1da177e4 | 799 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ |
c594adad DG |
800 | |
801 | if (! __within_hugepage_high_range(addr, len, areamask)) { | |
802 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | |
803 | vma = find_vma(current->mm, addr); | |
804 | continue; | |
805 | } | |
1da177e4 LT |
806 | |
807 | if (!vma || (addr + len) <= vma->vm_start) | |
808 | return addr; | |
809 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
c594adad DG |
810 | /* Depending on segmask this might not be a confirmed |
811 | * hugepage region, so the ALIGN could have skipped | |
812 | * some VMAs */ | |
813 | vma = find_vma(current->mm, addr); | |
1da177e4 LT |
814 | } |
815 | ||
816 | return -ENOMEM; | |
817 | } | |
818 | ||
819 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |
820 | unsigned long len, unsigned long pgoff, | |
821 | unsigned long flags) | |
822 | { | |
c594adad DG |
823 | int lastshift; |
824 | u16 areamask, curareas; | |
825 | ||
3c726f8d BH |
826 | if (HPAGE_SHIFT == 0) |
827 | return -EINVAL; | |
1da177e4 LT |
828 | if (len & ~HPAGE_MASK) |
829 | return -EINVAL; | |
6aa3e1e9 DG |
830 | if (len > TASK_SIZE) |
831 | return -ENOMEM; | |
1da177e4 LT |
832 | |
833 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | |
834 | return -EINVAL; | |
835 | ||
456752f7 DG |
836 | /* Paranoia, caller should have dealt with this */ |
837 | BUG_ON((addr + len) < addr); | |
838 | ||
d506a772 BH |
839 | /* Handle MAP_FIXED */ |
840 | if (flags & MAP_FIXED) { | |
841 | if (prepare_hugepage_range(addr, len, pgoff)) | |
842 | return -EINVAL; | |
843 | return addr; | |
844 | } | |
845 | ||
1da177e4 | 846 | if (test_thread_flag(TIF_32BIT)) { |
c594adad | 847 | curareas = current->mm->context.low_htlb_areas; |
1da177e4 | 848 | |
456752f7 DG |
849 | /* First see if we can use the hint address */ |
850 | if (addr && (htlb_check_hinted_area(addr, len) == 0)) { | |
851 | areamask = LOW_ESID_MASK(addr, len); | |
852 | if (open_low_hpage_areas(current->mm, areamask) == 0) | |
853 | return addr; | |
854 | } | |
855 | ||
856 | /* Next see if we can map in the existing low areas */ | |
c594adad | 857 | addr = htlb_get_low_area(len, curareas); |
1da177e4 LT |
858 | if (addr != -ENOMEM) |
859 | return addr; | |
860 | ||
456752f7 | 861 | /* Finally go looking for areas to open */ |
c594adad DG |
862 | lastshift = 0; |
863 | for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); | |
864 | ! lastshift; areamask >>=1) { | |
865 | if (areamask & 1) | |
1da177e4 LT |
866 | lastshift = 1; |
867 | ||
c594adad | 868 | addr = htlb_get_low_area(len, curareas | areamask); |
1da177e4 | 869 | if ((addr != -ENOMEM) |
c594adad | 870 | && open_low_hpage_areas(current->mm, areamask) == 0) |
1da177e4 LT |
871 | return addr; |
872 | } | |
1da177e4 | 873 | } else { |
c594adad DG |
874 | curareas = current->mm->context.high_htlb_areas; |
875 | ||
456752f7 DG |
876 | /* First see if we can use the hint address */ |
877 | /* We discourage 64-bit processes from doing hugepage | |
878 | * mappings below 4GB (must use MAP_FIXED) */ | |
879 | if ((addr >= 0x100000000UL) | |
880 | && (htlb_check_hinted_area(addr, len) == 0)) { | |
881 | areamask = HTLB_AREA_MASK(addr, len); | |
882 | if (open_high_hpage_areas(current->mm, areamask) == 0) | |
883 | return addr; | |
884 | } | |
885 | ||
886 | /* Next see if we can map in the existing high areas */ | |
c594adad DG |
887 | addr = htlb_get_high_area(len, curareas); |
888 | if (addr != -ENOMEM) | |
889 | return addr; | |
890 | ||
456752f7 | 891 | /* Finally go looking for areas to open */ |
c594adad DG |
892 | lastshift = 0; |
893 | for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); | |
894 | ! lastshift; areamask >>=1) { | |
895 | if (areamask & 1) | |
896 | lastshift = 1; | |
897 | ||
898 | addr = htlb_get_high_area(len, curareas | areamask); | |
899 | if ((addr != -ENOMEM) | |
900 | && open_high_hpage_areas(current->mm, areamask) == 0) | |
901 | return addr; | |
902 | } | |
1da177e4 | 903 | } |
c594adad DG |
904 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" |
905 | " enough areas\n"); | |
906 | return -ENOMEM; | |
1da177e4 LT |
907 | } |
908 | ||
cbf52afd DG |
909 | /* |
910 | * Called by asm hashtable.S for doing lazy icache flush | |
911 | */ | |
912 | static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, | |
913 | pte_t pte, int trap) | |
914 | { | |
915 | struct page *page; | |
916 | int i; | |
917 | ||
918 | if (!pfn_valid(pte_pfn(pte))) | |
919 | return rflags; | |
920 | ||
921 | page = pte_page(pte); | |
922 | ||
923 | /* page is dirty */ | |
924 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | |
925 | if (trap == 0x400) { | |
926 | for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) | |
927 | __flush_dcache_icache(page_address(page+i)); | |
928 | set_bit(PG_arch_1, &page->flags); | |
929 | } else { | |
930 | rflags |= HPTE_R_N; | |
931 | } | |
932 | } | |
933 | return rflags; | |
934 | } | |
935 | ||
1da177e4 | 936 | int hash_huge_page(struct mm_struct *mm, unsigned long access, |
cbf52afd DG |
937 | unsigned long ea, unsigned long vsid, int local, |
938 | unsigned long trap) | |
1da177e4 LT |
939 | { |
940 | pte_t *ptep; | |
3c726f8d BH |
941 | unsigned long old_pte, new_pte; |
942 | unsigned long va, rflags, pa; | |
1da177e4 LT |
943 | long slot; |
944 | int err = 1; | |
945 | ||
1da177e4 LT |
946 | ptep = huge_pte_offset(mm, ea); |
947 | ||
948 | /* Search the Linux page table for a match with va */ | |
949 | va = (vsid << 28) | (ea & 0x0fffffff); | |
1da177e4 LT |
950 | |
951 | /* | |
952 | * If no pte found or not present, send the problem up to | |
953 | * do_page_fault | |
954 | */ | |
955 | if (unlikely(!ptep || pte_none(*ptep))) | |
956 | goto out; | |
957 | ||
1da177e4 LT |
958 | /* |
959 | * Check the user's access rights to the page. If access should be | |
960 | * prevented then send the problem up to do_page_fault. | |
961 | */ | |
962 | if (unlikely(access & ~pte_val(*ptep))) | |
963 | goto out; | |
964 | /* | |
965 | * At this point, we have a pte (old_pte) which can be used to build | |
966 | * or update an HPTE. There are 2 cases: | |
967 | * | |
968 | * 1. There is a valid (present) pte with no associated HPTE (this is | |
969 | * the most common case) | |
970 | * 2. There is a valid (present) pte with an associated HPTE. The | |
971 | * current values of the pp bits in the HPTE prevent access | |
972 | * because we are doing software DIRTY bit management and the | |
973 | * page is currently not DIRTY. | |
974 | */ | |
975 | ||
976 | ||
3c726f8d BH |
977 | do { |
978 | old_pte = pte_val(*ptep); | |
979 | if (old_pte & _PAGE_BUSY) | |
980 | goto out; | |
981 | new_pte = old_pte | _PAGE_BUSY | | |
982 | _PAGE_ACCESSED | _PAGE_HASHPTE; | |
983 | } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, | |
984 | old_pte, new_pte)); | |
985 | ||
986 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | |
1da177e4 | 987 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ |
3c726f8d | 988 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); |
cbf52afd DG |
989 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) |
990 | /* No CPU has hugepages but lacks no execute, so we | |
991 | * don't need to worry about that case */ | |
992 | rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), | |
993 | trap); | |
1da177e4 LT |
994 | |
995 | /* Check if pte already has an hpte (case 2) */ | |
3c726f8d | 996 | if (unlikely(old_pte & _PAGE_HASHPTE)) { |
1da177e4 LT |
997 | /* There MIGHT be an HPTE for this pte */ |
998 | unsigned long hash, slot; | |
999 | ||
3c726f8d BH |
1000 | hash = hpt_hash(va, HPAGE_SHIFT); |
1001 | if (old_pte & _PAGE_F_SECOND) | |
1da177e4 LT |
1002 | hash = ~hash; |
1003 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
3c726f8d | 1004 | slot += (old_pte & _PAGE_F_GIX) >> 12; |
1da177e4 | 1005 | |
325c82a0 BH |
1006 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, |
1007 | local) == -1) | |
3c726f8d | 1008 | old_pte &= ~_PAGE_HPTEFLAGS; |
1da177e4 LT |
1009 | } |
1010 | ||
3c726f8d BH |
1011 | if (likely(!(old_pte & _PAGE_HASHPTE))) { |
1012 | unsigned long hash = hpt_hash(va, HPAGE_SHIFT); | |
1da177e4 LT |
1013 | unsigned long hpte_group; |
1014 | ||
3c726f8d | 1015 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; |
1da177e4 LT |
1016 | |
1017 | repeat: | |
1018 | hpte_group = ((hash & htab_hash_mask) * | |
1019 | HPTES_PER_GROUP) & ~0x7UL; | |
1020 | ||
3c726f8d BH |
1021 | /* clear HPTE slot informations in new PTE */ |
1022 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | |
1da177e4 LT |
1023 | |
1024 | /* Add in WIMG bits */ | |
1025 | /* XXX We should store these in the pte */ | |
3c726f8d | 1026 | /* --BenH: I think they are ... */ |
96e28449 | 1027 | rflags |= _PAGE_COHERENT; |
1da177e4 | 1028 | |
3c726f8d BH |
1029 | /* Insert into the hash table, primary slot */ |
1030 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | |
1031 | mmu_huge_psize); | |
1da177e4 LT |
1032 | |
1033 | /* Primary is full, try the secondary */ | |
1034 | if (unlikely(slot == -1)) { | |
1da177e4 LT |
1035 | hpte_group = ((~hash & htab_hash_mask) * |
1036 | HPTES_PER_GROUP) & ~0x7UL; | |
3c726f8d | 1037 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, |
67b10813 | 1038 | HPTE_V_SECONDARY, |
3c726f8d | 1039 | mmu_huge_psize); |
1da177e4 LT |
1040 | if (slot == -1) { |
1041 | if (mftb() & 0x1) | |
67b10813 BH |
1042 | hpte_group = ((hash & htab_hash_mask) * |
1043 | HPTES_PER_GROUP)&~0x7UL; | |
1da177e4 LT |
1044 | |
1045 | ppc_md.hpte_remove(hpte_group); | |
1046 | goto repeat; | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | if (unlikely(slot == -2)) | |
1051 | panic("hash_huge_page: pte_insert failed\n"); | |
1052 | ||
d649bd7b | 1053 | new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); |
1da177e4 LT |
1054 | } |
1055 | ||
3c726f8d | 1056 | /* |
01edcd89 | 1057 | * No need to use ldarx/stdcx here |
3c726f8d BH |
1058 | */ |
1059 | *ptep = __pte(new_pte & ~_PAGE_BUSY); | |
1060 | ||
1da177e4 LT |
1061 | err = 0; |
1062 | ||
1063 | out: | |
1da177e4 LT |
1064 | return err; |
1065 | } | |
f10a04c0 | 1066 | |
/*
 * Constructor passed to kmem_cache_create() for the hugepte cache:
 * zero-fills the object at @addr over the size that kmem_cache_size()
 * reports for @cache.  @flags is not used.
 */
static void zero_ctor(void *addr, struct kmem_cache *cache, unsigned long flags)
{
	size_t objsize = kmem_cache_size(cache);

	memset(addr, 0, objsize);
}
1071 | ||
1072 | static int __init hugetlbpage_init(void) | |
1073 | { | |
1074 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | |
1075 | return -ENODEV; | |
1076 | ||
1077 | huge_pgtable_cache = kmem_cache_create("hugepte_cache", | |
1078 | HUGEPTE_TABLE_SIZE, | |
1079 | HUGEPTE_TABLE_SIZE, | |
f0f3980b | 1080 | 0, |
f10a04c0 DG |
1081 | zero_ctor, NULL); |
1082 | if (! huge_pgtable_cache) | |
1083 | panic("hugetlbpage_init(): could not create hugepte cache\n"); | |
1084 | ||
1085 | return 0; | |
1086 | } | |
1087 | ||
1088 | module_init(hugetlbpage_init); |