Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * PPC64 (POWER4) Huge TLB Page Support for Kernel. | |
3 | * | |
4 | * Copyright (C) 2003 David Gibson, IBM Corporation. | |
5 | * | |
6 | * Based on the IA-32 version: | |
7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | |
8 | */ | |
9 | ||
10 | #include <linux/init.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/mm.h> | |
13 | #include <linux/hugetlb.h> | |
14 | #include <linux/pagemap.h> | |
15 | #include <linux/smp_lock.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/err.h> | |
18 | #include <linux/sysctl.h> | |
19 | #include <asm/mman.h> | |
20 | #include <asm/pgalloc.h> | |
21 | #include <asm/tlb.h> | |
22 | #include <asm/tlbflush.h> | |
23 | #include <asm/mmu_context.h> | |
24 | #include <asm/machdep.h> | |
25 | #include <asm/cputable.h> | |
26 | #include <asm/tlb.h> | |
27 | ||
28 | #include <linux/sysctl.h> | |
29 | ||
c594adad DG |
30 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) |
31 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) | |
32 | ||
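The two macros above carve the user address space into hugepage "areas" tracked as bitmasks: low areas are the 2^SID_SHIFT-byte segments below 4GB, high areas are 2^HTLB_AREA_SHIFT-byte regions above it, and the BUILD_BUG_ONs later in this file pin both counts to 16 so each mask fits in a u16. A minimal sketch of how an address maps to its area bit, with the shift values taken as assumptions from the ppc64 headers of this era (not from this file):

```c
/* Sketch only: assumes SID_SHIFT == 28 (256MB segments) and
 * HTLB_AREA_SHIFT == 40 (1TB areas); check asm/page.h and asm/mmu.h. */
static inline u16 example_low_area_bit(unsigned long addr)
{
	return 1U << (addr >> 28);	/* which 256MB segment below 4GB */
}

static inline u16 example_high_area_bit(unsigned long addr)
{
	return 1U << (addr >> 40);	/* which 1TB area above 4GB */
}
```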
e28f7faf DG |
33 | /* Modelled after find_linux_pte() */ |
34 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |
1da177e4 | 35 | { |
e28f7faf DG |
36 | pgd_t *pg; |
37 | pud_t *pu; | |
38 | pmd_t *pm; | |
39 | pte_t *pt; | |
1da177e4 | 40 | |
e28f7faf | 41 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
1da177e4 | 42 | |
e28f7faf DG |
43 | addr &= HPAGE_MASK; |
44 | ||
45 | pg = pgd_offset(mm, addr); | |
46 | if (!pgd_none(*pg)) { | |
47 | pu = pud_offset(pg, addr); | |
48 | if (!pud_none(*pu)) { | |
49 | pm = pmd_offset(pu, addr); | |
50 | pt = (pte_t *)pm; | |
51 | BUG_ON(!pmd_none(*pm) | |
52 | && !(pte_present(*pt) && pte_huge(*pt))); | |
53 | return pt; | |
54 | } | |
55 | } | |
1da177e4 | 56 | |
e28f7faf | 57 | return NULL; |
1da177e4 LT |
58 | } |
59 | ||
e28f7faf | 60 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) |
1da177e4 | 61 | { |
e28f7faf DG |
62 | pgd_t *pg; |
63 | pud_t *pu; | |
64 | pmd_t *pm; | |
65 | pte_t *pt; | |
1da177e4 | 66 | |
1da177e4 LT |
67 | BUG_ON(! in_hugepage_area(mm->context, addr)); |
68 | ||
e28f7faf | 69 | addr &= HPAGE_MASK; |
1da177e4 | 70 | |
e28f7faf DG |
71 | pg = pgd_offset(mm, addr); |
72 | pu = pud_alloc(mm, pg, addr); | |
1da177e4 | 73 | |
e28f7faf DG |
74 | if (pu) { |
75 | pm = pmd_alloc(mm, pu, addr); | |
76 | if (pm) { | |
77 | pt = (pte_t *)pm; | |
78 | BUG_ON(!pmd_none(*pm) | |
79 | && !(pte_present(*pt) && pte_huge(*pt))); | |
80 | return pt; | |
1da177e4 LT |
81 | } |
82 | } | |
83 | ||
e28f7faf | 84 | return NULL; |
1da177e4 LT |
85 | } |
86 | ||
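Both walkers return the PMD slot itself, cast to a pte_t *, so a huge page is recorded at the PMD level rather than in a separate PTE page. A hypothetical caller sketch, simplified from what the generic hugetlb fault path does (example_get_huge_pte is not a real kernel function and locking is reduced to the essentials):

```c
/* Hypothetical sketch: find the huge PTE slot for a faulting address,
 * populating the pud/pmd levels on first touch. */
static pte_t *example_get_huge_pte(struct mm_struct *mm, unsigned long addr)
{
	pte_t *ptep;

	spin_lock(&mm->page_table_lock);
	ptep = huge_pte_offset(mm, addr);	/* read-only walk */
	if (!ptep)
		ptep = huge_pte_alloc(mm, addr);	/* allocate pud/pmd */
	spin_unlock(&mm->page_table_lock);
	return ptep;
}
```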
e28f7faf | 87 | #define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) |
1da177e4 | 88 | |
e28f7faf DG |
89 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, |
90 | pte_t *ptep, pte_t pte) | |
91 | { | |
92 | int i; | |
1da177e4 | 93 | |
e28f7faf DG |
94 | if (pte_present(*ptep)) { |
95 | pte_clear(mm, addr, ptep); | |
96 | flush_tlb_pending(); | |
97 | } | |
1da177e4 | 98 | |
e28f7faf DG |
99 | for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { |
100 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | |
101 | ptep++; | |
102 | } | |
1da177e4 LT |
103 | } |
104 | ||
e28f7faf DG |
105 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, |
106 | pte_t *ptep) | |
1da177e4 | 107 | { |
e28f7faf DG |
108 | unsigned long old = pte_update(ptep, ~0UL); |
109 | int i; | |
1da177e4 | 110 | |
e28f7faf DG |
111 | if (old & _PAGE_HASHPTE) |
112 | hpte_update(mm, addr, old, 0); | |
1da177e4 | 113 | |
e28f7faf DG |
114 | for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) |
115 | ptep[i] = __pte(0); | |
1da177e4 | 116 | |
e28f7faf | 117 | return __pte(old); |
1da177e4 LT |
118 | } |
119 | ||
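set_huge_pte_at() and huge_ptep_get_and_clear() touch HUGEPTE_BATCH_SIZE consecutive PMD slots because one huge page spans several PMD-sized ranges. A worked example, assuming the usual 4K-base-page values of this era (HPAGE_SIZE and PMD_SIZE are assumptions, check asm/page.h and asm/pgtable.h):

```c
/* Assumed values, not taken from this file:
 *   HPAGE_SIZE = 16MB, PMD_SIZE = 2MB
 *   HUGEPTE_BATCH_SIZE = HPAGE_SIZE / PMD_SIZE = 8
 * so set_huge_pte_at() writes the same huge PTE (minus the volatile
 * _PAGE_HPTEFLAGS bits) into 8 consecutive PMD slots, and
 * huge_ptep_get_and_clear() captures the old value from the first slot
 * and zeroes the remaining 7. */
```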
1da177e4 LT |
120 | /* |
121 | * This function checks for proper alignment of input addr and len parameters. | |
122 | */ | |
123 | int is_aligned_hugepage_range(unsigned long addr, unsigned long len) | |
124 | { | |
125 | if (len & ~HPAGE_MASK) | |
126 | return -EINVAL; | |
127 | if (addr & ~HPAGE_MASK) | |
128 | return -EINVAL; | |
129 | if (! (within_hugepage_low_range(addr, len) | |
130 | || within_hugepage_high_range(addr, len)) ) | |
131 | return -EINVAL; | |
132 | return 0; | |
133 | } | |
134 | ||
c594adad | 135 | static void flush_low_segments(void *parm) |
1da177e4 | 136 | { |
c594adad | 137 | u16 areas = (unsigned long) parm; |
1da177e4 LT |
138 | unsigned long i; |
139 | ||
140 | asm volatile("isync" : : : "memory"); | |
141 | ||
c594adad DG |
142 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); |
143 | ||
144 | for (i = 0; i < NUM_LOW_AREAS; i++) { | |
145 | if (! (areas & (1U << i))) | |
1da177e4 LT |
146 | continue; |
147 | asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); | |
148 | } | |
149 | ||
150 | asm volatile("isync" : : : "memory"); | |
151 | } | |
152 | ||
c594adad DG |
153 | static void flush_high_segments(void *parm) |
154 | { | |
155 | u16 areas = (unsigned long) parm; | |
156 | unsigned long i, j; | |
157 | ||
158 | asm volatile("isync" : : : "memory"); | |
159 | ||
160 | BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); | |
161 | ||
162 | for (i = 0; i < NUM_HIGH_AREAS; i++) { | |
163 | if (! (areas & (1U << i))) | |
164 | continue; | |
165 | for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) | |
166 | asm volatile("slbie %0" | |
167 | :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); | |
168 | } | |
169 | ||
170 | asm volatile("isync" : : : "memory"); | |
171 | } | |
172 | ||
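flush_low_segments() issues one slbie per open 256MB segment, while flush_high_segments() must cover every segment inside each open area, hence its nested loop. A worked count, using the same assumed shift values as above:

```c
/* Worked arithmetic (shift values assumed, not defined in this file):
 *   segments per high area = 1UL << (HTLB_AREA_SHIFT - SID_SHIFT)
 *                          = 1UL << (40 - 28) = 4096
 * so opening one high area broadcasts 4096 slbie instructions on every
 * CPU, bracketed by isync to order them against later SLB reloads. */
```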
173 | static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) | |
1da177e4 | 174 | { |
c594adad DG |
175 | unsigned long start = area << SID_SHIFT; |
176 | unsigned long end = (area+1) << SID_SHIFT; | |
1da177e4 | 177 | struct vm_area_struct *vma; |
1da177e4 | 178 | |
c594adad | 179 | BUG_ON(area >= NUM_LOW_AREAS); |
1da177e4 LT |
180 | |
181 | /* Check no VMAs are in the region */ | |
182 | vma = find_vma(mm, start); | |
183 | if (vma && (vma->vm_start < end)) | |
184 | return -EBUSY; | |
185 | ||
1da177e4 LT |
186 | return 0; |
187 | } | |
188 | ||
c594adad DG |
189 | static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) |
190 | { | |
191 | unsigned long start = area << HTLB_AREA_SHIFT; | |
192 | unsigned long end = (area+1) << HTLB_AREA_SHIFT; | |
193 | struct vm_area_struct *vma; | |
194 | ||
195 | BUG_ON(area >= NUM_HIGH_AREAS); | |
196 | ||
197 | /* Check no VMAs are in the region */ | |
198 | vma = find_vma(mm, start); | |
199 | if (vma && (vma->vm_start < end)) | |
200 | return -EBUSY; | |
201 | ||
202 | return 0; | |
203 | } | |
204 | ||
205 | static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) | |
1da177e4 LT |
206 | { |
207 | unsigned long i; | |
208 | ||
c594adad DG |
209 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); |
210 | BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); | |
211 | ||
212 | newareas &= ~(mm->context.low_htlb_areas); | |
213 | if (! newareas) | |
1da177e4 LT |
214 | return 0; /* The segments we want are already open */ |
215 | ||
c594adad DG |
216 | for (i = 0; i < NUM_LOW_AREAS; i++) |
217 | if ((1 << i) & newareas) | |
218 | if (prepare_low_area_for_htlb(mm, i) != 0) | |
219 | return -EBUSY; | |
220 | ||
221 | mm->context.low_htlb_areas |= newareas; | |
222 | ||
223 | /* update the paca copy of the context struct */ | |
224 | get_paca()->context = mm->context; | |
225 | ||
226 | /* the context change must make it to memory before the flush, | |
227 | * so that further SLB misses do the right thing. */ | |
228 | mb(); | |
229 | on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); | |
230 | ||
231 | return 0; | |
232 | } | |
233 | ||
234 | static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) | |
235 | { | |
236 | unsigned long i; | |
237 | ||
238 | BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); | |
239 | BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) | |
240 | != NUM_HIGH_AREAS); | |
241 | ||
242 | newareas &= ~(mm->context.high_htlb_areas); | |
243 | if (! newareas) | |
244 | return 0; /* The areas we want are already open */ | |
245 | ||
246 | for (i = 0; i < NUM_HIGH_AREAS; i++) | |
247 | if ((1 << i) & newareas) | |
248 | if (prepare_high_area_for_htlb(mm, i) != 0) | |
1da177e4 LT |
249 | return -EBUSY; |
250 | ||
c594adad | 251 | mm->context.high_htlb_areas |= newareas; |
1da177e4 LT |
252 | |
253 | /* update the paca copy of the context struct */ | |
254 | get_paca()->context = mm->context; | |
255 | ||
256 | /* the context change must make it to memory before the flush, | |
257 | * so that further SLB misses do the right thing. */ | |
258 | mb(); | |
c594adad | 259 | on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); |
1da177e4 LT |
260 | |
261 | return 0; | |
262 | } | |
263 | ||
264 | int prepare_hugepage_range(unsigned long addr, unsigned long len) | |
265 | { | |
c594adad DG |
266 | int err; |
267 | ||
268 | if ( (addr+len) < addr ) | |
269 | return -EINVAL; | |
270 | ||
271 | if ((addr + len) < 0x100000000UL) | |
272 | err = open_low_hpage_areas(current->mm, | |
1da177e4 | 273 | LOW_ESID_MASK(addr, len)); |
c594adad DG |
274 | else |
275 | err = open_high_hpage_areas(current->mm, | |
276 | HTLB_AREA_MASK(addr, len)); | |
277 | if (err) { | |
278 | printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" | |
279 | " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", | |
280 | addr, len, | |
281 | LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); | |
1da177e4 LT |
282 | return err; |
283 | } | |
284 | ||
c594adad | 285 | return 0; |
1da177e4 LT |
286 | } |
287 | ||
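prepare_hugepage_range() decides purely by address which side it opens: any range that ends below 4GB goes through the low-area path, everything else through the high-area path. A hedged worked example (the mask macro behaviour is inferred from how open_low_hpage_areas() consumes it, and the 256MB segment size is an assumption):

```c
/* Example, not from this file: a 256MB hugetlb mapping at
 * addr = 0xb0000000, len = 0x10000000 ends below 4GB, so
 * prepare_hugepage_range() calls
 *   open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len))
 * with LOW_ESID_MASK == 1 << 0xb (segment 11 only).  open_low_hpage_areas()
 * refuses with -EBUSY if that segment already holds a VMA; otherwise it
 * marks the bit in context.low_htlb_areas, copies the context into the
 * PACA, and has every CPU run flush_low_segments() to drop stale SLB
 * entries for the segment. */
```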
1da177e4 LT |
288 | struct page * |
289 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |
290 | { | |
291 | pte_t *ptep; | |
292 | struct page *page; | |
293 | ||
294 | if (! in_hugepage_area(mm->context, address)) | |
295 | return ERR_PTR(-EINVAL); | |
296 | ||
297 | ptep = huge_pte_offset(mm, address); | |
298 | page = pte_page(*ptep); | |
299 | if (page) | |
300 | page += (address % HPAGE_SIZE) / PAGE_SIZE; | |
301 | ||
302 | return page; | |
303 | } | |
304 | ||
305 | int pmd_huge(pmd_t pmd) | |
306 | { | |
307 | return 0; | |
308 | } | |
309 | ||
310 | struct page * | |
311 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |
312 | pmd_t *pmd, int write) | |
313 | { | |
314 | BUG(); | |
315 | return NULL; | |
316 | } | |
317 | ||
1da177e4 LT |
318 | /* Because we have an exclusive hugepage region which lies within the |
319 | * normal user address space, we have to take special measures to make | |
320 | * non-huge mmap()s evade the hugepage reserved regions. */ | |
321 | unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | |
322 | unsigned long len, unsigned long pgoff, | |
323 | unsigned long flags) | |
324 | { | |
325 | struct mm_struct *mm = current->mm; | |
326 | struct vm_area_struct *vma; | |
327 | unsigned long start_addr; | |
328 | ||
329 | if (len > TASK_SIZE) | |
330 | return -ENOMEM; | |
331 | ||
332 | if (addr) { | |
333 | addr = PAGE_ALIGN(addr); | |
334 | vma = find_vma(mm, addr); | |
335 | if (((TASK_SIZE - len) >= addr) | |
336 | && (!vma || (addr+len) <= vma->vm_start) | |
337 | && !is_hugepage_only_range(mm, addr,len)) | |
338 | return addr; | |
339 | } | |
1363c3cd WW |
340 | if (len > mm->cached_hole_size) { |
341 | start_addr = addr = mm->free_area_cache; | |
342 | } else { | |
343 | start_addr = addr = TASK_UNMAPPED_BASE; | |
344 | mm->cached_hole_size = 0; | |
345 | } | |
1da177e4 LT |
346 | |
347 | full_search: | |
348 | vma = find_vma(mm, addr); | |
349 | while (TASK_SIZE - len >= addr) { | |
350 | BUG_ON(vma && (addr >= vma->vm_end)); | |
351 | ||
352 | if (touches_hugepage_low_range(mm, addr, len)) { | |
353 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
354 | vma = find_vma(mm, addr); | |
355 | continue; | |
356 | } | |
c594adad DG |
357 | if (touches_hugepage_high_range(mm, addr, len)) { |
358 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | |
1da177e4 LT |
359 | vma = find_vma(mm, addr); |
360 | continue; | |
361 | } | |
362 | if (!vma || addr + len <= vma->vm_start) { | |
363 | /* | |
364 | * Remember the place where we stopped the search: | |
365 | */ | |
366 | mm->free_area_cache = addr + len; | |
367 | return addr; | |
368 | } | |
1363c3cd WW |
369 | if (addr + mm->cached_hole_size < vma->vm_start) |
370 | mm->cached_hole_size = vma->vm_start - addr; | |
1da177e4 LT |
371 | addr = vma->vm_end; |
372 | vma = vma->vm_next; | |
373 | } | |
374 | ||
375 | /* Make sure we didn't miss any holes */ | |
376 | if (start_addr != TASK_UNMAPPED_BASE) { | |
377 | start_addr = addr = TASK_UNMAPPED_BASE; | |
1363c3cd | 378 | mm->cached_hole_size = 0; |
1da177e4 LT |
379 | goto full_search; |
380 | } | |
381 | return -ENOMEM; | |
382 | } | |
383 | ||
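The bottom-up allocator above differs from the generic one only in the two touches_hugepage_*_range() checks, which keep ordinary mappings out of reserved hugepage regions, as the sketch below illustrates.

```c
/* Example of the evasion step (assuming 256MB low segments): if the
 * candidate range [addr, addr+len) touches an open hugepage segment,
 * addr is rounded up to the next 256MB boundary and the loop re-checks,
 * so the search steps past the entire reserved segment; for high areas
 * the same bump is done to the next HTLB_AREA_SHIFT-sized boundary. */
```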
384 | /* | |
385 | * This mmap-allocator allocates new areas top-down from below the | |
386 | * stack's low limit (the base): | |
387 | * | |
388 | * Because we have an exclusive hugepage region which lies within the | |
389 | * normal user address space, we have to take special measures to make | |
390 | * non-huge mmap()s evade the hugepage reserved regions. | |
391 | */ | |
392 | unsigned long | |
393 | arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |
394 | const unsigned long len, const unsigned long pgoff, | |
395 | const unsigned long flags) | |
396 | { | |
397 | struct vm_area_struct *vma, *prev_vma; | |
398 | struct mm_struct *mm = current->mm; | |
399 | unsigned long base = mm->mmap_base, addr = addr0; | |
1363c3cd | 400 | unsigned long largest_hole = mm->cached_hole_size; |
1da177e4 LT |
401 | int first_time = 1; |
402 | ||
403 | /* requested length too big for entire address space */ | |
404 | if (len > TASK_SIZE) | |
405 | return -ENOMEM; | |
406 | ||
407 | /* don't allow allocations above current base */ | 
408 | if (mm->free_area_cache > base) | |
409 | mm->free_area_cache = base; | |
410 | ||
411 | /* requesting a specific address */ | |
412 | if (addr) { | |
413 | addr = PAGE_ALIGN(addr); | |
414 | vma = find_vma(mm, addr); | |
415 | if (TASK_SIZE - len >= addr && | |
416 | (!vma || addr + len <= vma->vm_start) | |
417 | && !is_hugepage_only_range(mm, addr,len)) | |
418 | return addr; | |
419 | } | |
420 | ||
1363c3cd WW |
421 | if (len <= largest_hole) { |
422 | largest_hole = 0; | |
423 | mm->free_area_cache = base; | |
424 | } | |
1da177e4 LT |
425 | try_again: |
426 | /* make sure it can fit in the remaining address space */ | |
427 | if (mm->free_area_cache < len) | |
428 | goto fail; | |
429 | ||
430 | /* either no address requested or can't fit in requested address hole */ | 
431 | addr = (mm->free_area_cache - len) & PAGE_MASK; | |
432 | do { | |
433 | hugepage_recheck: | |
434 | if (touches_hugepage_low_range(mm, addr, len)) { | |
435 | addr = (addr & ((~0) << SID_SHIFT)) - len; | |
436 | goto hugepage_recheck; | |
c594adad DG |
437 | } else if (touches_hugepage_high_range(mm, addr, len)) { |
438 | addr = (addr & ((~0UL) << HTLB_AREA_SHIFT)) - len; | |
439 | goto hugepage_recheck; | |
1da177e4 LT |
440 | } |
441 | ||
442 | /* | |
443 | * Lookup failure means no vma is above this address, | |
444 | * i.e. return with success: | |
445 | */ | |
446 | if (!(vma = find_vma_prev(mm, addr, &prev_vma))) | |
447 | return addr; | |
448 | ||
449 | /* | |
450 | * new region fits between prev_vma->vm_end and | |
451 | * vma->vm_start, use it: | |
452 | */ | |
453 | if (addr+len <= vma->vm_start && | |
1363c3cd | 454 | (!prev_vma || (addr >= prev_vma->vm_end))) { |
1da177e4 | 455 | /* remember the address as a hint for next time */ |
1363c3cd WW |
456 | mm->cached_hole_size = largest_hole; |
457 | return (mm->free_area_cache = addr); | |
458 | } else { | |
1da177e4 | 459 | /* pull free_area_cache down to the first hole */ |
1363c3cd | 460 | if (mm->free_area_cache == vma->vm_end) { |
1da177e4 | 461 | mm->free_area_cache = vma->vm_start; |
1363c3cd WW |
462 | mm->cached_hole_size = largest_hole; |
463 | } | |
464 | } | |
465 | ||
466 | /* remember the largest hole we saw so far */ | |
467 | if (addr + largest_hole < vma->vm_start) | |
468 | largest_hole = vma->vm_start - addr; | |
1da177e4 LT |
469 | |
470 | /* try just below the current vma->vm_start */ | |
471 | addr = vma->vm_start-len; | |
472 | } while (len <= vma->vm_start); | |
473 | ||
474 | fail: | |
475 | /* | |
476 | * if hint left us with no space for the requested | |
477 | * mapping then try again: | |
478 | */ | |
479 | if (first_time) { | |
480 | mm->free_area_cache = base; | |
1363c3cd | 481 | largest_hole = 0; |
1da177e4 LT |
482 | first_time = 0; |
483 | goto try_again; | |
484 | } | |
485 | /* | |
486 | * A failed mmap() very likely causes application failure, | |
487 | * so fall back to the bottom-up function here. This scenario | |
488 | * can happen with large stack limits and large mmap() | |
489 | * allocations. | |
490 | */ | |
491 | mm->free_area_cache = TASK_UNMAPPED_BASE; | |
1363c3cd | 492 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
493 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
494 | /* | |
495 | * Restore the topdown base: | |
496 | */ | |
497 | mm->free_area_cache = base; | |
1363c3cd | 498 | mm->cached_hole_size = ~0UL; |
1da177e4 LT |
499 | |
500 | return addr; | |
501 | } | |
502 | ||
503 | static unsigned long htlb_get_low_area(unsigned long len, u16 segmask) | |
504 | { | |
505 | unsigned long addr = 0; | |
506 | struct vm_area_struct *vma; | |
507 | ||
508 | vma = find_vma(current->mm, addr); | |
509 | while (addr + len <= 0x100000000UL) { | |
510 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ | |
511 | ||
512 | if (! __within_hugepage_low_range(addr, len, segmask)) { | |
513 | addr = ALIGN(addr+1, 1<<SID_SHIFT); | |
514 | vma = find_vma(current->mm, addr); | |
515 | continue; | |
516 | } | |
517 | ||
518 | if (!vma || (addr + len) <= vma->vm_start) | |
519 | return addr; | |
520 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
521 | /* Depending on segmask this might not be a confirmed | |
522 | * hugepage region, so the ALIGN could have skipped | |
523 | * some VMAs */ | |
524 | vma = find_vma(current->mm, addr); | |
525 | } | |
526 | ||
527 | return -ENOMEM; | |
528 | } | |
529 | ||
c594adad | 530 | static unsigned long htlb_get_high_area(unsigned long len, u16 areamask) |
1da177e4 | 531 | { |
c594adad | 532 | unsigned long addr = 0x100000000UL; |
1da177e4 LT |
533 | struct vm_area_struct *vma; |
534 | ||
535 | vma = find_vma(current->mm, addr); | |
c594adad | 536 | while (addr + len <= TASK_SIZE_USER64) { |
1da177e4 | 537 | BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ |
c594adad DG |
538 | |
539 | if (! __within_hugepage_high_range(addr, len, areamask)) { | |
540 | addr = ALIGN(addr+1, 1UL<<HTLB_AREA_SHIFT); | |
541 | vma = find_vma(current->mm, addr); | |
542 | continue; | |
543 | } | |
1da177e4 LT |
544 | |
545 | if (!vma || (addr + len) <= vma->vm_start) | |
546 | return addr; | |
547 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | |
c594adad DG |
548 | /* Depending on segmask this might not be a confirmed |
549 | * hugepage region, so the ALIGN could have skipped | |
550 | * some VMAs */ | |
551 | vma = find_vma(current->mm, addr); | |
1da177e4 LT |
552 | } |
553 | ||
554 | return -ENOMEM; | |
555 | } | |
556 | ||
557 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |
558 | unsigned long len, unsigned long pgoff, | |
559 | unsigned long flags) | |
560 | { | |
c594adad DG |
561 | int lastshift; |
562 | u16 areamask, curareas; | |
563 | ||
1da177e4 LT |
564 | if (len & ~HPAGE_MASK) |
565 | return -EINVAL; | |
566 | ||
567 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | |
568 | return -EINVAL; | |
569 | ||
570 | if (test_thread_flag(TIF_32BIT)) { | |
c594adad | 571 | curareas = current->mm->context.low_htlb_areas; |
1da177e4 LT |
572 | |
573 | /* First see if we can do the mapping in the existing | |
c594adad DG |
574 | * low areas */ |
575 | addr = htlb_get_low_area(len, curareas); | |
1da177e4 LT |
576 | if (addr != -ENOMEM) |
577 | return addr; | |
578 | ||
c594adad DG |
579 | lastshift = 0; |
580 | for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); | |
581 | ! lastshift; areamask >>=1) { | |
582 | if (areamask & 1) | |
1da177e4 LT |
583 | lastshift = 1; |
584 | ||
c594adad | 585 | addr = htlb_get_low_area(len, curareas | areamask); |
1da177e4 | 586 | if ((addr != -ENOMEM) |
c594adad | 587 | && open_low_hpage_areas(current->mm, areamask) == 0) |
1da177e4 LT |
588 | return addr; |
589 | } | |
1da177e4 | 590 | } else { |
c594adad DG |
591 | curareas = current->mm->context.high_htlb_areas; |
592 | ||
593 | /* First see if we can do the mapping in the existing | |
594 | * high areas */ | |
595 | addr = htlb_get_high_area(len, curareas); | |
596 | if (addr != -ENOMEM) | |
597 | return addr; | |
598 | ||
599 | lastshift = 0; | |
600 | for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); | |
601 | ! lastshift; areamask >>=1) { | |
602 | if (areamask & 1) | |
603 | lastshift = 1; | |
604 | ||
605 | addr = htlb_get_high_area(len, curareas | areamask); | |
606 | if ((addr != -ENOMEM) | |
607 | && open_high_hpage_areas(current->mm, areamask) == 0) | |
608 | return addr; | |
609 | } | |
1da177e4 | 610 | } |
c594adad DG |
611 | printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" |
612 | " enough areas\n"); | |
613 | return -ENOMEM; | |
1da177e4 LT |
614 | } |
615 | ||
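hugetlb_get_unmapped_area() first tries to place the mapping inside areas the process already has open; only if that fails does it widen the search, proposing one additional area at a time from the top of the range downward until a placement plus open_*_hpage_areas() succeeds or the candidate mask runs out. The shape of that loop, shown as an illustrative sketch (LOW_ESID_MASK is defined elsewhere):

```c
/* Widening search, 32-bit task case, illustrative only:
 *
 *   areamask = LOW_ESID_MASK(0x100000000UL - len, len);   (topmost segment)
 *   repeat, stopping after the iteration in which bit 0 is set:
 *       addr = htlb_get_low_area(len, curareas | areamask);
 *       if addr was found and open_low_hpage_areas(mm, areamask) == 0:
 *           return addr;
 *       areamask >>= 1;                                    (next lower segment)
 *
 * The 64-bit case is the same loop using HTLB_AREA_MASK and the
 * high-area helpers over [4GB, TASK_SIZE_USER64). */
```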
1da177e4 LT |
616 | int hash_huge_page(struct mm_struct *mm, unsigned long access, |
617 | unsigned long ea, unsigned long vsid, int local) | |
618 | { | |
619 | pte_t *ptep; | |
620 | unsigned long va, vpn; | |
621 | pte_t old_pte, new_pte; | |
96e28449 | 622 | unsigned long rflags, prpn; |
1da177e4 LT |
623 | long slot; |
624 | int err = 1; | |
625 | ||
626 | spin_lock(&mm->page_table_lock); | |
627 | ||
628 | ptep = huge_pte_offset(mm, ea); | |
629 | ||
630 | /* Search the Linux page table for a match with va */ | |
631 | va = (vsid << 28) | (ea & 0x0fffffff); | |
632 | vpn = va >> HPAGE_SHIFT; | |
633 | ||
634 | /* | |
635 | * If no pte found or not present, send the problem up to | |
636 | * do_page_fault | |
637 | */ | |
638 | if (unlikely(!ptep || pte_none(*ptep))) | |
639 | goto out; | |
640 | ||
641 | /* BUG_ON(pte_bad(*ptep)); */ | |
642 | ||
643 | /* | |
644 | * Check the user's access rights to the page. If access should be | |
645 | * prevented then send the problem up to do_page_fault. | |
646 | */ | |
647 | if (unlikely(access & ~pte_val(*ptep))) | |
648 | goto out; | |
649 | /* | |
650 | * At this point, we have a pte (old_pte) which can be used to build | |
651 | * or update an HPTE. There are 2 cases: | |
652 | * | |
653 | * 1. There is a valid (present) pte with no associated HPTE (this is | |
654 | * the most common case) | |
655 | * 2. There is a valid (present) pte with an associated HPTE. The | |
656 | * current values of the pp bits in the HPTE prevent access | |
657 | * because we are doing software DIRTY bit management and the | |
658 | * page is currently not DIRTY. | |
659 | */ | |
660 | ||
661 | ||
662 | old_pte = *ptep; | |
663 | new_pte = old_pte; | |
664 | ||
96e28449 | 665 | rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); |
1da177e4 | 666 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ |
96e28449 | 667 | rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); |
1da177e4 LT |
668 | |
669 | /* Check if pte already has an hpte (case 2) */ | |
670 | if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { | |
671 | /* There MIGHT be an HPTE for this pte */ | |
672 | unsigned long hash, slot; | |
673 | ||
674 | hash = hpt_hash(vpn, 1); | |
675 | if (pte_val(old_pte) & _PAGE_SECONDARY) | |
676 | hash = ~hash; | |
677 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
678 | slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; | |
679 | ||
96e28449 | 680 | if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) |
1da177e4 LT |
681 | pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; |
682 | } | |
683 | ||
684 | if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { | |
685 | unsigned long hash = hpt_hash(vpn, 1); | |
686 | unsigned long hpte_group; | |
687 | ||
688 | prpn = pte_pfn(old_pte); | |
689 | ||
690 | repeat: | |
691 | hpte_group = ((hash & htab_hash_mask) * | |
692 | HPTES_PER_GROUP) & ~0x7UL; | |
693 | ||
694 | /* Update the linux pte with the HPTE slot */ | |
695 | pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; | |
696 | pte_val(new_pte) |= _PAGE_HASHPTE; | |
697 | ||
698 | /* Add in WIMG bits */ | |
699 | /* XXX We should store these in the pte */ | |
96e28449 | 700 | rflags |= _PAGE_COHERENT; |
1da177e4 | 701 | |
96e28449 DG |
702 | slot = ppc_md.hpte_insert(hpte_group, va, prpn, |
703 | HPTE_V_LARGE, rflags); | |
1da177e4 LT |
704 | |
705 | /* Primary is full, try the secondary */ | |
706 | if (unlikely(slot == -1)) { | |
707 | pte_val(new_pte) |= _PAGE_SECONDARY; | |
708 | hpte_group = ((~hash & htab_hash_mask) * | |
709 | HPTES_PER_GROUP) & ~0x7UL; | |
710 | slot = ppc_md.hpte_insert(hpte_group, va, prpn, | |
96e28449 | 711 | HPTE_V_LARGE, rflags); |
1da177e4 LT |
712 | if (slot == -1) { |
713 | if (mftb() & 0x1) | |
714 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | |
715 | ||
716 | ppc_md.hpte_remove(hpte_group); | |
717 | goto repeat; | |
718 | } | |
719 | } | |
720 | ||
721 | if (unlikely(slot == -2)) | |
722 | panic("hash_huge_page: pte_insert failed\n"); | |
723 | ||
724 | pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; | |
725 | ||
726 | /* | |
727 | * No need to use ldarx/stdcx here because all who | |
728 | * might be updating the pte will hold the | |
729 | * page_table_lock | |
730 | */ | |
731 | *ptep = new_pte; | |
732 | } | |
733 | ||
734 | err = 0; | |
735 | ||
736 | out: | |
737 | spin_unlock(&mm->page_table_lock); | |
738 | ||
739 | return err; | |
740 | } |
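hash_huge_page() mirrors the normal 4K hash-fault path, just with the large-page hash function and the single replicated Linux PTE described earlier. A short note on the slot bookkeeping visible above, descriptive rather than normative:

```c
/* HPTE slot bookkeeping as used above (HPTES_PER_GROUP is 8, hence the
 * "& ~0x7UL" group alignment):
 *
 *   hash      = hpt_hash(vpn, 1);                 large-page hash of the VPN
 *   primary   = (hash  & htab_hash_mask) * HPTES_PER_GROUP;
 *   secondary = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
 *
 * The slot index returned by ppc_md.hpte_insert() (0..7 within the
 * group) is stored back into the PTE via _PAGE_GROUP_IX (<< 12), and
 * _PAGE_SECONDARY records which hash was used, so a later
 * hpte_updatepp() or invalidate can recompute the exact slot without
 * searching the hash table. */
```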