/*
 *    Copyright IBM Corp. 2007, 2011
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif


unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm)
		update_mm(mm, current);
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}
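
/*
 * Editor's note (not in the original source): crst_table_upgrade() is
 * invoked from the s390 mmap path when a process needs addresses above its
 * current asce_limit. Each successful pass adds one region-table level,
 * growing the limit from 2 GB (1UL << 31) to 4 TB (1UL << 42) to 8 PB
 * (1UL << 53), which is why the caller may loop via the 'repeat' label
 * until the requested limit is reached.
 */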

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm)
		__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		update_mm(mm, current);
}
#endif

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
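
/*
 * Usage sketch (editor's addition, not part of the original file): a
 * hypervisor such as KVM typically allocates one gmap per virtual machine
 * and switches it in around guest execution. The real call sites live in
 * arch/s390/kvm and may differ in detail:
 *
 *	struct gmap *gmap = gmap_alloc(current->mm);
 *
 *	if (!gmap)
 *		return -ENOMEM;
 *	gmap_enable(gmap);
 *	... run the guest via SIE ...
 *	gmap_disable(gmap);
 *	gmap_free(gmap);
 */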

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INVALID)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;

	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_idte((unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
	__releases(&gmap->mm->page_table_lock)
	__acquires(&gmap->mm->page_table_lock)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INVALID;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > TASK_MAX_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = (from + off) | (_SEGMENT_ENTRY_INVALID |
					 _SEGMENT_ENTRY_PROTECT);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
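
/*
 * Example (editor's illustration, hypothetical values): map four segments
 * (4 * 1 MB, since PMD_SIZE is 1 MB on s390) of the parent address space
 * starting at 0x10000000 to guest address 0. 'from', 'to' and 'len' must
 * all be PMD_SIZE aligned, otherwise the call fails with -EINVAL:
 *
 *	int rc = gmap_map_segment(gmap, 0x10000000UL, 0UL, 4 * PMD_SIZE);
 *
 *	if (rc)
 *		return rc;
 */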

static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
{
	unsigned long *table;

	table = gmap->table + ((address >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((address >> 20) & 0x7ff);
	return table;
}

/**
 * __gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, vmaddr, segment;
	struct gmap_pgtable *mp;
	struct page *page;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return PTR_ERR(segment_ptr);
	/* Convert the gmap address to an mm address. */
	segment = *segment_ptr;
	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (address & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		return vmaddr | (address & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @address: guest address
 * @gmap: pointer to guest mapping meta data structure
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(address, gmap);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
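
/*
 * Example (editor's sketch): the translation result is returned in an
 * unsigned long that holds either a user space address or a negative
 * errno, so callers check it with IS_ERR_VALUE():
 *
 *	unsigned long uaddr = gmap_translate(gaddr, gmap);
 *
 *	if (IS_ERR_VALUE(uaddr))
 *		return (long) uaddr;
 *	... access the guest page through uaddr ...
 */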

static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
				unsigned long *segment_ptr, struct gmap *gmap)
{
	unsigned long vmaddr;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct mm_struct *mm;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	mm = gmap->mm;
	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
	vma = find_vma(mm, vmaddr);
	if (!vma || vma->vm_start > vmaddr)
		return -EFAULT;
	/* Walk the parent mm page table */
	pgd = pgd_offset(mm, vmaddr);
	pud = pud_alloc(mm, pgd, vmaddr);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(mm, pud, vmaddr);
	if (!pmd)
		return -ENOMEM;
	if (!pmd_present(*pmd) &&
	    __pte_alloc(mm, vma, pmd, vmaddr))
		return -ENOMEM;
	/* pmd now points to a valid segment table entry. */
	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
	if (!rmap)
		return -ENOMEM;
	/* Link gmap segment table entry location to page table. */
	page = pmd_page(*pmd);
	mp = (struct gmap_pgtable *) page->index;
	rmap->gmap = gmap;
	rmap->entry = segment_ptr;
	rmap->vmaddr = address & PMD_MASK;
	spin_lock(&mm->page_table_lock);
	if (*segment_ptr == segment) {
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
		rmap = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	kfree(rmap);
	return 0;
}

static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
					     _SEGMENT_ENTRY_PROTECT);
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long *segment_ptr, segment;
	struct gmap_pgtable *mp;
	struct page *page;
	int rc;

	current->thread.gmap_addr = address;
	segment_ptr = gmap_table_walk(address, gmap);
	if (IS_ERR(segment_ptr))
		return -EFAULT;
	/* Convert the gmap address to an mm address. */
	while (1) {
		segment = *segment_ptr;
		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
			/* Page table is present */
			page = pfn_to_page(segment >> PAGE_SHIFT);
			mp = (struct gmap_pgtable *) page->index;
			return mp->vmaddr | (address & ~PMD_MASK);
		}
		if (!(segment & _SEGMENT_ENTRY_PROTECT))
			/* Nothing mapped in the gmap address space. */
			break;
		rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
		if (rc)
			return rc;
	}
	return -EFAULT;
}

unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(address, gmap);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);

void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
{
	unsigned long *table, address, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	address = from;
	while (address < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((address >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((address >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
			address = (address + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
			       size, NULL);
		address = (address + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);
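
/*
 * Example (editor's sketch): discard the backing of a guest range, as the
 * CMMA/ESSA handling in kvm-s390 does for pages the guest marked unused;
 * the zapped pages are simply refaulted on the next guest access:
 *
 *	gmap_discard(gaddr, gaddr + PMD_SIZE, gmap);
 */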

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
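
/*
 * Sketch of a pte invalidation notifier consumer (editor's addition; the
 * in-tree user is the kvm-s390 code). The callback is invoked under
 * gmap_notifier_lock, so it must not sleep:
 *
 *	static void my_notifier_call(struct gmap *gmap, unsigned long address)
 *	{
 *		... kick the vcpu that has 'address' mapped ...
 *	}
 *
 *	static struct gmap_notifier my_nb = {
 *		.notifier_call = my_notifier_call,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_nb);
 *	...
 *	gmap_unregister_ipte_notifier(&my_nb);
 */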

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @start: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_fault(start, gmap);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		if (unlikely(!ptep))
			continue;
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			start += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		spin_unlock(ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
{
	unsigned long segment_offset;
	struct gmap_notifier *nb;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	segment_offset = segment_offset * (4096 / sizeof(pte_t));
	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(rmap, &mp->mapper, list) {
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(rmap->gmap,
					  rmap->vmaddr + segment_offset);
	}
	spin_unlock(&gmap_notifier_lock);
}

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	if (!pgtable_page_ctor(page)) {
		kfree(mp);
		__free_page(page);
		return NULL;
	}
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
		    PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
	ptep = get_locked_pte(mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_HC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(*ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
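
/*
 * Example (editor's sketch): the KVM handler for the SET STORAGE KEY
 * EXTENDED instruction passes the guest-supplied storage key byte straight
 * through; 'nq' selects the non-quiescing variant of the key update:
 *
 *	rc = set_guest_storage_key(current->mm, vmaddr, key, nq);
 *	if (rc)
 *		return rc;
 */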

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
					   unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}
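
/*
 * Worked example (editor's note): atomic_xor_bits() maintains the fragment
 * state of a 4K page-table page in page->_mapcount. On 64 bit, FRAG_MASK is
 * 0x03 and each page holds two 2K page table fragments: bits 0-1 mark
 * fragments in use, bits 4-5 (bit << 4) mark fragments queued for RCU
 * freeing. Taking the second fragment of a page whose mask is 0x01 does
 * atomic_xor_bits(&page->_mapcount, 0x02), which returns 0x03; since
 * (0x03 & FRAG_MASK) == FRAG_MASK, the allocator below then removes the
 * fully used page from mm->context.pgtable_list.
 */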

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm, addr);
		if (!new)
			return -ENOMEM;

		spin_lock(&mm->page_table_lock);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);
			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* Free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table);
			new = NULL;
		}
		spin_unlock(&mm->page_table_lock);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				   struct mm_struct *mm, pgd_t *pgd,
				   unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}

/*
 * switch on pgstes for the current userspace process (for kvm)
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? If yes, we are done */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
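
/*
 * Example (editor's sketch): KVM enables pgstes once per process before
 * the first guest is run, e.g. from its VM creation path:
 *
 *	rc = s390_enable_sie();
 *	if (rc)
 *		return rc;
 */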

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/* No need to flush TLB
	 * On s390 reference bits are in storage key and never in TLB */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(&mm->page_table_lock);

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */