/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>

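/*
 * cpa_lock serializes all page attribute changes.  df_list collects
 * page table pages that became unused after a large page was restored;
 * they are only freed once global_flush_tlb() has flushed the old
 * mappings on all CPUs.
 */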
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);

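/*
 * Walk the kernel page tables for a virtual address.  Returns NULL if
 * the address is not mapped, the pmd entry itself (cast to a pte) if
 * the address is covered by a 2/4MB large page, and the pte otherwise.
 */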
pte_t *lookup_address(unsigned long address)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;
        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;
        return pte_offset_kernel(pmd, address);
}

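/*
 * Replace one large mapping with PTRS_PER_PTE small ones: allocate a
 * page table page and fill it with 4KB ptes covering the same 2/4MB
 * range.  The pte for the page at "address" gets "prot"; all others
 * get "ref_prot".  cpa_lock is dropped around the allocation because
 * alloc_pages(GFP_KERNEL, ...) may sleep.
 */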
static struct page *split_large_page(unsigned long address, pgprot_t prot,
                                     pgprot_t ref_prot)
{
        int i;
        unsigned long addr;
        struct page *base;
        pte_t *pbase;

        spin_unlock_irq(&cpa_lock);
        base = alloc_pages(GFP_KERNEL, 0);
        spin_lock_irq(&cpa_lock);
        if (!base)
                return NULL;

        /*
         * page_private is used to track the number of entries in
         * the page table page that have non-standard attributes.
         */
        SetPagePrivate(base);
        page_private(base) = 0;

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
                                           addr == address ? prot : ref_prot));
        }
        return base;
}

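/*
 * Flush the caches and TLB on the CPU this runs on (it is invoked on
 * every CPU via flush_map()).  Given a specific address and CLFLUSH
 * support, only the affected cache lines are flushed; otherwise the
 * whole cache is written back with WBINVD.
 */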
static void flush_kernel_map(void *arg)
{
        unsigned long adr = (unsigned long)arg;

        if (adr && cpu_has_clflush) {
                int i;
                for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
                        asm volatile("clflush (%0)" :: "r" (adr + i));
        } else if (boot_cpu_data.x86_model >= 4)
                wbinvd();

        /* Flush all to work around errata in early Athlons regarding
         * large page flushing.
         */
        __flush_tlb_all();
}

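/*
 * Install a pmd-level entry.  With PAE (PTRS_PER_PMD > 1) the kernel
 * pmds are shared, so updating init_mm is enough.  Without PAE every
 * pgd carries its own copy of the kernel pmd entries, so the change
 * must be propagated to all pgds on pgd_list.
 */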
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        struct page *page;
        unsigned long flags;

        set_pte_atomic(kpte, pte);      /* change init_mm */
        if (PTRS_PER_PMD > 1)
                return;

        spin_lock_irqsave(&pgd_lock, flags);
        for (page = pgd_list; page; page = (struct page *)page->index) {
                pgd_t *pgd;
                pud_t *pud;
                pmd_t *pmd;
                pgd = (pgd_t *)page_address(page) + pgd_index(address);
                pud = pud_offset(pgd, address);
                pmd = pmd_offset(pud, address);
                set_pte_atomic((pte_t *)pmd, pte);
        }
        spin_unlock_irqrestore(&pgd_lock, flags);
}

/*
 * No more special protections in this 2/4MB area - revert to a
 * large page again.
 */
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
        pgprot_t ref_prot;
        pte_t *linear;

        ref_prot =
        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;

        linear = (pte_t *)
                pmd_offset(pud_offset(pgd_offset_k(address), address), address);
        set_pmd_pte(linear, address,
                    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
                            ref_prot));
}

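/*
 * Apply "prot" to a single page in the linear mapping.  The first
 * non-default attribute in a 2/4MB region forces a split of the large
 * page; page_private() of the page table page counts the non-default
 * entries, and when it drops back to zero the page table page is
 * queued on df_list and the region reverted to a large page.
 */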
static int
__change_page_attr(struct page *page, pgprot_t prot)
{
        pte_t *kpte;
        unsigned long address;
        struct page *kpte_page;

        BUG_ON(PageHighMem(page));
        address = (unsigned long)page_address(page);

        kpte = lookup_address(address);
        if (!kpte)
                return -EINVAL;
        kpte_page = virt_to_page(kpte);
        if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
                if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
                        set_pte_atomic(kpte, mk_pte(page, prot));
                } else {
                        pgprot_t ref_prot;
                        struct page *split;

                        ref_prot =
                        ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
                                ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
                        split = split_large_page(address, prot, ref_prot);
                        if (!split)
                                return -ENOMEM;
                        set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
                        kpte_page = split;
                }
                page_private(kpte_page)++;
        } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
                set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
                BUG_ON(page_private(kpte_page) == 0);
                page_private(kpte_page)--;
        } else
                BUG();

        /*
         * If the pte was reserved, it means it was created at boot
         * time (not via split_large_page) and in turn we must not
         * replace it with a largepage.
         */
        if (!PageReserved(kpte_page)) {
                if (cpu_has_pse && (page_private(kpte_page) == 0)) {
                        ClearPagePrivate(kpte_page);
                        list_add(&kpte_page->lru, &df_list);
                        revert_page(kpte_page, address);
                }
        }
        return 0;
}

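/* Run flush_kernel_map() on every CPU and wait for completion. */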
static inline void flush_map(void *adr)
{
        on_each_cpu(flush_kernel_map, adr, 1, 1);
}

/*
 * Change the page attributes of a page in the linear mapping.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * in-kernel linear mapping too.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere.
 * This function only deals with the kernel linear map.
 *
 * Caller must call global_flush_tlb() after this.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        int err = 0;
        int i;
        unsigned long flags;

        spin_lock_irqsave(&cpa_lock, flags);
        for (i = 0; i < numpages; i++, page++) {
                err = __change_page_attr(page, prot);
                if (err)
                        break;
        }
        spin_unlock_irqrestore(&cpa_lock, flags);
        return err;
}

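/*
 * Flush the caches and TLBs on all CPUs for the changes queued since
 * the last call, then free the page table pages that became unused
 * when their regions were reverted to large pages.
 */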
void global_flush_tlb(void)
{
        struct list_head l;
        struct page *pg, *next;

        BUG_ON(irqs_disabled());

        spin_lock_irq(&cpa_lock);
        list_replace_init(&df_list, &l);
        spin_unlock_irq(&cpa_lock);
        if (!cpu_has_clflush)
                flush_map(NULL);
        list_for_each_entry_safe(pg, next, &l, lru) {
                if (cpu_has_clflush)
                        flush_map(page_address(pg));
                __free_page(pg);
        }
}
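
/*
 * A minimal usage sketch (illustrative only; "buf" stands for some
 * page-aligned lowmem allocation the caller already owns): map one
 * page uncached, use it, then restore the default attributes.
 *
 *      struct page *pg = virt_to_page(buf);
 *
 *      change_page_attr(pg, 1, PAGE_KERNEL_NOCACHE);
 *      global_flush_tlb();
 *      ...
 *      change_page_attr(pg, 1, PAGE_KERNEL);
 *      global_flush_tlb();
 */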
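
/*
 * With CONFIG_DEBUG_PAGEALLOC, kernel_map_pages() unmaps pages from
 * the linear mapping when they are freed and maps them again on
 * allocation, so stray accesses to freed memory fault immediately.
 */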
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable)
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);

        /* The return value is ignored - the calls cannot fail,
         * large pages are disabled at boot time.
         */
        change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
        /* We should perform an IPI and flush all TLBs,
         * but that can deadlock -> flush only the current CPU.
         */
        __flush_tlb_all();
}
#endif

EXPORT_SYMBOL(change_page_attr);
EXPORT_SYMBOL(global_flush_tlb);