/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/fs.h>

#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/pat.h>
#include <asm/e820.h>
#include <asm/cacheflush.h>
#include <asm/fcntl.h>
#include <asm/mtrr.h>
int pat_wc_enabled = 1;

static u64 __read_mostly boot_pat_state;
static int nopat(char *str)
{
	pat_wc_enabled = 0;
	printk(KERN_INFO "x86: PAT support disabled.\n");

	return 0;
}
early_param("nopat", nopat);
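/* Usage: booting with "nopat" on the kernel command line disables PAT. */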
static int pat_known_cpu(void)
{
	if (!pat_wc_enabled)
		return 0;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    (boot_cpu_data.x86 == 0xF ||
	     (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model >= 15))) {
		if (cpu_has_pat)
			return 1;
	}

	pat_wc_enabled = 0;
	printk(KERN_INFO "CPU and/or kernel does not support PAT.\n");
	return 0;
}
enum {
	PAT_UC = 0,		/* uncached */
	PAT_WC = 1,		/* Write combining */
	PAT_WT = 4,		/* Write Through */
	PAT_WP = 5,		/* Write Protected */
	PAT_WB = 6,		/* Write Back (default) */
	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
};

#define PAT(x, y) ((u64)PAT_ ## y << ((x) * 8))
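/*
 * Example: PAT(1, WC) expands to (u64)PAT_WC << 8, i.e. 0x100, placing
 * Write-Combining in PAT entry 1. The full value programmed by pat_init()
 * below works out to 0x0007010600070106.
 */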
void pat_init(void)
{
	u64 pat;

#ifndef CONFIG_X86_PAT
	nopat(NULL);
#endif

	/* Boot CPU enables PAT based on CPU feature */
	if (!smp_processor_id() && !pat_known_cpu())
		return;

	/* APs enable PAT iff boot CPU has enabled it before */
	if (smp_processor_id() && !pat_wc_enabled)
		return;

	/* Set PWT to Write-Combining. All other bits stay the same */
	/*
	 * PTE encoding used in Linux:
	 *	PAT
	 *	|PCD
	 *	||PWT
	 *	|||
	 *	000 WB		_PAGE_CACHE_WB
	 *	001 WC		_PAGE_CACHE_WC
	 *	010 UC-		_PAGE_CACHE_UC_MINUS
	 *	011 UC		_PAGE_CACHE_UC
	 * PAT bit unused
	 */
	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

	/* Boot CPU saves the original PAT MSR value before overwriting it */
	if (!smp_processor_id())
		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);

	wrmsrl(MSR_IA32_CR_PAT, pat);
	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
	       smp_processor_id(), boot_pat_state, pat);
}

#undef PAT
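/*
 * Illustrative sketch (not part of the original file): dumping a PAT MSR
 * value entry by entry, e.g. to inspect boot_pat_state from the printk
 * above. The helper name is hypothetical; kept under #if 0 so it is not
 * compiled.
 */
#if 0
static void example_dump_pat(u64 pat_msr)
{
	int i;

	/* Each PAT entry occupies one byte, PAT0 in the lowest byte */
	for (i = 0; i < 8; i++)
		printk(KERN_DEBUG "PAT%d = 0x%02x\n", i,
		       (unsigned int)((pat_msr >> (i * 8)) & 0xFF));
}
#endif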
static char *cattr_name(unsigned long flags)
{
	switch (flags & _PAGE_CACHE_MASK) {
	case _PAGE_CACHE_UC:		return "uncached";
	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
	case _PAGE_CACHE_WB:		return "write-back";
	case _PAGE_CACHE_WC:		return "write-combining";
	default:			return "broken";
	}
}
/*
 * The global memtype list keeps track of the memory type for specific
 * physical memory areas. Conflicting memory types in different
 * mappings can cause CPU cache corruption. To avoid this we keep track.
 *
 * The list is sorted based on starting address and can contain multiple
 * entries for each address (this allows reference counting for overlapping
 * areas). All the aliases have the same cache attributes of course.
 * Zero attributes are represented as holes.
 *
 * Currently the data structure is a list because the number of mappings
 * is expected to be relatively small. If this should become a problem
 * it could be changed to an rbtree or similar.
 *
 * memtype_lock protects the whole list.
 */
struct memtype {
	u64 start;
	u64 end;
	unsigned long type;
	struct list_head nd;
};

static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
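/*
 * Example (hypothetical addresses): two drivers reserving the same page
 * as write-combining produce two aliasing entries,
 *
 *	0xd0000000-0xd0001000 write-combining
 *	0xd0000000-0xd0001000 write-combining
 *
 * and the attribute stays in effect until the last user has called
 * free_memtype() for its reservation.
 */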
/*
 * Does the intersection of the PAT memory type and the MTRR memory type
 * and returns the resulting memory type as PAT understands it.
 * (The type values in pat and mtrr do not share the same encoding.)
 * The intersection is based on the "Effective Memory Type" tables in
 * IA-32 SDM vol 3a.
 */
static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
			   unsigned long *ret_prot)
{
	unsigned long pat_type;
	u8 mtrr_type;

	mtrr_type = mtrr_type_lookup(start, end);
	if (mtrr_type == 0xFF) {		/* MTRR not enabled */
		*ret_prot = prot;
		return 0;
	}
	if (mtrr_type == 0xFE) {		/* MTRR match error */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}
	if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
	    mtrr_type != MTRR_TYPE_WRBACK &&
	    mtrr_type != MTRR_TYPE_WRCOMB) {	/* MTRR type unhandled */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}

	pat_type = prot & _PAGE_CACHE_MASK;
	prot &= (~_PAGE_CACHE_MASK);

	/* Currently doing intersection by hand. Optimize it later. */
	if (pat_type == _PAGE_CACHE_WC) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else if (pat_type == _PAGE_CACHE_UC_MINUS) {
		*ret_prot = prot | _PAGE_CACHE_UC_MINUS;
	} else if (pat_type == _PAGE_CACHE_UC ||
		   mtrr_type == MTRR_TYPE_UNCACHABLE) {
		*ret_prot = prot | _PAGE_CACHE_UC;
	} else if (mtrr_type == MTRR_TYPE_WRCOMB) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else {
		*ret_prot = prot | _PAGE_CACHE_WB;
	}

	return 0;
}
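/*
 * Worked example: a _PAGE_CACHE_WB request over a range an MTRR marks
 * MTRR_TYPE_WRCOMB comes back as _PAGE_CACHE_WC; over MTRR_TYPE_UNCACHABLE
 * it comes back as _PAGE_CACHE_UC. WC and UC- requests pass through as-is.
 */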
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
		    unsigned long *ret_type)
{
	struct memtype *new_entry = NULL;
	struct memtype *parse;
	unsigned long actual_type;
	int err = 0;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled) {
		if (ret_type)
			*ret_type = req_type;

		return 0;
	}

	/* Low ISA region is always mapped WB in page table. No need to track */
	if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
		if (ret_type)
			*ret_type = _PAGE_CACHE_WB;

		return 0;
	}

	req_type &= _PAGE_CACHE_MASK;
	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
	if (err) {
		if (ret_type)
			*ret_type = actual_type;

		return -EINVAL;
	}

	new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!new_entry)
		return -ENOMEM;

	new_entry->start = start;
	new_entry->end = end;
	new_entry->type = actual_type;

	if (ret_type)
		*ret_type = actual_type;

	spin_lock(&memtype_lock);
	/* Search for existing mapping that overlaps the current range */
	list_for_each_entry(parse, &memtype_list, nd) {
		struct memtype *saved_ptr;

		/* New range ends before this entry: insert in front of it */
		if (parse->start >= end) {
			printk(KERN_DEBUG "New Entry\n");
			list_add(&new_entry->nd, parse->nd.prev);
			new_entry = NULL;
			break;
		}

		/* New range overlaps the start of this entry */
		if (start <= parse->start && end >= parse->start) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
		"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start)
					break;

				if (actual_type != parse->type) {
					printk(KERN_INFO
		"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err)
				break;

			printk(KERN_DEBUG "Overlap at 0x%Lx-0x%Lx\n",
			       saved_ptr->start, saved_ptr->end);
			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, saved_ptr->nd.prev);
			new_entry = NULL;
			break;
		}
		/* New range begins inside this entry */
		if (start < parse->end) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
		"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start)
					break;

				if (actual_type != parse->type) {
					printk(KERN_INFO
		"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err)
				break;

			printk(KERN_DEBUG "Overlap at 0x%Lx-0x%Lx\n",
			       saved_ptr->start, saved_ptr->end);
			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, &saved_ptr->nd);
			new_entry = NULL;
			break;
		}
	}
362 "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
363 start
, end
, cattr_name(new_entry
->type
),
364 cattr_name(req_type
));
366 spin_unlock(&memtype_lock
);
371 /* No conflict. Not yet added to the list. Add to the tail */
372 list_add_tail(&new_entry
->nd
, &memtype_list
);
373 printk("New Entry\n");
378 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
379 start
, end
, cattr_name(actual_type
),
380 cattr_name(req_type
), cattr_name(*ret_type
));
383 "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
384 start
, end
, cattr_name(actual_type
),
385 cattr_name(req_type
));
388 spin_unlock(&memtype_lock
);
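/*
 * Note: when a request overlaps an existing entry of a different type,
 * callers that supply ret_type silently inherit the existing type above,
 * while callers passing ret_type == NULL get -EBUSY instead.
 */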
int free_memtype(u64 start, u64 end)
{
	struct memtype *ml;
	int err = -EINVAL;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled)
		return 0;

	/* Low ISA region is always mapped WB. No need to track */
	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS)
		return 0;

	spin_lock(&memtype_lock);
	list_for_each_entry(ml, &memtype_list, nd) {
		/* Only an exact start/end match releases an entry */
		if (ml->start == start && ml->end == end) {
			list_del(&ml->nd);
			kfree(ml);
			err = 0;
			break;
		}
	}
	spin_unlock(&memtype_lock);

	if (err) {
		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
			current->comm, current->pid, start, end);
	}

	printk(KERN_DEBUG "free_memtype request 0x%Lx-0x%Lx\n", start, end);

	return err;
}