author		venkatesh.pallipadi@intel.com <venkatesh.pallipadi@intel.com>	2008-03-18 20:00:14 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-04-17 11:41:19 -0400
commit		2e5d9c857d4e6c9e7b7d8c8c86a68a7842d213d6 (patch)
tree		6c90c0f9f38ff85e2f42ddc0f4ef0291cdd47d38 /arch/x86/mm/pat.c
parent		d27554d874c7eeb14c8bfecdc39c3a8618cd8d32 (diff)
x86: PAT infrastructure patch
Sets up pat_init() infrastructure.
The PAT MSR has the following setting:
PAT
|PCD
||PWT
|||
000 WB _PAGE_CACHE_WB
001 WC _PAGE_CACHE_WC
010 UC- _PAGE_CACHE_UC_MINUS
011 UC _PAGE_CACHE_UC
We are effectively changing WT from the boot-time setting to WC.
UC_MINUS is used to provide backward compatibility to existing /dev/mem
users (X).
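
For illustration, the PAT MSR entry that a PTE selects is indexed by the
architectural attribute bits (PWT = bit 3, PCD = bit 4, PAT = bit 7 for
4KB pages). The helper below is a hypothetical sketch, not part of this
patch:

/*
 * Hypothetical helper -- not in this patch.  Compute the PAT MSR entry
 * index selected by a 4KB-page PTE: index = PAT*4 + PCD*2 + PWT.
 * With the MSR programmed as in pat_init() below, index 0 is WB,
 * 1 is WC, 2 is UC- and 3 is UC; the PAT bit itself stays unused.
 */
static unsigned int pte_pat_index(unsigned long pte)
{
	unsigned int pwt = (pte >> 3) & 1;	/* _PAGE_BIT_PWT */
	unsigned int pcd = (pte >> 4) & 1;	/* _PAGE_BIT_PCD */
	unsigned int pat = (pte >> 7) & 1;	/* _PAGE_BIT_PAT */

	return (pat << 2) | (pcd << 1) | pwt;
}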
reserve_memtype and free_memtype are new interfaces for maintaining alias-free
mappings. They are currently implemented in a simple, unoptimized way with a
linked list. reserve and free track the effective memory type that results
from the combination of PAT and MTRR settings, rather than what is actually
requested in PAT.
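
A minimal usage sketch of the new interface follows; the function, range
and mapping step are hypothetical, while reserve_memtype, free_memtype
and the _PAGE_CACHE_* flags are the ones added by this patch:

/*
 * Illustrative only -- not in this patch.  Reserve an MMIO range as
 * write-combining before mapping it, honor whatever effective type the
 * tracker hands back, and release the reservation when done.
 */
static int example_map_wc(u64 start, u64 size)
{
	unsigned long ret_type;
	int err;

	err = reserve_memtype(start, start + size, _PAGE_CACHE_WC, &ret_type);
	if (err)
		return err;	/* e.g. -EBUSY on a conflicting alias */

	/*
	 * An overlapping reservation may force a different effective
	 * type; the caller must map the range with ret_type, not with
	 * the type it asked for.
	 */

	/* ... map and use the range with ret_type here ... */

	free_memtype(start, start + size);
	return 0;
}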
pat_init piggybacks on mtrr_init, as the rules for setting up both PAT
and MTRR are the same.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm/pat.c')
-rw-r--r--	arch/x86/mm/pat.c	402
1 file changed, 402 insertions, 0 deletions
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
new file mode 100644
index 000000000000..7cc71d868483
--- /dev/null
+++ b/arch/x86/mm/pat.c
@@ -0,0 +1,402 @@
/*
 * Handle caching attributes in page tables (PAT)
 *
 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *          Suresh B Siddha <suresh.b.siddha@intel.com>
 *
 * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
 */

#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/fs.h>

#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/pat.h>
#include <asm/e820.h>
#include <asm/cacheflush.h>
#include <asm/fcntl.h>
#include <asm/mtrr.h>

int pat_wc_enabled = 1;

static u64 __read_mostly boot_pat_state;

static int nopat(char *str)
{
	pat_wc_enabled = 0;
	printk(KERN_INFO "x86: PAT support disabled.\n");

	return 0;
}
early_param("nopat", nopat);

static int pat_known_cpu(void)
{
	if (!pat_wc_enabled)
		return 0;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
	    (boot_cpu_data.x86 == 0xF ||
	     (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model >= 15))) {
		if (cpu_has_pat) {
			return 1;
		}
	}

	pat_wc_enabled = 0;
	printk(KERN_INFO "CPU and/or kernel does not support PAT.\n");
	return 0;
}

enum {
	PAT_UC = 0,		/* uncached */
	PAT_WC = 1,		/* Write combining */
	PAT_WT = 4,		/* Write Through */
	PAT_WP = 5,		/* Write Protected */
	PAT_WB = 6,		/* Write Back (default) */
	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
};

#define PAT(x,y)	((u64)PAT_ ## y << ((x)*8))

void pat_init(void)
{
	u64 pat;

#ifndef CONFIG_X86_PAT
	nopat(NULL);
#endif

	/* Boot CPU enables PAT based on CPU feature */
	if (!smp_processor_id() && !pat_known_cpu())
		return;

	/* APs enable PAT iff boot CPU has enabled it before */
	if (smp_processor_id() && !pat_wc_enabled)
		return;

	/* Set PWT to Write-Combining. All other bits stay the same */
	/*
	 * PTE encoding used in Linux:
	 *      PAT
	 *      |PCD
	 *      ||PWT
	 *      |||
	 *      000 WB		_PAGE_CACHE_WB
	 *      001 WC		_PAGE_CACHE_WC
	 *      010 UC-		_PAGE_CACHE_UC_MINUS
	 *      011 UC		_PAGE_CACHE_UC
	 * PAT bit unused
	 */
	pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
	      PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);

	/* Boot CPU check */
	if (!smp_processor_id()) {
		rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
	}

	wrmsrl(MSR_IA32_CR_PAT, pat);
	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
	       smp_processor_id(), boot_pat_state, pat);
}

#undef PAT

static char *cattr_name(unsigned long flags)
{
	switch (flags & _PAGE_CACHE_MASK) {
	case _PAGE_CACHE_UC:		return "uncached";
	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
	case _PAGE_CACHE_WB:		return "write-back";
	case _PAGE_CACHE_WC:		return "write-combining";
	default:			return "broken";
	}
}

/*
 * The global memtype list keeps track of memory type for specific
 * physical memory areas. Conflicting memory types in different
 * mappings can cause CPU cache corruption. To avoid this we keep track.
 *
 * The list is sorted based on starting address and can contain multiple
 * entries for each address (this allows reference counting for overlapping
 * areas). All the aliases have the same cache attributes of course.
 * Zero attributes are represented as holes.
 *
 * Currently the data structure is a list because the number of mappings
 * is expected to be relatively small. If this should be a problem
 * it could be changed to an rbtree or similar.
 *
 * memtype_lock protects the whole list.
 */

struct memtype {
	u64 start;
	u64 end;
	unsigned long type;
	struct list_head nd;
};

static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */

/*
 * Does intersection of PAT memory type and MTRR memory type and returns
 * the resulting memory type as PAT understands it.
 * (Type in pat and mtrr will not have same value)
 * The intersection is based on "Effective Memory Type" tables in IA-32
 * SDM vol 3a
 */
static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
			   unsigned long *ret_prot)
{
	unsigned long pat_type;
	u8 mtrr_type;

	mtrr_type = mtrr_type_lookup(start, end);
	if (mtrr_type == 0xFF) {		/* MTRR not enabled */
		*ret_prot = prot;
		return 0;
	}
	if (mtrr_type == 0xFE) {		/* MTRR match error */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}
	if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
	    mtrr_type != MTRR_TYPE_WRBACK &&
	    mtrr_type != MTRR_TYPE_WRCOMB) {	/* MTRR type unhandled */
		*ret_prot = _PAGE_CACHE_UC;
		return -1;
	}

	pat_type = prot & _PAGE_CACHE_MASK;
	prot &= (~_PAGE_CACHE_MASK);

	/* Currently doing intersection by hand. Optimize it later. */
	if (pat_type == _PAGE_CACHE_WC) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else if (pat_type == _PAGE_CACHE_UC_MINUS) {
		*ret_prot = prot | _PAGE_CACHE_UC_MINUS;
	} else if (pat_type == _PAGE_CACHE_UC ||
		   mtrr_type == MTRR_TYPE_UNCACHABLE) {
		*ret_prot = prot | _PAGE_CACHE_UC;
	} else if (mtrr_type == MTRR_TYPE_WRCOMB) {
		*ret_prot = prot | _PAGE_CACHE_WC;
	} else {
		*ret_prot = prot | _PAGE_CACHE_WB;
	}

	return 0;
}

int reserve_memtype(u64 start, u64 end, unsigned long req_type,
		    unsigned long *ret_type)
{
	struct memtype *new_entry = NULL;
	struct memtype *parse;
	unsigned long actual_type;
	int err = 0;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled) {
		if (ret_type)
			*ret_type = req_type;

		return 0;
	}

	/* Low ISA region is always mapped WB in page table. No need to track */
	if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
		if (ret_type)
			*ret_type = _PAGE_CACHE_WB;

		return 0;
	}

	req_type &= _PAGE_CACHE_MASK;
	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
	if (err) {
		if (ret_type)
			*ret_type = actual_type;

		return -EINVAL;
	}

	new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
	if (!new_entry)
		return -ENOMEM;

	new_entry->start = start;
	new_entry->end = end;
	new_entry->type = actual_type;

	if (ret_type)
		*ret_type = actual_type;

	spin_lock(&memtype_lock);

	/* Search for existing mapping that overlaps the current range */
	list_for_each_entry(parse, &memtype_list, nd) {
		struct memtype *saved_ptr;

		if (parse->start >= end) {
			list_add(&new_entry->nd, parse->nd.prev);
			new_entry = NULL;
			break;
		}

		if (start <= parse->start && end >= parse->start) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
			"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start) {
					break;
				}

				if (actual_type != parse->type) {
					printk(KERN_INFO
			"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err) {
				break;
			}

			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, saved_ptr->nd.prev);
			new_entry = NULL;
			break;
		}

		if (start < parse->end) {
			if (actual_type != parse->type && ret_type) {
				actual_type = parse->type;
				*ret_type = actual_type;
				new_entry->type = actual_type;
			}

			if (actual_type != parse->type) {
				printk(KERN_INFO
			"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
					current->comm, current->pid,
					start, end,
					cattr_name(actual_type),
					cattr_name(parse->type));
				err = -EBUSY;
				break;
			}

			saved_ptr = parse;
			/*
			 * Check to see whether the request overlaps more
			 * than one entry in the list
			 */
			list_for_each_entry_continue(parse, &memtype_list, nd) {
				if (end <= parse->start) {
					break;
				}

				if (actual_type != parse->type) {
					printk(KERN_INFO
			"%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
						current->comm, current->pid,
						start, end,
						cattr_name(actual_type),
						cattr_name(parse->type));
					err = -EBUSY;
					break;
				}
			}

			if (err) {
				break;
			}

			/* No conflict. Go ahead and add this new entry */
			list_add(&new_entry->nd, &saved_ptr->nd);
			new_entry = NULL;
			break;
		}
	}

	if (err) {
		kfree(new_entry);
		spin_unlock(&memtype_lock);
		return err;
	}

	if (new_entry) {
		/* No conflict. Not yet added to the list. Add to the tail */
		list_add_tail(&new_entry->nd, &memtype_list);
	}

	spin_unlock(&memtype_lock);
	return err;
}

int free_memtype(u64 start, u64 end)
{
	struct memtype *ml;
	int err = -EINVAL;

	/* Only track when pat_wc_enabled */
	if (!pat_wc_enabled) {
		return 0;
	}

	/* Low ISA region is always mapped WB. No need to track */
	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
		return 0;
	}

	spin_lock(&memtype_lock);
	list_for_each_entry(ml, &memtype_list, nd) {
		if (ml->start == start && ml->end == end) {
			list_del(&ml->nd);
			kfree(ml);
			err = 0;
			break;
		}
	}
	spin_unlock(&memtype_lock);

	if (err) {
		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
			current->comm, current->pid, start, end);
	}
	return err;
}