diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-15 12:19:38 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-09-15 12:19:38 -0400 |
commit | 227423904c709a8e60245c97081bbeb4fb500655 (patch) | |
tree | 97db1b8df1e4518334aea2fdf60363e0a691eb1e /arch/x86/mm | |
parent | 1aaf2e59135fd67321f47c11c64a54aac27014e9 (diff) | |
parent | fa526d0d641b5365676a1fb821ce359e217c9b85 (diff) |
Merge branch 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-pat-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, pat: Fix cacheflush address in change_page_attr_set_clr()
mm: remove !NUMA condition from PAGEFLAGS_EXTENDED condition set
x86: Fix earlyprintk=dbgp for machines without NX
x86, pat: Sanity check remap_pfn_range for RAM region
x86, pat: Lookup the protection from memtype list on vm_insert_pfn()
x86, pat: Add lookup_memtype to get the current memtype of a paddr
x86, pat: Use page flags to track memtypes of RAM pages
x86, pat: Generalize the use of page flag PG_uncached
x86, pat: Add rbtree to do quick lookup in memtype tracking
x86, pat: Add PAT reserve free to io_mapping* APIs
x86, pat: New i/f for driver to request memtype for IO regions
x86, pat: ioremap to follow same PAT restrictions as other PAT users
x86, pat: Keep identity maps consistent with mmaps even when pat_disabled
x86, mtrr: make mtrr_aps_delayed_init static bool
x86, pat/mtrr: Rendezvous all the cpus for MTRR/PAT init
generic-ipi: Allow cpus not yet online to call smp_call_function with irqs disabled
x86: Fix an incorrect argument of reserve_bootmem()
x86: Fix system crash when loading with "reservetop" parameter
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/iomap_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 353 |
4 files changed, 300 insertions, 106 deletions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84ca121e..84e236ce76ba 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | 23 | ||
24 | int is_io_mapping_possible(resource_size_t base, unsigned long size) | 24 | static int is_io_mapping_possible(resource_size_t base, unsigned long size) |
25 | { | 25 | { |
26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) | 26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) |
27 | /* There is no way to map greater than 1 << 32 address without PAE */ | 27 | /* There is no way to map greater than 1 << 32 address without PAE */ |
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) | |||
30 | #endif | 30 | #endif |
31 | return 1; | 31 | return 1; |
32 | } | 32 | } |
33 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); | 33 | |
34 | int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) | ||
35 | { | ||
36 | unsigned long flag = _PAGE_CACHE_WC; | ||
37 | int ret; | ||
38 | |||
39 | if (!is_io_mapping_possible(base, size)) | ||
40 | return -EINVAL; | ||
41 | |||
42 | ret = io_reserve_memtype(base, base + size, &flag); | ||
43 | if (ret) | ||
44 | return ret; | ||
45 | |||
46 | *prot = __pgprot(__PAGE_KERNEL | flag); | ||
47 | return 0; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(iomap_create_wc); | ||
50 | |||
51 | void | ||
52 | iomap_free(resource_size_t base, unsigned long size) | ||
53 | { | ||
54 | io_free_memtype(base, base + size); | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(iomap_free); | ||
34 | 57 | ||
35 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | 58 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) |
36 | { | 59 | { |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 04e1ad60c63a..334e63ca7b2b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | 158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
159 | prot_val, &new_prot_val); | 159 | prot_val, &new_prot_val); |
160 | if (retval) { | 160 | if (retval) { |
161 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | 161 | printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); |
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | 164 | ||
165 | if (prot_val != new_prot_val) { | 165 | if (prot_val != new_prot_val) { |
166 | /* | 166 | if (!is_new_memtype_allowed(phys_addr, size, |
167 | * Do not fallback to certain memory types with certain | 167 | prot_val, new_prot_val)) { |
168 | * requested type: | 168 | printk(KERN_ERR |
169 | * - request is uc-, return cannot be write-back | ||
170 | * - request is uc-, return cannot be write-combine | ||
171 | * - request is write-combine, return cannot be write-back | ||
172 | */ | ||
173 | if ((prot_val == _PAGE_CACHE_UC_MINUS && | ||
174 | (new_prot_val == _PAGE_CACHE_WB || | ||
175 | new_prot_val == _PAGE_CACHE_WC)) || | ||
176 | (prot_val == _PAGE_CACHE_WC && | ||
177 | new_prot_val == _PAGE_CACHE_WB)) { | ||
178 | pr_debug( | ||
179 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", | 169 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", |
180 | (unsigned long long)phys_addr, | 170 | (unsigned long long)phys_addr, |
181 | (unsigned long long)(phys_addr + size), | 171 | (unsigned long long)(phys_addr + size), |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7e600c1962db..e245775ec856 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -822,6 +822,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
822 | { | 822 | { |
823 | struct cpa_data cpa; | 823 | struct cpa_data cpa; |
824 | int ret, cache, checkalias; | 824 | int ret, cache, checkalias; |
825 | unsigned long baddr = 0; | ||
825 | 826 | ||
826 | /* | 827 | /* |
827 | * Check, if we are requested to change a not supported | 828 | * Check, if we are requested to change a not supported |
@@ -853,6 +854,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
853 | */ | 854 | */ |
854 | WARN_ON_ONCE(1); | 855 | WARN_ON_ONCE(1); |
855 | } | 856 | } |
857 | /* | ||
858 | * Save address for cache flush. *addr is modified in the call | ||
859 | * to __change_page_attr_set_clr() below. | ||
860 | */ | ||
861 | baddr = *addr; | ||
856 | } | 862 | } |
857 | 863 | ||
858 | /* Must avoid aliasing mappings in the highmem code */ | 864 | /* Must avoid aliasing mappings in the highmem code */ |
@@ -900,7 +906,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
900 | cpa_flush_array(addr, numpages, cache, | 906 | cpa_flush_array(addr, numpages, cache, |
901 | cpa.flags, pages); | 907 | cpa.flags, pages); |
902 | } else | 908 | } else |
903 | cpa_flush_range(*addr, numpages, cache); | 909 | cpa_flush_range(baddr, numpages, cache); |
904 | } else | 910 | } else |
905 | cpa_flush_all(cache); | 911 | cpa_flush_all(cache); |
906 | 912 | ||
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b2f7d3e59b86..d7ebc3a10f2f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | ||
18 | 19 | ||
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) | |||
148 | * areas). All the aliases have the same cache attributes of course. | 149 | * areas). All the aliases have the same cache attributes of course. |
149 | * Zero attributes are represented as holes. | 150 | * Zero attributes are represented as holes. |
150 | * | 151 | * |
151 | * Currently the data structure is a list because the number of mappings | 152 | * The data structure is a list that is also organized as an rbtree |
152 | * are expected to be relatively small. If this should be a problem | 153 | * sorted on the start address of memtype range. |
153 | * it could be changed to a rbtree or similar. | ||
154 | * | 154 | * |
155 | * memtype_lock protects the whole list. | 155 | * memtype_lock protects both the linear list and rbtree. |
156 | */ | 156 | */ |
157 | 157 | ||
158 | struct memtype { | 158 | struct memtype { |
@@ -160,11 +160,53 @@ struct memtype { | |||
160 | u64 end; | 160 | u64 end; |
161 | unsigned long type; | 161 | unsigned long type; |
162 | struct list_head nd; | 162 | struct list_head nd; |
163 | struct rb_node rb; | ||
163 | }; | 164 | }; |
164 | 165 | ||
166 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
165 | static LIST_HEAD(memtype_list); | 167 | static LIST_HEAD(memtype_list); |
166 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | 168 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ |
167 | 169 | ||
170 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
171 | { | ||
172 | struct rb_node *node = root->rb_node; | ||
173 | struct memtype *last_lower = NULL; | ||
174 | |||
175 | while (node) { | ||
176 | struct memtype *data = container_of(node, struct memtype, rb); | ||
177 | |||
178 | if (data->start < start) { | ||
179 | last_lower = data; | ||
180 | node = node->rb_right; | ||
181 | } else if (data->start > start) { | ||
182 | node = node->rb_left; | ||
183 | } else | ||
184 | return data; | ||
185 | } | ||
186 | |||
187 | /* Will return NULL if there is no entry with its start <= start */ | ||
188 | return last_lower; | ||
189 | } | ||
190 | |||
191 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
192 | { | ||
193 | struct rb_node **new = &(root->rb_node); | ||
194 | struct rb_node *parent = NULL; | ||
195 | |||
196 | while (*new) { | ||
197 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
198 | |||
199 | parent = *new; | ||
200 | if (data->start <= this->start) | ||
201 | new = &((*new)->rb_left); | ||
202 | else if (data->start > this->start) | ||
203 | new = &((*new)->rb_right); | ||
204 | } | ||
205 | |||
206 | rb_link_node(&data->rb, parent, new); | ||
207 | rb_insert_color(&data->rb, root); | ||
208 | } | ||
209 | |||
168 | /* | 210 | /* |
169 | * Does intersection of PAT memory type and MTRR memory type and returns | 211 | * Does intersection of PAT memory type and MTRR memory type and returns |
170 | * the resulting memory type as PAT understands it. | 212 | * the resulting memory type as PAT understands it. |
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | |||
218 | return -EBUSY; | 260 | return -EBUSY; |
219 | } | 261 | } |
220 | 262 | ||
221 | static struct memtype *cached_entry; | ||
222 | static u64 cached_start; | ||
223 | |||
224 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 263 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
225 | { | 264 | { |
226 | int ram_page = 0, not_rampage = 0; | 265 | int ram_page = 0, not_rampage = 0; |
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
249 | } | 288 | } |
250 | 289 | ||
251 | /* | 290 | /* |
252 | * For RAM pages, mark the pages as non WB memory type using | 291 | * For RAM pages, we use page flags to mark the pages with appropriate type. |
253 | * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or | 292 | * Here we do two pass: |
254 | * set_memory_wc() on a RAM page at a time before marking it as WB again. | 293 | * - Find the memtype of all the pages in the range, look for any conflicts |
255 | * This is ok, because only one driver will be owning the page and | 294 | * - In case of no conflicts, set the new memtype for pages in the range |
256 | * doing set_memory_*() calls. | ||
257 | * | 295 | * |
258 | * For now, we use PageNonWB to track that the RAM page is being mapped | 296 | * Caller must hold memtype_lock for atomicity. |
259 | * as non WB. In future, we will have to use one more flag | ||
260 | * (or some other mechanism in page_struct) to distinguish between | ||
261 | * UC and WC mapping. | ||
262 | */ | 297 | */ |
263 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 298 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
264 | unsigned long *new_type) | 299 | unsigned long *new_type) |
265 | { | 300 | { |
266 | struct page *page; | 301 | struct page *page; |
267 | u64 pfn, end_pfn; | 302 | u64 pfn; |
303 | |||
304 | if (req_type == _PAGE_CACHE_UC) { | ||
305 | /* We do not support strong UC */ | ||
306 | WARN_ON_ONCE(1); | ||
307 | req_type = _PAGE_CACHE_UC_MINUS; | ||
308 | } | ||
268 | 309 | ||
269 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 310 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
270 | page = pfn_to_page(pfn); | 311 | unsigned long type; |
271 | if (page_mapped(page) || PageNonWB(page)) | ||
272 | goto out; | ||
273 | 312 | ||
274 | SetPageNonWB(page); | 313 | page = pfn_to_page(pfn); |
314 | type = get_page_memtype(page); | ||
315 | if (type != -1) { | ||
316 | printk(KERN_INFO "reserve_ram_pages_type failed " | ||
317 | "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", | ||
318 | start, end, type, req_type); | ||
319 | if (new_type) | ||
320 | *new_type = type; | ||
321 | |||
322 | return -EBUSY; | ||
323 | } | ||
275 | } | 324 | } |
276 | return 0; | ||
277 | 325 | ||
278 | out: | 326 | if (new_type) |
279 | end_pfn = pfn; | 327 | *new_type = req_type; |
280 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | 328 | |
329 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
281 | page = pfn_to_page(pfn); | 330 | page = pfn_to_page(pfn); |
282 | ClearPageNonWB(page); | 331 | set_page_memtype(page, req_type); |
283 | } | 332 | } |
284 | 333 | return 0; | |
285 | return -EINVAL; | ||
286 | } | 334 | } |
287 | 335 | ||
288 | static int free_ram_pages_type(u64 start, u64 end) | 336 | static int free_ram_pages_type(u64 start, u64 end) |
289 | { | 337 | { |
290 | struct page *page; | 338 | struct page *page; |
291 | u64 pfn, end_pfn; | 339 | u64 pfn; |
292 | 340 | ||
293 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 341 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
294 | page = pfn_to_page(pfn); | 342 | page = pfn_to_page(pfn); |
295 | if (page_mapped(page) || !PageNonWB(page)) | 343 | set_page_memtype(page, -1); |
296 | goto out; | ||
297 | |||
298 | ClearPageNonWB(page); | ||
299 | } | 344 | } |
300 | return 0; | 345 | return 0; |
301 | |||
302 | out: | ||
303 | end_pfn = pfn; | ||
304 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
305 | page = pfn_to_page(pfn); | ||
306 | SetPageNonWB(page); | ||
307 | } | ||
308 | return -EINVAL; | ||
309 | } | 346 | } |
310 | 347 | ||
311 | /* | 348 | /* |
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
339 | if (new_type) { | 376 | if (new_type) { |
340 | if (req_type == -1) | 377 | if (req_type == -1) |
341 | *new_type = _PAGE_CACHE_WB; | 378 | *new_type = _PAGE_CACHE_WB; |
379 | else if (req_type == _PAGE_CACHE_WC) | ||
380 | *new_type = _PAGE_CACHE_UC_MINUS; | ||
342 | else | 381 | else |
343 | *new_type = req_type & _PAGE_CACHE_MASK; | 382 | *new_type = req_type & _PAGE_CACHE_MASK; |
344 | } | 383 | } |
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
364 | *new_type = actual_type; | 403 | *new_type = actual_type; |
365 | 404 | ||
366 | is_range_ram = pat_pagerange_is_ram(start, end); | 405 | is_range_ram = pat_pagerange_is_ram(start, end); |
367 | if (is_range_ram == 1) | 406 | if (is_range_ram == 1) { |
368 | return reserve_ram_pages_type(start, end, req_type, | 407 | |
369 | new_type); | 408 | spin_lock(&memtype_lock); |
370 | else if (is_range_ram < 0) | 409 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
410 | spin_unlock(&memtype_lock); | ||
411 | |||
412 | return err; | ||
413 | } else if (is_range_ram < 0) { | ||
371 | return -EINVAL; | 414 | return -EINVAL; |
415 | } | ||
372 | 416 | ||
373 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 417 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
374 | if (!new) | 418 | if (!new) |
@@ -380,17 +424,19 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
380 | 424 | ||
381 | spin_lock(&memtype_lock); | 425 | spin_lock(&memtype_lock); |
382 | 426 | ||
383 | if (cached_entry && start >= cached_start) | 427 | entry = memtype_rb_search(&memtype_rbroot, new->start); |
384 | entry = cached_entry; | 428 | if (likely(entry != NULL)) { |
385 | else | 429 | /* To work correctly with list_for_each_entry_continue */ |
430 | entry = list_entry(entry->nd.prev, struct memtype, nd); | ||
431 | } else { | ||
386 | entry = list_entry(&memtype_list, struct memtype, nd); | 432 | entry = list_entry(&memtype_list, struct memtype, nd); |
433 | } | ||
387 | 434 | ||
388 | /* Search for existing mapping that overlaps the current range */ | 435 | /* Search for existing mapping that overlaps the current range */ |
389 | where = NULL; | 436 | where = NULL; |
390 | list_for_each_entry_continue(entry, &memtype_list, nd) { | 437 | list_for_each_entry_continue(entry, &memtype_list, nd) { |
391 | if (end <= entry->start) { | 438 | if (end <= entry->start) { |
392 | where = entry->nd.prev; | 439 | where = entry->nd.prev; |
393 | cached_entry = list_entry(where, struct memtype, nd); | ||
394 | break; | 440 | break; |
395 | } else if (start <= entry->start) { /* end > entry->start */ | 441 | } else if (start <= entry->start) { /* end > entry->start */ |
396 | err = chk_conflict(new, entry, new_type); | 442 | err = chk_conflict(new, entry, new_type); |
@@ -398,8 +444,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
398 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 444 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
399 | entry->start, entry->end); | 445 | entry->start, entry->end); |
400 | where = entry->nd.prev; | 446 | where = entry->nd.prev; |
401 | cached_entry = list_entry(where, | ||
402 | struct memtype, nd); | ||
403 | } | 447 | } |
404 | break; | 448 | break; |
405 | } else if (start < entry->end) { /* start > entry->start */ | 449 | } else if (start < entry->end) { /* start > entry->start */ |
@@ -407,8 +451,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
407 | if (!err) { | 451 | if (!err) { |
408 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 452 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
409 | entry->start, entry->end); | 453 | entry->start, entry->end); |
410 | cached_entry = list_entry(entry->nd.prev, | ||
411 | struct memtype, nd); | ||
412 | 454 | ||
413 | /* | 455 | /* |
414 | * Move to right position in the linked | 456 | * Move to right position in the linked |
@@ -436,13 +478,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
436 | return err; | 478 | return err; |
437 | } | 479 | } |
438 | 480 | ||
439 | cached_start = start; | ||
440 | |||
441 | if (where) | 481 | if (where) |
442 | list_add(&new->nd, where); | 482 | list_add(&new->nd, where); |
443 | else | 483 | else |
444 | list_add_tail(&new->nd, &memtype_list); | 484 | list_add_tail(&new->nd, &memtype_list); |
445 | 485 | ||
486 | memtype_rb_insert(&memtype_rbroot, new); | ||
487 | |||
446 | spin_unlock(&memtype_lock); | 488 | spin_unlock(&memtype_lock); |
447 | 489 | ||
448 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 490 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -454,7 +496,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
454 | 496 | ||
455 | int free_memtype(u64 start, u64 end) | 497 | int free_memtype(u64 start, u64 end) |
456 | { | 498 | { |
457 | struct memtype *entry; | 499 | struct memtype *entry, *saved_entry; |
458 | int err = -EINVAL; | 500 | int err = -EINVAL; |
459 | int is_range_ram; | 501 | int is_range_ram; |
460 | 502 | ||
@@ -466,23 +508,58 @@ int free_memtype(u64 start, u64 end) | |||
466 | return 0; | 508 | return 0; |
467 | 509 | ||
468 | is_range_ram = pat_pagerange_is_ram(start, end); | 510 | is_range_ram = pat_pagerange_is_ram(start, end); |
469 | if (is_range_ram == 1) | 511 | if (is_range_ram == 1) { |
470 | return free_ram_pages_type(start, end); | 512 | |
471 | else if (is_range_ram < 0) | 513 | spin_lock(&memtype_lock); |
514 | err = free_ram_pages_type(start, end); | ||
515 | spin_unlock(&memtype_lock); | ||
516 | |||
517 | return err; | ||
518 | } else if (is_range_ram < 0) { | ||
472 | return -EINVAL; | 519 | return -EINVAL; |
520 | } | ||
473 | 521 | ||
474 | spin_lock(&memtype_lock); | 522 | spin_lock(&memtype_lock); |
523 | |||
524 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
525 | if (unlikely(entry == NULL)) | ||
526 | goto unlock_ret; | ||
527 | |||
528 | /* | ||
529 | * Saved entry points to an entry with start same or less than what | ||
530 | * we searched for. Now go through the list in both directions to look | ||
531 | * for the entry that matches with both start and end, with list stored | ||
532 | * in sorted start address | ||
533 | */ | ||
534 | saved_entry = entry; | ||
475 | list_for_each_entry(entry, &memtype_list, nd) { | 535 | list_for_each_entry(entry, &memtype_list, nd) { |
476 | if (entry->start == start && entry->end == end) { | 536 | if (entry->start == start && entry->end == end) { |
477 | if (cached_entry == entry || cached_start == start) | 537 | rb_erase(&entry->rb, &memtype_rbroot); |
478 | cached_entry = NULL; | 538 | list_del(&entry->nd); |
539 | kfree(entry); | ||
540 | err = 0; | ||
541 | break; | ||
542 | } else if (entry->start > start) { | ||
543 | break; | ||
544 | } | ||
545 | } | ||
546 | |||
547 | if (!err) | ||
548 | goto unlock_ret; | ||
479 | 549 | ||
550 | entry = saved_entry; | ||
551 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
552 | if (entry->start == start && entry->end == end) { | ||
553 | rb_erase(&entry->rb, &memtype_rbroot); | ||
480 | list_del(&entry->nd); | 554 | list_del(&entry->nd); |
481 | kfree(entry); | 555 | kfree(entry); |
482 | err = 0; | 556 | err = 0; |
483 | break; | 557 | break; |
558 | } else if (entry->start < start) { | ||
559 | break; | ||
484 | } | 560 | } |
485 | } | 561 | } |
562 | unlock_ret: | ||
486 | spin_unlock(&memtype_lock); | 563 | spin_unlock(&memtype_lock); |
487 | 564 | ||
488 | if (err) { | 565 | if (err) { |
@@ -496,6 +573,101 @@ int free_memtype(u64 start, u64 end) | |||
496 | } | 573 | } |
497 | 574 | ||
498 | 575 | ||
576 | /** | ||
577 | * lookup_memtype - Looksup the memory type for a physical address | ||
578 | * @paddr: physical address of which memory type needs to be looked up | ||
579 | * | ||
580 | * Only to be called when PAT is enabled | ||
581 | * | ||
582 | * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or | ||
583 | * _PAGE_CACHE_UC | ||
584 | */ | ||
585 | static unsigned long lookup_memtype(u64 paddr) | ||
586 | { | ||
587 | int rettype = _PAGE_CACHE_WB; | ||
588 | struct memtype *entry; | ||
589 | |||
590 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | ||
591 | return rettype; | ||
592 | |||
593 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | ||
594 | struct page *page; | ||
595 | spin_lock(&memtype_lock); | ||
596 | page = pfn_to_page(paddr >> PAGE_SHIFT); | ||
597 | rettype = get_page_memtype(page); | ||
598 | spin_unlock(&memtype_lock); | ||
599 | /* | ||
600 | * -1 from get_page_memtype() implies RAM page is in its | ||
601 | * default state and not reserved, and hence of type WB | ||
602 | */ | ||
603 | if (rettype == -1) | ||
604 | rettype = _PAGE_CACHE_WB; | ||
605 | |||
606 | return rettype; | ||
607 | } | ||
608 | |||
609 | spin_lock(&memtype_lock); | ||
610 | |||
611 | entry = memtype_rb_search(&memtype_rbroot, paddr); | ||
612 | if (entry != NULL) | ||
613 | rettype = entry->type; | ||
614 | else | ||
615 | rettype = _PAGE_CACHE_UC_MINUS; | ||
616 | |||
617 | spin_unlock(&memtype_lock); | ||
618 | return rettype; | ||
619 | } | ||
620 | |||
621 | /** | ||
622 | * io_reserve_memtype - Request a memory type mapping for a region of memory | ||
623 | * @start: start (physical address) of the region | ||
624 | * @end: end (physical address) of the region | ||
625 | * @type: A pointer to memtype, with requested type. On success, requested | ||
626 | * or any other compatible type that was available for the region is returned | ||
627 | * | ||
628 | * On success, returns 0 | ||
629 | * On failure, returns non-zero | ||
630 | */ | ||
631 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
632 | unsigned long *type) | ||
633 | { | ||
634 | resource_size_t size = end - start; | ||
635 | unsigned long req_type = *type; | ||
636 | unsigned long new_type; | ||
637 | int ret; | ||
638 | |||
639 | WARN_ON_ONCE(iomem_map_sanity_check(start, size)); | ||
640 | |||
641 | ret = reserve_memtype(start, end, req_type, &new_type); | ||
642 | if (ret) | ||
643 | goto out_err; | ||
644 | |||
645 | if (!is_new_memtype_allowed(start, size, req_type, new_type)) | ||
646 | goto out_free; | ||
647 | |||
648 | if (kernel_map_sync_memtype(start, size, new_type) < 0) | ||
649 | goto out_free; | ||
650 | |||
651 | *type = new_type; | ||
652 | return 0; | ||
653 | |||
654 | out_free: | ||
655 | free_memtype(start, end); | ||
656 | ret = -EBUSY; | ||
657 | out_err: | ||
658 | return ret; | ||
659 | } | ||
660 | |||
661 | /** | ||
662 | * io_free_memtype - Release a memory type mapping for a region of memory | ||
663 | * @start: start (physical address) of the region | ||
664 | * @end: end (physical address) of the region | ||
665 | */ | ||
666 | void io_free_memtype(resource_size_t start, resource_size_t end) | ||
667 | { | ||
668 | free_memtype(start, end); | ||
669 | } | ||
670 | |||
499 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 671 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
500 | unsigned long size, pgprot_t vma_prot) | 672 | unsigned long size, pgprot_t vma_prot) |
501 | { | 673 | { |
@@ -577,7 +749,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
577 | { | 749 | { |
578 | unsigned long id_sz; | 750 | unsigned long id_sz; |
579 | 751 | ||
580 | if (!pat_enabled || base >= __pa(high_memory)) | 752 | if (base >= __pa(high_memory)) |
581 | return 0; | 753 | return 0; |
582 | 754 | ||
583 | id_sz = (__pa(high_memory) < base + size) ? | 755 | id_sz = (__pa(high_memory) < base + size) ? |
@@ -612,11 +784,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
612 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); | 784 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); |
613 | 785 | ||
614 | /* | 786 | /* |
615 | * reserve_pfn_range() doesn't support RAM pages. Maintain the current | 787 | * reserve_pfn_range() for RAM pages. We do not refcount to keep |
616 | * behavior with RAM pages by returning success. | 788 | * track of number of mappings of RAM pages. We can assert that |
789 | * the type requested matches the type of first page in the range. | ||
617 | */ | 790 | */ |
618 | if (is_ram != 0) | 791 | if (is_ram) { |
792 | if (!pat_enabled) | ||
793 | return 0; | ||
794 | |||
795 | flags = lookup_memtype(paddr); | ||
796 | if (want_flags != flags) { | ||
797 | printk(KERN_WARNING | ||
798 | "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", | ||
799 | current->comm, current->pid, | ||
800 | cattr_name(want_flags), | ||
801 | (unsigned long long)paddr, | ||
802 | (unsigned long long)(paddr + size), | ||
803 | cattr_name(flags)); | ||
804 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & | ||
805 | (~_PAGE_CACHE_MASK)) | | ||
806 | flags); | ||
807 | } | ||
619 | return 0; | 808 | return 0; |
809 | } | ||
620 | 810 | ||
621 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | 811 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); |
622 | if (ret) | 812 | if (ret) |
@@ -678,14 +868,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
678 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 868 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
679 | pgprot_t pgprot; | 869 | pgprot_t pgprot; |
680 | 870 | ||
681 | if (!pat_enabled) | ||
682 | return 0; | ||
683 | |||
684 | /* | ||
685 | * For now, only handle remap_pfn_range() vmas where | ||
686 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
687 | * vm_insert_pfn() is TBD. | ||
688 | */ | ||
689 | if (is_linear_pfn_mapping(vma)) { | 871 | if (is_linear_pfn_mapping(vma)) { |
690 | /* | 872 | /* |
691 | * reserve the whole chunk covered by vma. We need the | 873 | * reserve the whole chunk covered by vma. We need the |
@@ -713,23 +895,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
713 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, | 895 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, |
714 | unsigned long pfn, unsigned long size) | 896 | unsigned long pfn, unsigned long size) |
715 | { | 897 | { |
898 | unsigned long flags; | ||
716 | resource_size_t paddr; | 899 | resource_size_t paddr; |
717 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 900 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
718 | 901 | ||
719 | if (!pat_enabled) | ||
720 | return 0; | ||
721 | |||
722 | /* | ||
723 | * For now, only handle remap_pfn_range() vmas where | ||
724 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
725 | * vm_insert_pfn() is TBD. | ||
726 | */ | ||
727 | if (is_linear_pfn_mapping(vma)) { | 902 | if (is_linear_pfn_mapping(vma)) { |
728 | /* reserve the whole chunk starting from vm_pgoff */ | 903 | /* reserve the whole chunk starting from vm_pgoff */ |
729 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 904 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
730 | return reserve_pfn_range(paddr, vma_size, prot, 0); | 905 | return reserve_pfn_range(paddr, vma_size, prot, 0); |
731 | } | 906 | } |
732 | 907 | ||
908 | if (!pat_enabled) | ||
909 | return 0; | ||
910 | |||
911 | /* for vm_insert_pfn and friends, we set prot based on lookup */ | ||
912 | flags = lookup_memtype(pfn << PAGE_SHIFT); | ||
913 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | ||
914 | flags); | ||
915 | |||
733 | return 0; | 916 | return 0; |
734 | } | 917 | } |
735 | 918 | ||
@@ -744,14 +927,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | |||
744 | resource_size_t paddr; | 927 | resource_size_t paddr; |
745 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 928 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
746 | 929 | ||
747 | if (!pat_enabled) | ||
748 | return; | ||
749 | |||
750 | /* | ||
751 | * For now, only handle remap_pfn_range() vmas where | ||
752 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
753 | * vm_insert_pfn() is TBD. | ||
754 | */ | ||
755 | if (is_linear_pfn_mapping(vma)) { | 930 | if (is_linear_pfn_mapping(vma)) { |
756 | /* free the whole chunk starting from vm_pgoff */ | 931 | /* free the whole chunk starting from vm_pgoff */ |
757 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 932 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |