aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm
diff options
context:
space:
mode:
authorRussell King <rmk+kernel@arm.linux.org.uk>2009-09-19 08:47:57 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2009-09-19 08:47:57 -0400
commit40d743b8c16a8cf6e30c1d941aa6147f9550ea75 (patch)
tree9fcdf9a06b18a275253048d1ea7c9803cec38845 /arch/x86/mm
parent7da18afa423f167e7ef3c9728e584d8bf05bd55a (diff)
parent83e686ea0291ee93b87dcdc00b96443b80de56c9 (diff)
Merge branch 'for-rmk' of git://linux-arm.org/linux-2.6
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--arch/x86/mm/iomap_32.c27
-rw-r--r--arch/x86/mm/ioremap.c18
-rw-r--r--arch/x86/mm/mmap.c17
-rw-r--r--arch/x86/mm/pageattr.c29
-rw-r--r--arch/x86/mm/pat.c353
5 files changed, 313 insertions, 131 deletions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84ca121e..84e236ce76ba 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/highmem.h> 22#include <linux/highmem.h>
23 23
24int is_io_mapping_possible(resource_size_t base, unsigned long size) 24static int is_io_mapping_possible(resource_size_t base, unsigned long size)
25{ 25{
26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) 26#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
27 /* There is no way to map greater than 1 << 32 address without PAE */ 27 /* There is no way to map greater than 1 << 32 address without PAE */
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
30#endif 30#endif
31 return 1; 31 return 1;
32} 32}
33EXPORT_SYMBOL_GPL(is_io_mapping_possible); 33
34int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
35{
36 unsigned long flag = _PAGE_CACHE_WC;
37 int ret;
38
39 if (!is_io_mapping_possible(base, size))
40 return -EINVAL;
41
42 ret = io_reserve_memtype(base, base + size, &flag);
43 if (ret)
44 return ret;
45
46 *prot = __pgprot(__PAGE_KERNEL | flag);
47 return 0;
48}
49EXPORT_SYMBOL_GPL(iomap_create_wc);
50
51void
52iomap_free(resource_size_t base, unsigned long size)
53{
54 io_free_memtype(base, base + size);
55}
56EXPORT_SYMBOL_GPL(iomap_free);
34 57
35void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) 58void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
36{ 59{
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 04e1ad60c63a..334e63ca7b2b 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size, 158 retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
159 prot_val, &new_prot_val); 159 prot_val, &new_prot_val);
160 if (retval) { 160 if (retval) {
161 pr_debug("Warning: reserve_memtype returned %d\n", retval); 161 printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
162 return NULL; 162 return NULL;
163 } 163 }
164 164
165 if (prot_val != new_prot_val) { 165 if (prot_val != new_prot_val) {
166 /* 166 if (!is_new_memtype_allowed(phys_addr, size,
167 * Do not fallback to certain memory types with certain 167 prot_val, new_prot_val)) {
168 * requested type: 168 printk(KERN_ERR
169 * - request is uc-, return cannot be write-back
170 * - request is uc-, return cannot be write-combine
171 * - request is write-combine, return cannot be write-back
172 */
173 if ((prot_val == _PAGE_CACHE_UC_MINUS &&
174 (new_prot_val == _PAGE_CACHE_WB ||
175 new_prot_val == _PAGE_CACHE_WC)) ||
176 (prot_val == _PAGE_CACHE_WC &&
177 new_prot_val == _PAGE_CACHE_WB)) {
178 pr_debug(
179 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", 169 "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
180 (unsigned long long)phys_addr, 170 (unsigned long long)phys_addr,
181 (unsigned long long)(phys_addr + size), 171 (unsigned long long)(phys_addr + size),
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 165829600566..c8191defc38a 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -29,13 +29,26 @@
29#include <linux/random.h> 29#include <linux/random.h>
30#include <linux/limits.h> 30#include <linux/limits.h>
31#include <linux/sched.h> 31#include <linux/sched.h>
32#include <asm/elf.h>
33
34static unsigned int stack_maxrandom_size(void)
35{
36 unsigned int max = 0;
37 if ((current->flags & PF_RANDOMIZE) &&
38 !(current->personality & ADDR_NO_RANDOMIZE)) {
39 max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
40 }
41
42 return max;
43}
44
32 45
33/* 46/*
34 * Top of mmap area (just below the process stack). 47 * Top of mmap area (just below the process stack).
35 * 48 *
36 * Leave an at least ~128 MB hole. 49 * Leave an at least ~128 MB hole with possible stack randomization.
37 */ 50 */
38#define MIN_GAP (128*1024*1024) 51#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
39#define MAX_GAP (TASK_SIZE/6*5) 52#define MAX_GAP (TASK_SIZE/6*5)
40 53
41/* 54/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7e600c1962db..24952fdc7e40 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -12,6 +12,7 @@
12#include <linux/seq_file.h> 12#include <linux/seq_file.h>
13#include <linux/debugfs.h> 13#include <linux/debugfs.h>
14#include <linux/pfn.h> 14#include <linux/pfn.h>
15#include <linux/percpu.h>
15 16
16#include <asm/e820.h> 17#include <asm/e820.h>
17#include <asm/processor.h> 18#include <asm/processor.h>
@@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
686{ 687{
687 struct cpa_data alias_cpa; 688 struct cpa_data alias_cpa;
688 unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); 689 unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
689 unsigned long vaddr, remapped; 690 unsigned long vaddr;
690 int ret; 691 int ret;
691 692
692 if (cpa->pfn >= max_pfn_mapped) 693 if (cpa->pfn >= max_pfn_mapped)
@@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
744 } 745 }
745#endif 746#endif
746 747
747 /*
748 * If the PMD page was partially used for per-cpu remapping,
749 * the recycled area needs to be split and modified. Because
750 * the area is always proper subset of a PMD page
751 * cpa->numpages is guaranteed to be 1 for these areas, so
752 * there's no need to loop over and check for further remaps.
753 */
754 remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
755 if (remapped) {
756 WARN_ON(cpa->numpages > 1);
757 alias_cpa = *cpa;
758 alias_cpa.vaddr = &remapped;
759 alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
760 ret = __change_page_attr_set_clr(&alias_cpa, 0);
761 if (ret)
762 return ret;
763 }
764
765 return 0; 748 return 0;
766} 749}
767 750
@@ -822,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
822{ 805{
823 struct cpa_data cpa; 806 struct cpa_data cpa;
824 int ret, cache, checkalias; 807 int ret, cache, checkalias;
808 unsigned long baddr = 0;
825 809
826 /* 810 /*
827 * Check, if we are requested to change a not supported 811 * Check, if we are requested to change a not supported
@@ -853,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
853 */ 837 */
854 WARN_ON_ONCE(1); 838 WARN_ON_ONCE(1);
855 } 839 }
840 /*
841 * Save address for cache flush. *addr is modified in the call
842 * to __change_page_attr_set_clr() below.
843 */
844 baddr = *addr;
856 } 845 }
857 846
858 /* Must avoid aliasing mappings in the highmem code */ 847 /* Must avoid aliasing mappings in the highmem code */
@@ -900,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
900 cpa_flush_array(addr, numpages, cache, 889 cpa_flush_array(addr, numpages, cache,
901 cpa.flags, pages); 890 cpa.flags, pages);
902 } else 891 } else
903 cpa_flush_range(*addr, numpages, cache); 892 cpa_flush_range(baddr, numpages, cache);
904 } else 893 } else
905 cpa_flush_all(cache); 894 cpa_flush_all(cache);
906 895
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b2f7d3e59b86..7257cf3decf9 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@
15#include <linux/gfp.h> 15#include <linux/gfp.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/rbtree.h>
18 19
19#include <asm/cacheflush.h> 20#include <asm/cacheflush.h>
20#include <asm/processor.h> 21#include <asm/processor.h>
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags)
148 * areas). All the aliases have the same cache attributes of course. 149 * areas). All the aliases have the same cache attributes of course.
149 * Zero attributes are represented as holes. 150 * Zero attributes are represented as holes.
150 * 151 *
151 * Currently the data structure is a list because the number of mappings 152 * The data structure is a list that is also organized as an rbtree
152 * are expected to be relatively small. If this should be a problem 153 * sorted on the start address of memtype range.
153 * it could be changed to a rbtree or similar.
154 * 154 *
155 * memtype_lock protects the whole list. 155 * memtype_lock protects both the linear list and rbtree.
156 */ 156 */
157 157
158struct memtype { 158struct memtype {
@@ -160,11 +160,53 @@ struct memtype {
160 u64 end; 160 u64 end;
161 unsigned long type; 161 unsigned long type;
162 struct list_head nd; 162 struct list_head nd;
163 struct rb_node rb;
163}; 164};
164 165
166static struct rb_root memtype_rbroot = RB_ROOT;
165static LIST_HEAD(memtype_list); 167static LIST_HEAD(memtype_list);
166static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ 168static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
167 169
170static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
171{
172 struct rb_node *node = root->rb_node;
173 struct memtype *last_lower = NULL;
174
175 while (node) {
176 struct memtype *data = container_of(node, struct memtype, rb);
177
178 if (data->start < start) {
179 last_lower = data;
180 node = node->rb_right;
181 } else if (data->start > start) {
182 node = node->rb_left;
183 } else
184 return data;
185 }
186
187 /* Will return NULL if there is no entry with its start <= start */
188 return last_lower;
189}
190
191static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
192{
193 struct rb_node **new = &(root->rb_node);
194 struct rb_node *parent = NULL;
195
196 while (*new) {
197 struct memtype *this = container_of(*new, struct memtype, rb);
198
199 parent = *new;
200 if (data->start <= this->start)
201 new = &((*new)->rb_left);
202 else if (data->start > this->start)
203 new = &((*new)->rb_right);
204 }
205
206 rb_link_node(&data->rb, parent, new);
207 rb_insert_color(&data->rb, root);
208}
209
168/* 210/*
169 * Does intersection of PAT memory type and MTRR memory type and returns 211 * Does intersection of PAT memory type and MTRR memory type and returns
170 * the resulting memory type as PAT understands it. 212 * the resulting memory type as PAT understands it.
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
218 return -EBUSY; 260 return -EBUSY;
219} 261}
220 262
221static struct memtype *cached_entry;
222static u64 cached_start;
223
224static int pat_pagerange_is_ram(unsigned long start, unsigned long end) 263static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
225{ 264{
226 int ram_page = 0, not_rampage = 0; 265 int ram_page = 0, not_rampage = 0;
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
249} 288}
250 289
251/* 290/*
252 * For RAM pages, mark the pages as non WB memory type using 291 * For RAM pages, we use page flags to mark the pages with appropriate type.
253 * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or 292 * Here we do two pass:
254 * set_memory_wc() on a RAM page at a time before marking it as WB again. 293 * - Find the memtype of all the pages in the range, look for any conflicts
255 * This is ok, because only one driver will be owning the page and 294 * - In case of no conflicts, set the new memtype for pages in the range
256 * doing set_memory_*() calls.
257 * 295 *
258 * For now, we use PageNonWB to track that the RAM page is being mapped 296 * Caller must hold memtype_lock for atomicity.
259 * as non WB. In future, we will have to use one more flag
260 * (or some other mechanism in page_struct) to distinguish between
261 * UC and WC mapping.
262 */ 297 */
263static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, 298static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
264 unsigned long *new_type) 299 unsigned long *new_type)
265{ 300{
266 struct page *page; 301 struct page *page;
267 u64 pfn, end_pfn; 302 u64 pfn;
303
304 if (req_type == _PAGE_CACHE_UC) {
305 /* We do not support strong UC */
306 WARN_ON_ONCE(1);
307 req_type = _PAGE_CACHE_UC_MINUS;
308 }
268 309
269 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 310 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
270 page = pfn_to_page(pfn); 311 unsigned long type;
271 if (page_mapped(page) || PageNonWB(page))
272 goto out;
273 312
274 SetPageNonWB(page); 313 page = pfn_to_page(pfn);
314 type = get_page_memtype(page);
315 if (type != -1) {
316 printk(KERN_INFO "reserve_ram_pages_type failed "
317 "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
318 start, end, type, req_type);
319 if (new_type)
320 *new_type = type;
321
322 return -EBUSY;
323 }
275 } 324 }
276 return 0;
277 325
278out: 326 if (new_type)
279 end_pfn = pfn; 327 *new_type = req_type;
280 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { 328
329 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
281 page = pfn_to_page(pfn); 330 page = pfn_to_page(pfn);
282 ClearPageNonWB(page); 331 set_page_memtype(page, req_type);
283 } 332 }
284 333 return 0;
285 return -EINVAL;
286} 334}
287 335
288static int free_ram_pages_type(u64 start, u64 end) 336static int free_ram_pages_type(u64 start, u64 end)
289{ 337{
290 struct page *page; 338 struct page *page;
291 u64 pfn, end_pfn; 339 u64 pfn;
292 340
293 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { 341 for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
294 page = pfn_to_page(pfn); 342 page = pfn_to_page(pfn);
295 if (page_mapped(page) || !PageNonWB(page)) 343 set_page_memtype(page, -1);
296 goto out;
297
298 ClearPageNonWB(page);
299 } 344 }
300 return 0; 345 return 0;
301
302out:
303 end_pfn = pfn;
304 for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
305 page = pfn_to_page(pfn);
306 SetPageNonWB(page);
307 }
308 return -EINVAL;
309} 346}
310 347
311/* 348/*
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
339 if (new_type) { 376 if (new_type) {
340 if (req_type == -1) 377 if (req_type == -1)
341 *new_type = _PAGE_CACHE_WB; 378 *new_type = _PAGE_CACHE_WB;
379 else if (req_type == _PAGE_CACHE_WC)
380 *new_type = _PAGE_CACHE_UC_MINUS;
342 else 381 else
343 *new_type = req_type & _PAGE_CACHE_MASK; 382 *new_type = req_type & _PAGE_CACHE_MASK;
344 } 383 }
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
364 *new_type = actual_type; 403 *new_type = actual_type;
365 404
366 is_range_ram = pat_pagerange_is_ram(start, end); 405 is_range_ram = pat_pagerange_is_ram(start, end);
367 if (is_range_ram == 1) 406 if (is_range_ram == 1) {
368 return reserve_ram_pages_type(start, end, req_type, 407
369 new_type); 408 spin_lock(&memtype_lock);
370 else if (is_range_ram < 0) 409 err = reserve_ram_pages_type(start, end, req_type, new_type);
410 spin_unlock(&memtype_lock);
411
412 return err;
413 } else if (is_range_ram < 0) {
371 return -EINVAL; 414 return -EINVAL;
415 }
372 416
373 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 417 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
374 if (!new) 418 if (!new)
@@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
380 424
381 spin_lock(&memtype_lock); 425 spin_lock(&memtype_lock);
382 426
383 if (cached_entry && start >= cached_start)
384 entry = cached_entry;
385 else
386 entry = list_entry(&memtype_list, struct memtype, nd);
387
388 /* Search for existing mapping that overlaps the current range */ 427 /* Search for existing mapping that overlaps the current range */
389 where = NULL; 428 where = NULL;
390 list_for_each_entry_continue(entry, &memtype_list, nd) { 429 list_for_each_entry(entry, &memtype_list, nd) {
391 if (end <= entry->start) { 430 if (end <= entry->start) {
392 where = entry->nd.prev; 431 where = entry->nd.prev;
393 cached_entry = list_entry(where, struct memtype, nd);
394 break; 432 break;
395 } else if (start <= entry->start) { /* end > entry->start */ 433 } else if (start <= entry->start) { /* end > entry->start */
396 err = chk_conflict(new, entry, new_type); 434 err = chk_conflict(new, entry, new_type);
@@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
398 dprintk("Overlap at 0x%Lx-0x%Lx\n", 436 dprintk("Overlap at 0x%Lx-0x%Lx\n",
399 entry->start, entry->end); 437 entry->start, entry->end);
400 where = entry->nd.prev; 438 where = entry->nd.prev;
401 cached_entry = list_entry(where,
402 struct memtype, nd);
403 } 439 }
404 break; 440 break;
405 } else if (start < entry->end) { /* start > entry->start */ 441 } else if (start < entry->end) { /* start > entry->start */
@@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
407 if (!err) { 443 if (!err) {
408 dprintk("Overlap at 0x%Lx-0x%Lx\n", 444 dprintk("Overlap at 0x%Lx-0x%Lx\n",
409 entry->start, entry->end); 445 entry->start, entry->end);
410 cached_entry = list_entry(entry->nd.prev,
411 struct memtype, nd);
412 446
413 /* 447 /*
414 * Move to right position in the linked 448 * Move to right position in the linked
@@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
436 return err; 470 return err;
437 } 471 }
438 472
439 cached_start = start;
440
441 if (where) 473 if (where)
442 list_add(&new->nd, where); 474 list_add(&new->nd, where);
443 else 475 else
444 list_add_tail(&new->nd, &memtype_list); 476 list_add_tail(&new->nd, &memtype_list);
445 477
478 memtype_rb_insert(&memtype_rbroot, new);
479
446 spin_unlock(&memtype_lock); 480 spin_unlock(&memtype_lock);
447 481
448 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", 482 dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
454 488
455int free_memtype(u64 start, u64 end) 489int free_memtype(u64 start, u64 end)
456{ 490{
457 struct memtype *entry; 491 struct memtype *entry, *saved_entry;
458 int err = -EINVAL; 492 int err = -EINVAL;
459 int is_range_ram; 493 int is_range_ram;
460 494
@@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end)
466 return 0; 500 return 0;
467 501
468 is_range_ram = pat_pagerange_is_ram(start, end); 502 is_range_ram = pat_pagerange_is_ram(start, end);
469 if (is_range_ram == 1) 503 if (is_range_ram == 1) {
470 return free_ram_pages_type(start, end); 504
471 else if (is_range_ram < 0) 505 spin_lock(&memtype_lock);
506 err = free_ram_pages_type(start, end);
507 spin_unlock(&memtype_lock);
508
509 return err;
510 } else if (is_range_ram < 0) {
472 return -EINVAL; 511 return -EINVAL;
512 }
473 513
474 spin_lock(&memtype_lock); 514 spin_lock(&memtype_lock);
475 list_for_each_entry(entry, &memtype_list, nd) { 515
516 entry = memtype_rb_search(&memtype_rbroot, start);
517 if (unlikely(entry == NULL))
518 goto unlock_ret;
519
520 /*
521 * Saved entry points to an entry with start same or less than what
522 * we searched for. Now go through the list in both directions to look
523 * for the entry that matches with both start and end, with list stored
524 * in sorted start address
525 */
526 saved_entry = entry;
527 list_for_each_entry_from(entry, &memtype_list, nd) {
476 if (entry->start == start && entry->end == end) { 528 if (entry->start == start && entry->end == end) {
477 if (cached_entry == entry || cached_start == start) 529 rb_erase(&entry->rb, &memtype_rbroot);
478 cached_entry = NULL; 530 list_del(&entry->nd);
531 kfree(entry);
532 err = 0;
533 break;
534 } else if (entry->start > start) {
535 break;
536 }
537 }
538
539 if (!err)
540 goto unlock_ret;
479 541
542 entry = saved_entry;
543 list_for_each_entry_reverse(entry, &memtype_list, nd) {
544 if (entry->start == start && entry->end == end) {
545 rb_erase(&entry->rb, &memtype_rbroot);
480 list_del(&entry->nd); 546 list_del(&entry->nd);
481 kfree(entry); 547 kfree(entry);
482 err = 0; 548 err = 0;
483 break; 549 break;
550 } else if (entry->start < start) {
551 break;
484 } 552 }
485 } 553 }
554unlock_ret:
486 spin_unlock(&memtype_lock); 555 spin_unlock(&memtype_lock);
487 556
488 if (err) { 557 if (err) {
@@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end)
496} 565}
497 566
498 567
568/**
569 * lookup_memtype - Looksup the memory type for a physical address
570 * @paddr: physical address of which memory type needs to be looked up
571 *
572 * Only to be called when PAT is enabled
573 *
574 * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
575 * _PAGE_CACHE_UC
576 */
577static unsigned long lookup_memtype(u64 paddr)
578{
579 int rettype = _PAGE_CACHE_WB;
580 struct memtype *entry;
581
582 if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
583 return rettype;
584
585 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
586 struct page *page;
587 spin_lock(&memtype_lock);
588 page = pfn_to_page(paddr >> PAGE_SHIFT);
589 rettype = get_page_memtype(page);
590 spin_unlock(&memtype_lock);
591 /*
592 * -1 from get_page_memtype() implies RAM page is in its
593 * default state and not reserved, and hence of type WB
594 */
595 if (rettype == -1)
596 rettype = _PAGE_CACHE_WB;
597
598 return rettype;
599 }
600
601 spin_lock(&memtype_lock);
602
603 entry = memtype_rb_search(&memtype_rbroot, paddr);
604 if (entry != NULL)
605 rettype = entry->type;
606 else
607 rettype = _PAGE_CACHE_UC_MINUS;
608
609 spin_unlock(&memtype_lock);
610 return rettype;
611}
612
613/**
614 * io_reserve_memtype - Request a memory type mapping for a region of memory
615 * @start: start (physical address) of the region
616 * @end: end (physical address) of the region
617 * @type: A pointer to memtype, with requested type. On success, requested
618 * or any other compatible type that was available for the region is returned
619 *
620 * On success, returns 0
621 * On failure, returns non-zero
622 */
623int io_reserve_memtype(resource_size_t start, resource_size_t end,
624 unsigned long *type)
625{
626 resource_size_t size = end - start;
627 unsigned long req_type = *type;
628 unsigned long new_type;
629 int ret;
630
631 WARN_ON_ONCE(iomem_map_sanity_check(start, size));
632
633 ret = reserve_memtype(start, end, req_type, &new_type);
634 if (ret)
635 goto out_err;
636
637 if (!is_new_memtype_allowed(start, size, req_type, new_type))
638 goto out_free;
639
640 if (kernel_map_sync_memtype(start, size, new_type) < 0)
641 goto out_free;
642
643 *type = new_type;
644 return 0;
645
646out_free:
647 free_memtype(start, end);
648 ret = -EBUSY;
649out_err:
650 return ret;
651}
652
653/**
654 * io_free_memtype - Release a memory type mapping for a region of memory
655 * @start: start (physical address) of the region
656 * @end: end (physical address) of the region
657 */
658void io_free_memtype(resource_size_t start, resource_size_t end)
659{
660 free_memtype(start, end);
661}
662
499pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, 663pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
500 unsigned long size, pgprot_t vma_prot) 664 unsigned long size, pgprot_t vma_prot)
501{ 665{
@@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
577{ 741{
578 unsigned long id_sz; 742 unsigned long id_sz;
579 743
580 if (!pat_enabled || base >= __pa(high_memory)) 744 if (base >= __pa(high_memory))
581 return 0; 745 return 0;
582 746
583 id_sz = (__pa(high_memory) < base + size) ? 747 id_sz = (__pa(high_memory) < base + size) ?
@@ -612,11 +776,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
612 is_ram = pat_pagerange_is_ram(paddr, paddr + size); 776 is_ram = pat_pagerange_is_ram(paddr, paddr + size);
613 777
614 /* 778 /*
615 * reserve_pfn_range() doesn't support RAM pages. Maintain the current 779 * reserve_pfn_range() for RAM pages. We do not refcount to keep
616 * behavior with RAM pages by returning success. 780 * track of number of mappings of RAM pages. We can assert that
781 * the type requested matches the type of first page in the range.
617 */ 782 */
618 if (is_ram != 0) 783 if (is_ram) {
784 if (!pat_enabled)
785 return 0;
786
787 flags = lookup_memtype(paddr);
788 if (want_flags != flags) {
789 printk(KERN_WARNING
790 "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
791 current->comm, current->pid,
792 cattr_name(want_flags),
793 (unsigned long long)paddr,
794 (unsigned long long)(paddr + size),
795 cattr_name(flags));
796 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
797 (~_PAGE_CACHE_MASK)) |
798 flags);
799 }
619 return 0; 800 return 0;
801 }
620 802
621 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); 803 ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
622 if (ret) 804 if (ret)
@@ -678,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
678 unsigned long vma_size = vma->vm_end - vma->vm_start; 860 unsigned long vma_size = vma->vm_end - vma->vm_start;
679 pgprot_t pgprot; 861 pgprot_t pgprot;
680 862
681 if (!pat_enabled)
682 return 0;
683
684 /*
685 * For now, only handle remap_pfn_range() vmas where
686 * is_linear_pfn_mapping() == TRUE. Handling of
687 * vm_insert_pfn() is TBD.
688 */
689 if (is_linear_pfn_mapping(vma)) { 863 if (is_linear_pfn_mapping(vma)) {
690 /* 864 /*
691 * reserve the whole chunk covered by vma. We need the 865 * reserve the whole chunk covered by vma. We need the
@@ -713,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
713int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, 887int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
714 unsigned long pfn, unsigned long size) 888 unsigned long pfn, unsigned long size)
715{ 889{
890 unsigned long flags;
716 resource_size_t paddr; 891 resource_size_t paddr;
717 unsigned long vma_size = vma->vm_end - vma->vm_start; 892 unsigned long vma_size = vma->vm_end - vma->vm_start;
718 893
719 if (!pat_enabled)
720 return 0;
721
722 /*
723 * For now, only handle remap_pfn_range() vmas where
724 * is_linear_pfn_mapping() == TRUE. Handling of
725 * vm_insert_pfn() is TBD.
726 */
727 if (is_linear_pfn_mapping(vma)) { 894 if (is_linear_pfn_mapping(vma)) {
728 /* reserve the whole chunk starting from vm_pgoff */ 895 /* reserve the whole chunk starting from vm_pgoff */
729 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 896 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
730 return reserve_pfn_range(paddr, vma_size, prot, 0); 897 return reserve_pfn_range(paddr, vma_size, prot, 0);
731 } 898 }
732 899
900 if (!pat_enabled)
901 return 0;
902
903 /* for vm_insert_pfn and friends, we set prot based on lookup */
904 flags = lookup_memtype(pfn << PAGE_SHIFT);
905 *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
906 flags);
907
733 return 0; 908 return 0;
734} 909}
735 910
@@ -744,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
744 resource_size_t paddr; 919 resource_size_t paddr;
745 unsigned long vma_size = vma->vm_end - vma->vm_start; 920 unsigned long vma_size = vma->vm_end - vma->vm_start;
746 921
747 if (!pat_enabled)
748 return;
749
750 /*
751 * For now, only handle remap_pfn_range() vmas where
752 * is_linear_pfn_mapping() == TRUE. Handling of
753 * vm_insert_pfn() is TBD.
754 */
755 if (is_linear_pfn_mapping(vma)) { 922 if (is_linear_pfn_mapping(vma)) {
756 /* free the whole chunk starting from vm_pgoff */ 923 /* free the whole chunk starting from vm_pgoff */
757 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 924 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;