diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-09-19 08:47:57 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-09-19 08:47:57 -0400 |
commit | 40d743b8c16a8cf6e30c1d941aa6147f9550ea75 (patch) | |
tree | 9fcdf9a06b18a275253048d1ea7c9803cec38845 /arch/x86/mm | |
parent | 7da18afa423f167e7ef3c9728e584d8bf05bd55a (diff) | |
parent | 83e686ea0291ee93b87dcdc00b96443b80de56c9 (diff) |
Merge branch 'for-rmk' of git://linux-arm.org/linux-2.6
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/iomap_32.c | 27 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 17 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 29 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 353 |
5 files changed, 313 insertions, 131 deletions
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -21,7 +21,7 @@ | |||
21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | 23 | ||
24 | int is_io_mapping_possible(resource_size_t base, unsigned long size) | 24 | static int is_io_mapping_possible(resource_size_t base, unsigned long size) |
25 | { | 25 | { |
26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) | 26 | #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) |
27 | /* There is no way to map greater than 1 << 32 address without PAE */ | 27 | /* There is no way to map greater than 1 << 32 address without PAE */ |
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) | |||
30 | #endif | 30 | #endif |
31 | return 1; | 31 | return 1; |
32 | } | 32 | } |
33 | EXPORT_SYMBOL_GPL(is_io_mapping_possible); | 33 | |
34 | int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) | ||
35 | { | ||
36 | unsigned long flag = _PAGE_CACHE_WC; | ||
37 | int ret; | ||
38 | |||
39 | if (!is_io_mapping_possible(base, size)) | ||
40 | return -EINVAL; | ||
41 | |||
42 | ret = io_reserve_memtype(base, base + size, &flag); | ||
43 | if (ret) | ||
44 | return ret; | ||
45 | |||
46 | *prot = __pgprot(__PAGE_KERNEL | flag); | ||
47 | return 0; | ||
48 | } | ||
49 | EXPORT_SYMBOL_GPL(iomap_create_wc); | ||
50 | |||
51 | void | ||
52 | iomap_free(resource_size_t base, unsigned long size) | ||
53 | { | ||
54 | io_free_memtype(base, base + size); | ||
55 | } | ||
56 | EXPORT_SYMBOL_GPL(iomap_free); | ||
34 | 57 | ||
35 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) | 58 | void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) |
36 | { | 59 | { |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 04e1ad60c63a..334e63ca7b2b 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, | 158 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
159 | prot_val, &new_prot_val); | 159 | prot_val, &new_prot_val); |
160 | if (retval) { | 160 | if (retval) { |
161 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | 161 | printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval); |
162 | return NULL; | 162 | return NULL; |
163 | } | 163 | } |
164 | 164 | ||
165 | if (prot_val != new_prot_val) { | 165 | if (prot_val != new_prot_val) { |
166 | /* | 166 | if (!is_new_memtype_allowed(phys_addr, size, |
167 | * Do not fallback to certain memory types with certain | 167 | prot_val, new_prot_val)) { |
168 | * requested type: | 168 | printk(KERN_ERR |
169 | * - request is uc-, return cannot be write-back | ||
170 | * - request is uc-, return cannot be write-combine | ||
171 | * - request is write-combine, return cannot be write-back | ||
172 | */ | ||
173 | if ((prot_val == _PAGE_CACHE_UC_MINUS && | ||
174 | (new_prot_val == _PAGE_CACHE_WB || | ||
175 | new_prot_val == _PAGE_CACHE_WC)) || | ||
176 | (prot_val == _PAGE_CACHE_WC && | ||
177 | new_prot_val == _PAGE_CACHE_WB)) { | ||
178 | pr_debug( | ||
179 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", | 169 | "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", |
180 | (unsigned long long)phys_addr, | 170 | (unsigned long long)phys_addr, |
181 | (unsigned long long)(phys_addr + size), | 171 | (unsigned long long)(phys_addr + size), |
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 165829600566..c8191defc38a 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c | |||
@@ -29,13 +29,26 @@ | |||
29 | #include <linux/random.h> | 29 | #include <linux/random.h> |
30 | #include <linux/limits.h> | 30 | #include <linux/limits.h> |
31 | #include <linux/sched.h> | 31 | #include <linux/sched.h> |
32 | #include <asm/elf.h> | ||
33 | |||
34 | static unsigned int stack_maxrandom_size(void) | ||
35 | { | ||
36 | unsigned int max = 0; | ||
37 | if ((current->flags & PF_RANDOMIZE) && | ||
38 | !(current->personality & ADDR_NO_RANDOMIZE)) { | ||
39 | max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT; | ||
40 | } | ||
41 | |||
42 | return max; | ||
43 | } | ||
44 | |||
32 | 45 | ||
33 | /* | 46 | /* |
34 | * Top of mmap area (just below the process stack). | 47 | * Top of mmap area (just below the process stack). |
35 | * | 48 | * |
36 | * Leave an at least ~128 MB hole. | 49 | * Leave an at least ~128 MB hole with possible stack randomization. |
37 | */ | 50 | */ |
38 | #define MIN_GAP (128*1024*1024) | 51 | #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) |
39 | #define MAX_GAP (TASK_SIZE/6*5) | 52 | #define MAX_GAP (TASK_SIZE/6*5) |
40 | 53 | ||
41 | /* | 54 | /* |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..24952fdc7e40 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
14 | #include <linux/pfn.h> | 14 | #include <linux/pfn.h> |
15 | #include <linux/percpu.h> | ||
15 | 16 | ||
16 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
17 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
@@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
686 | { | 687 | { |
687 | struct cpa_data alias_cpa; | 688 | struct cpa_data alias_cpa; |
688 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); | 689 | unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); |
689 | unsigned long vaddr, remapped; | 690 | unsigned long vaddr; |
690 | int ret; | 691 | int ret; |
691 | 692 | ||
692 | if (cpa->pfn >= max_pfn_mapped) | 693 | if (cpa->pfn >= max_pfn_mapped) |
@@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
744 | } | 745 | } |
745 | #endif | 746 | #endif |
746 | 747 | ||
747 | /* | ||
748 | * If the PMD page was partially used for per-cpu remapping, | ||
749 | * the recycled area needs to be split and modified. Because | ||
750 | * the area is always proper subset of a PMD page | ||
751 | * cpa->numpages is guaranteed to be 1 for these areas, so | ||
752 | * there's no need to loop over and check for further remaps. | ||
753 | */ | ||
754 | remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); | ||
755 | if (remapped) { | ||
756 | WARN_ON(cpa->numpages > 1); | ||
757 | alias_cpa = *cpa; | ||
758 | alias_cpa.vaddr = &remapped; | ||
759 | alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); | ||
760 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | ||
761 | if (ret) | ||
762 | return ret; | ||
763 | } | ||
764 | |||
765 | return 0; | 748 | return 0; |
766 | } | 749 | } |
767 | 750 | ||
@@ -822,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
822 | { | 805 | { |
823 | struct cpa_data cpa; | 806 | struct cpa_data cpa; |
824 | int ret, cache, checkalias; | 807 | int ret, cache, checkalias; |
808 | unsigned long baddr = 0; | ||
825 | 809 | ||
826 | /* | 810 | /* |
827 | * Check, if we are requested to change a not supported | 811 | * Check, if we are requested to change a not supported |
@@ -853,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
853 | */ | 837 | */ |
854 | WARN_ON_ONCE(1); | 838 | WARN_ON_ONCE(1); |
855 | } | 839 | } |
840 | /* | ||
841 | * Save address for cache flush. *addr is modified in the call | ||
842 | * to __change_page_attr_set_clr() below. | ||
843 | */ | ||
844 | baddr = *addr; | ||
856 | } | 845 | } |
857 | 846 | ||
858 | /* Must avoid aliasing mappings in the highmem code */ | 847 | /* Must avoid aliasing mappings in the highmem code */ |
@@ -900,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
900 | cpa_flush_array(addr, numpages, cache, | 889 | cpa_flush_array(addr, numpages, cache, |
901 | cpa.flags, pages); | 890 | cpa.flags, pages); |
902 | } else | 891 | } else |
903 | cpa_flush_range(*addr, numpages, cache); | 892 | cpa_flush_range(baddr, numpages, cache); |
904 | } else | 893 | } else |
905 | cpa_flush_all(cache); | 894 | cpa_flush_all(cache); |
906 | 895 | ||
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index b2f7d3e59b86..7257cf3decf9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/gfp.h> | 15 | #include <linux/gfp.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/rbtree.h> | ||
18 | 19 | ||
19 | #include <asm/cacheflush.h> | 20 | #include <asm/cacheflush.h> |
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags) | |||
148 | * areas). All the aliases have the same cache attributes of course. | 149 | * areas). All the aliases have the same cache attributes of course. |
149 | * Zero attributes are represented as holes. | 150 | * Zero attributes are represented as holes. |
150 | * | 151 | * |
151 | * Currently the data structure is a list because the number of mappings | 152 | * The data structure is a list that is also organized as an rbtree |
152 | * are expected to be relatively small. If this should be a problem | 153 | * sorted on the start address of memtype range. |
153 | * it could be changed to a rbtree or similar. | ||
154 | * | 154 | * |
155 | * memtype_lock protects the whole list. | 155 | * memtype_lock protects both the linear list and rbtree. |
156 | */ | 156 | */ |
157 | 157 | ||
158 | struct memtype { | 158 | struct memtype { |
@@ -160,11 +160,53 @@ struct memtype { | |||
160 | u64 end; | 160 | u64 end; |
161 | unsigned long type; | 161 | unsigned long type; |
162 | struct list_head nd; | 162 | struct list_head nd; |
163 | struct rb_node rb; | ||
163 | }; | 164 | }; |
164 | 165 | ||
166 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
165 | static LIST_HEAD(memtype_list); | 167 | static LIST_HEAD(memtype_list); |
166 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | 168 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ |
167 | 169 | ||
170 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
171 | { | ||
172 | struct rb_node *node = root->rb_node; | ||
173 | struct memtype *last_lower = NULL; | ||
174 | |||
175 | while (node) { | ||
176 | struct memtype *data = container_of(node, struct memtype, rb); | ||
177 | |||
178 | if (data->start < start) { | ||
179 | last_lower = data; | ||
180 | node = node->rb_right; | ||
181 | } else if (data->start > start) { | ||
182 | node = node->rb_left; | ||
183 | } else | ||
184 | return data; | ||
185 | } | ||
186 | |||
187 | /* Will return NULL if there is no entry with its start <= start */ | ||
188 | return last_lower; | ||
189 | } | ||
190 | |||
191 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
192 | { | ||
193 | struct rb_node **new = &(root->rb_node); | ||
194 | struct rb_node *parent = NULL; | ||
195 | |||
196 | while (*new) { | ||
197 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
198 | |||
199 | parent = *new; | ||
200 | if (data->start <= this->start) | ||
201 | new = &((*new)->rb_left); | ||
202 | else if (data->start > this->start) | ||
203 | new = &((*new)->rb_right); | ||
204 | } | ||
205 | |||
206 | rb_link_node(&data->rb, parent, new); | ||
207 | rb_insert_color(&data->rb, root); | ||
208 | } | ||
209 | |||
168 | /* | 210 | /* |
169 | * Does intersection of PAT memory type and MTRR memory type and returns | 211 | * Does intersection of PAT memory type and MTRR memory type and returns |
170 | * the resulting memory type as PAT understands it. | 212 | * the resulting memory type as PAT understands it. |
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | |||
218 | return -EBUSY; | 260 | return -EBUSY; |
219 | } | 261 | } |
220 | 262 | ||
221 | static struct memtype *cached_entry; | ||
222 | static u64 cached_start; | ||
223 | |||
224 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 263 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
225 | { | 264 | { |
226 | int ram_page = 0, not_rampage = 0; | 265 | int ram_page = 0, not_rampage = 0; |
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
249 | } | 288 | } |
250 | 289 | ||
251 | /* | 290 | /* |
252 | * For RAM pages, mark the pages as non WB memory type using | 291 | * For RAM pages, we use page flags to mark the pages with appropriate type. |
253 | * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or | 292 | * Here we do two passes: |
254 | * set_memory_wc() on a RAM page at a time before marking it as WB again. | 293 | * - Find the memtype of all the pages in the range, look for any conflicts |
255 | * This is ok, because only one driver will be owning the page and | 294 | * - In case of no conflicts, set the new memtype for pages in the range |
256 | * doing set_memory_*() calls. | ||
257 | * | 295 | * |
258 | * For now, we use PageNonWB to track that the RAM page is being mapped | 296 | * Caller must hold memtype_lock for atomicity. |
259 | * as non WB. In future, we will have to use one more flag | ||
260 | * (or some other mechanism in page_struct) to distinguish between | ||
261 | * UC and WC mapping. | ||
262 | */ | 297 | */ |
263 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 298 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
264 | unsigned long *new_type) | 299 | unsigned long *new_type) |
265 | { | 300 | { |
266 | struct page *page; | 301 | struct page *page; |
267 | u64 pfn, end_pfn; | 302 | u64 pfn; |
303 | |||
304 | if (req_type == _PAGE_CACHE_UC) { | ||
305 | /* We do not support strong UC */ | ||
306 | WARN_ON_ONCE(1); | ||
307 | req_type = _PAGE_CACHE_UC_MINUS; | ||
308 | } | ||
268 | 309 | ||
269 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 310 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
270 | page = pfn_to_page(pfn); | 311 | unsigned long type; |
271 | if (page_mapped(page) || PageNonWB(page)) | ||
272 | goto out; | ||
273 | 312 | ||
274 | SetPageNonWB(page); | 313 | page = pfn_to_page(pfn); |
314 | type = get_page_memtype(page); | ||
315 | if (type != -1) { | ||
316 | printk(KERN_INFO "reserve_ram_pages_type failed " | ||
317 | "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n", | ||
318 | start, end, type, req_type); | ||
319 | if (new_type) | ||
320 | *new_type = type; | ||
321 | |||
322 | return -EBUSY; | ||
323 | } | ||
275 | } | 324 | } |
276 | return 0; | ||
277 | 325 | ||
278 | out: | 326 | if (new_type) |
279 | end_pfn = pfn; | 327 | *new_type = req_type; |
280 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | 328 | |
329 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | ||
281 | page = pfn_to_page(pfn); | 330 | page = pfn_to_page(pfn); |
282 | ClearPageNonWB(page); | 331 | set_page_memtype(page, req_type); |
283 | } | 332 | } |
284 | 333 | return 0; | |
285 | return -EINVAL; | ||
286 | } | 334 | } |
287 | 335 | ||
288 | static int free_ram_pages_type(u64 start, u64 end) | 336 | static int free_ram_pages_type(u64 start, u64 end) |
289 | { | 337 | { |
290 | struct page *page; | 338 | struct page *page; |
291 | u64 pfn, end_pfn; | 339 | u64 pfn; |
292 | 340 | ||
293 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { | 341 | for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) { |
294 | page = pfn_to_page(pfn); | 342 | page = pfn_to_page(pfn); |
295 | if (page_mapped(page) || !PageNonWB(page)) | 343 | set_page_memtype(page, -1); |
296 | goto out; | ||
297 | |||
298 | ClearPageNonWB(page); | ||
299 | } | 344 | } |
300 | return 0; | 345 | return 0; |
301 | |||
302 | out: | ||
303 | end_pfn = pfn; | ||
304 | for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) { | ||
305 | page = pfn_to_page(pfn); | ||
306 | SetPageNonWB(page); | ||
307 | } | ||
308 | return -EINVAL; | ||
309 | } | 346 | } |
310 | 347 | ||
311 | /* | 348 | /* |
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
339 | if (new_type) { | 376 | if (new_type) { |
340 | if (req_type == -1) | 377 | if (req_type == -1) |
341 | *new_type = _PAGE_CACHE_WB; | 378 | *new_type = _PAGE_CACHE_WB; |
379 | else if (req_type == _PAGE_CACHE_WC) | ||
380 | *new_type = _PAGE_CACHE_UC_MINUS; | ||
342 | else | 381 | else |
343 | *new_type = req_type & _PAGE_CACHE_MASK; | 382 | *new_type = req_type & _PAGE_CACHE_MASK; |
344 | } | 383 | } |
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
364 | *new_type = actual_type; | 403 | *new_type = actual_type; |
365 | 404 | ||
366 | is_range_ram = pat_pagerange_is_ram(start, end); | 405 | is_range_ram = pat_pagerange_is_ram(start, end); |
367 | if (is_range_ram == 1) | 406 | if (is_range_ram == 1) { |
368 | return reserve_ram_pages_type(start, end, req_type, | 407 | |
369 | new_type); | 408 | spin_lock(&memtype_lock); |
370 | else if (is_range_ram < 0) | 409 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
410 | spin_unlock(&memtype_lock); | ||
411 | |||
412 | return err; | ||
413 | } else if (is_range_ram < 0) { | ||
371 | return -EINVAL; | 414 | return -EINVAL; |
415 | } | ||
372 | 416 | ||
373 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 417 | new = kmalloc(sizeof(struct memtype), GFP_KERNEL); |
374 | if (!new) | 418 | if (!new) |
@@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
380 | 424 | ||
381 | spin_lock(&memtype_lock); | 425 | spin_lock(&memtype_lock); |
382 | 426 | ||
383 | if (cached_entry && start >= cached_start) | ||
384 | entry = cached_entry; | ||
385 | else | ||
386 | entry = list_entry(&memtype_list, struct memtype, nd); | ||
387 | |||
388 | /* Search for existing mapping that overlaps the current range */ | 427 | /* Search for existing mapping that overlaps the current range */ |
389 | where = NULL; | 428 | where = NULL; |
390 | list_for_each_entry_continue(entry, &memtype_list, nd) { | 429 | list_for_each_entry(entry, &memtype_list, nd) { |
391 | if (end <= entry->start) { | 430 | if (end <= entry->start) { |
392 | where = entry->nd.prev; | 431 | where = entry->nd.prev; |
393 | cached_entry = list_entry(where, struct memtype, nd); | ||
394 | break; | 432 | break; |
395 | } else if (start <= entry->start) { /* end > entry->start */ | 433 | } else if (start <= entry->start) { /* end > entry->start */ |
396 | err = chk_conflict(new, entry, new_type); | 434 | err = chk_conflict(new, entry, new_type); |
@@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
398 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 436 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
399 | entry->start, entry->end); | 437 | entry->start, entry->end); |
400 | where = entry->nd.prev; | 438 | where = entry->nd.prev; |
401 | cached_entry = list_entry(where, | ||
402 | struct memtype, nd); | ||
403 | } | 439 | } |
404 | break; | 440 | break; |
405 | } else if (start < entry->end) { /* start > entry->start */ | 441 | } else if (start < entry->end) { /* start > entry->start */ |
@@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
407 | if (!err) { | 443 | if (!err) { |
408 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 444 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
409 | entry->start, entry->end); | 445 | entry->start, entry->end); |
410 | cached_entry = list_entry(entry->nd.prev, | ||
411 | struct memtype, nd); | ||
412 | 446 | ||
413 | /* | 447 | /* |
414 | * Move to right position in the linked | 448 | * Move to right position in the linked |
@@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
436 | return err; | 470 | return err; |
437 | } | 471 | } |
438 | 472 | ||
439 | cached_start = start; | ||
440 | |||
441 | if (where) | 473 | if (where) |
442 | list_add(&new->nd, where); | 474 | list_add(&new->nd, where); |
443 | else | 475 | else |
444 | list_add_tail(&new->nd, &memtype_list); | 476 | list_add_tail(&new->nd, &memtype_list); |
445 | 477 | ||
478 | memtype_rb_insert(&memtype_rbroot, new); | ||
479 | |||
446 | spin_unlock(&memtype_lock); | 480 | spin_unlock(&memtype_lock); |
447 | 481 | ||
448 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 482 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
454 | 488 | ||
455 | int free_memtype(u64 start, u64 end) | 489 | int free_memtype(u64 start, u64 end) |
456 | { | 490 | { |
457 | struct memtype *entry; | 491 | struct memtype *entry, *saved_entry; |
458 | int err = -EINVAL; | 492 | int err = -EINVAL; |
459 | int is_range_ram; | 493 | int is_range_ram; |
460 | 494 | ||
@@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end) | |||
466 | return 0; | 500 | return 0; |
467 | 501 | ||
468 | is_range_ram = pat_pagerange_is_ram(start, end); | 502 | is_range_ram = pat_pagerange_is_ram(start, end); |
469 | if (is_range_ram == 1) | 503 | if (is_range_ram == 1) { |
470 | return free_ram_pages_type(start, end); | 504 | |
471 | else if (is_range_ram < 0) | 505 | spin_lock(&memtype_lock); |
506 | err = free_ram_pages_type(start, end); | ||
507 | spin_unlock(&memtype_lock); | ||
508 | |||
509 | return err; | ||
510 | } else if (is_range_ram < 0) { | ||
472 | return -EINVAL; | 511 | return -EINVAL; |
512 | } | ||
473 | 513 | ||
474 | spin_lock(&memtype_lock); | 514 | spin_lock(&memtype_lock); |
475 | list_for_each_entry(entry, &memtype_list, nd) { | 515 | |
516 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
517 | if (unlikely(entry == NULL)) | ||
518 | goto unlock_ret; | ||
519 | |||
520 | /* | ||
521 | * saved_entry points to an entry whose start is the same as or less than | ||
522 | * the one we searched for. Now go through the list in both directions to | ||
523 | * look for the entry that matches both start and end; the list is kept | ||
524 | * sorted by start address. | ||
525 | */ | ||
526 | saved_entry = entry; | ||
527 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
476 | if (entry->start == start && entry->end == end) { | 528 | if (entry->start == start && entry->end == end) { |
477 | if (cached_entry == entry || cached_start == start) | 529 | rb_erase(&entry->rb, &memtype_rbroot); |
478 | cached_entry = NULL; | 530 | list_del(&entry->nd); |
531 | kfree(entry); | ||
532 | err = 0; | ||
533 | break; | ||
534 | } else if (entry->start > start) { | ||
535 | break; | ||
536 | } | ||
537 | } | ||
538 | |||
539 | if (!err) | ||
540 | goto unlock_ret; | ||
479 | 541 | ||
542 | entry = saved_entry; | ||
543 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
544 | if (entry->start == start && entry->end == end) { | ||
545 | rb_erase(&entry->rb, &memtype_rbroot); | ||
480 | list_del(&entry->nd); | 546 | list_del(&entry->nd); |
481 | kfree(entry); | 547 | kfree(entry); |
482 | err = 0; | 548 | err = 0; |
483 | break; | 549 | break; |
550 | } else if (entry->start < start) { | ||
551 | break; | ||
484 | } | 552 | } |
485 | } | 553 | } |
554 | unlock_ret: | ||
486 | spin_unlock(&memtype_lock); | 555 | spin_unlock(&memtype_lock); |
487 | 556 | ||
488 | if (err) { | 557 | if (err) { |
@@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end) | |||
496 | } | 565 | } |
497 | 566 | ||
498 | 567 | ||
568 | /** | ||
569 | * lookup_memtype - Looks up the memory type for a physical address | ||
570 | * @paddr: physical address of which memory type needs to be looked up | ||
571 | * | ||
572 | * Only to be called when PAT is enabled | ||
573 | * | ||
574 | * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or | ||
575 | * _PAGE_CACHE_UC | ||
576 | */ | ||
577 | static unsigned long lookup_memtype(u64 paddr) | ||
578 | { | ||
579 | int rettype = _PAGE_CACHE_WB; | ||
580 | struct memtype *entry; | ||
581 | |||
582 | if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1)) | ||
583 | return rettype; | ||
584 | |||
585 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | ||
586 | struct page *page; | ||
587 | spin_lock(&memtype_lock); | ||
588 | page = pfn_to_page(paddr >> PAGE_SHIFT); | ||
589 | rettype = get_page_memtype(page); | ||
590 | spin_unlock(&memtype_lock); | ||
591 | /* | ||
592 | * -1 from get_page_memtype() implies RAM page is in its | ||
593 | * default state and not reserved, and hence of type WB | ||
594 | */ | ||
595 | if (rettype == -1) | ||
596 | rettype = _PAGE_CACHE_WB; | ||
597 | |||
598 | return rettype; | ||
599 | } | ||
600 | |||
601 | spin_lock(&memtype_lock); | ||
602 | |||
603 | entry = memtype_rb_search(&memtype_rbroot, paddr); | ||
604 | if (entry != NULL) | ||
605 | rettype = entry->type; | ||
606 | else | ||
607 | rettype = _PAGE_CACHE_UC_MINUS; | ||
608 | |||
609 | spin_unlock(&memtype_lock); | ||
610 | return rettype; | ||
611 | } | ||
612 | |||
613 | /** | ||
614 | * io_reserve_memtype - Request a memory type mapping for a region of memory | ||
615 | * @start: start (physical address) of the region | ||
616 | * @end: end (physical address) of the region | ||
617 | * @type: A pointer to memtype, with requested type. On success, requested | ||
618 | * or any other compatible type that was available for the region is returned | ||
619 | * | ||
620 | * On success, returns 0 | ||
621 | * On failure, returns non-zero | ||
622 | */ | ||
623 | int io_reserve_memtype(resource_size_t start, resource_size_t end, | ||
624 | unsigned long *type) | ||
625 | { | ||
626 | resource_size_t size = end - start; | ||
627 | unsigned long req_type = *type; | ||
628 | unsigned long new_type; | ||
629 | int ret; | ||
630 | |||
631 | WARN_ON_ONCE(iomem_map_sanity_check(start, size)); | ||
632 | |||
633 | ret = reserve_memtype(start, end, req_type, &new_type); | ||
634 | if (ret) | ||
635 | goto out_err; | ||
636 | |||
637 | if (!is_new_memtype_allowed(start, size, req_type, new_type)) | ||
638 | goto out_free; | ||
639 | |||
640 | if (kernel_map_sync_memtype(start, size, new_type) < 0) | ||
641 | goto out_free; | ||
642 | |||
643 | *type = new_type; | ||
644 | return 0; | ||
645 | |||
646 | out_free: | ||
647 | free_memtype(start, end); | ||
648 | ret = -EBUSY; | ||
649 | out_err: | ||
650 | return ret; | ||
651 | } | ||
652 | |||
653 | /** | ||
654 | * io_free_memtype - Release a memory type mapping for a region of memory | ||
655 | * @start: start (physical address) of the region | ||
656 | * @end: end (physical address) of the region | ||
657 | */ | ||
658 | void io_free_memtype(resource_size_t start, resource_size_t end) | ||
659 | { | ||
660 | free_memtype(start, end); | ||
661 | } | ||
662 | |||
499 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 663 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
500 | unsigned long size, pgprot_t vma_prot) | 664 | unsigned long size, pgprot_t vma_prot) |
501 | { | 665 | { |
@@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) | |||
577 | { | 741 | { |
578 | unsigned long id_sz; | 742 | unsigned long id_sz; |
579 | 743 | ||
580 | if (!pat_enabled || base >= __pa(high_memory)) | 744 | if (base >= __pa(high_memory)) |
581 | return 0; | 745 | return 0; |
582 | 746 | ||
583 | id_sz = (__pa(high_memory) < base + size) ? | 747 | id_sz = (__pa(high_memory) < base + size) ? |
@@ -612,11 +776,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, | |||
612 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); | 776 | is_ram = pat_pagerange_is_ram(paddr, paddr + size); |
613 | 777 | ||
614 | /* | 778 | /* |
615 | * reserve_pfn_range() doesn't support RAM pages. Maintain the current | 779 | * reserve_pfn_range() for RAM pages. We do not refcount to keep |
616 | * behavior with RAM pages by returning success. | 780 | * track of number of mappings of RAM pages. We can assert that |
781 | * the type requested matches the type of first page in the range. | ||
617 | */ | 782 | */ |
618 | if (is_ram != 0) | 783 | if (is_ram) { |
784 | if (!pat_enabled) | ||
785 | return 0; | ||
786 | |||
787 | flags = lookup_memtype(paddr); | ||
788 | if (want_flags != flags) { | ||
789 | printk(KERN_WARNING | ||
790 | "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n", | ||
791 | current->comm, current->pid, | ||
792 | cattr_name(want_flags), | ||
793 | (unsigned long long)paddr, | ||
794 | (unsigned long long)(paddr + size), | ||
795 | cattr_name(flags)); | ||
796 | *vma_prot = __pgprot((pgprot_val(*vma_prot) & | ||
797 | (~_PAGE_CACHE_MASK)) | | ||
798 | flags); | ||
799 | } | ||
619 | return 0; | 800 | return 0; |
801 | } | ||
620 | 802 | ||
621 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | 803 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); |
622 | if (ret) | 804 | if (ret) |
@@ -678,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
678 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 860 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
679 | pgprot_t pgprot; | 861 | pgprot_t pgprot; |
680 | 862 | ||
681 | if (!pat_enabled) | ||
682 | return 0; | ||
683 | |||
684 | /* | ||
685 | * For now, only handle remap_pfn_range() vmas where | ||
686 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
687 | * vm_insert_pfn() is TBD. | ||
688 | */ | ||
689 | if (is_linear_pfn_mapping(vma)) { | 863 | if (is_linear_pfn_mapping(vma)) { |
690 | /* | 864 | /* |
691 | * reserve the whole chunk covered by vma. We need the | 865 | * reserve the whole chunk covered by vma. We need the |
@@ -713,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) | |||
713 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, | 887 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot, |
714 | unsigned long pfn, unsigned long size) | 888 | unsigned long pfn, unsigned long size) |
715 | { | 889 | { |
890 | unsigned long flags; | ||
716 | resource_size_t paddr; | 891 | resource_size_t paddr; |
717 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 892 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
718 | 893 | ||
719 | if (!pat_enabled) | ||
720 | return 0; | ||
721 | |||
722 | /* | ||
723 | * For now, only handle remap_pfn_range() vmas where | ||
724 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
725 | * vm_insert_pfn() is TBD. | ||
726 | */ | ||
727 | if (is_linear_pfn_mapping(vma)) { | 894 | if (is_linear_pfn_mapping(vma)) { |
728 | /* reserve the whole chunk starting from vm_pgoff */ | 895 | /* reserve the whole chunk starting from vm_pgoff */ |
729 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 896 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |
730 | return reserve_pfn_range(paddr, vma_size, prot, 0); | 897 | return reserve_pfn_range(paddr, vma_size, prot, 0); |
731 | } | 898 | } |
732 | 899 | ||
900 | if (!pat_enabled) | ||
901 | return 0; | ||
902 | |||
903 | /* for vm_insert_pfn and friends, we set prot based on lookup */ | ||
904 | flags = lookup_memtype(pfn << PAGE_SHIFT); | ||
905 | *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) | | ||
906 | flags); | ||
907 | |||
733 | return 0; | 908 | return 0; |
734 | } | 909 | } |
735 | 910 | ||
@@ -744,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | |||
744 | resource_size_t paddr; | 919 | resource_size_t paddr; |
745 | unsigned long vma_size = vma->vm_end - vma->vm_start; | 920 | unsigned long vma_size = vma->vm_end - vma->vm_start; |
746 | 921 | ||
747 | if (!pat_enabled) | ||
748 | return; | ||
749 | |||
750 | /* | ||
751 | * For now, only handle remap_pfn_range() vmas where | ||
752 | * is_linear_pfn_mapping() == TRUE. Handling of | ||
753 | * vm_insert_pfn() is TBD. | ||
754 | */ | ||
755 | if (is_linear_pfn_mapping(vma)) { | 922 | if (is_linear_pfn_mapping(vma)) { |
756 | /* free the whole chunk starting from vm_pgoff */ | 923 | /* free the whole chunk starting from vm_pgoff */ |
757 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | 924 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; |