diff options
author | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
commit | 2ec8c6bb5d8f3a62a79f463525054bae1e3d4487 (patch) | |
tree | fa7f8400ac685fb52e96f64997c7c682fc2aa021 /arch/x86/mm | |
parent | 7b39f90fabcf9e2af0cd79d0a60440d821e22b56 (diff) | |
parent | 537b60d17894b7c19a6060feae40299d7109d6e7 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts:
include/linux/mod_devicetable.h
scripts/mod/file2alias.c
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 239 | ||||
-rw-r--r-- | arch/x86/mm/pat_internal.h | 46 | ||||
-rw-r--r-- | arch/x86/mm/pat_rbtree.c | 273 | ||||
-rw-r--r-- | arch/x86/mm/srat_64.c | 51 |
5 files changed, 389 insertions, 221 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 06630d26e56d..a4c768397baa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector) | |||
6 | CFLAGS_physaddr.o := $(nostackp) | 6 | CFLAGS_physaddr.o := $(nostackp) |
7 | CFLAGS_setup_nx.o := $(nostackp) | 7 | CFLAGS_setup_nx.o := $(nostackp) |
8 | 8 | ||
9 | obj-$(CONFIG_X86_PAT) += pat_rbtree.o | ||
9 | obj-$(CONFIG_SMP) += tlb.o | 10 | obj-$(CONFIG_SMP) += tlb.o |
10 | 11 | ||
11 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o | 12 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edc8b95afc1a..bbe5502ee1cb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/pat.h> | 30 | #include <asm/pat.h> |
31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
32 | 32 | ||
33 | #include "pat_internal.h" | ||
34 | |||
33 | #ifdef CONFIG_X86_PAT | 35 | #ifdef CONFIG_X86_PAT |
34 | int __read_mostly pat_enabled = 1; | 36 | int __read_mostly pat_enabled = 1; |
35 | 37 | ||
@@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason) | |||
53 | #endif | 55 | #endif |
54 | 56 | ||
55 | 57 | ||
56 | static int debug_enable; | 58 | int pat_debug_enable; |
57 | 59 | ||
58 | static int __init pat_debug_setup(char *str) | 60 | static int __init pat_debug_setup(char *str) |
59 | { | 61 | { |
60 | debug_enable = 1; | 62 | pat_debug_enable = 1; |
61 | return 0; | 63 | return 0; |
62 | } | 64 | } |
63 | __setup("debugpat", pat_debug_setup); | 65 | __setup("debugpat", pat_debug_setup); |
64 | 66 | ||
65 | #define dprintk(fmt, arg...) \ | ||
66 | do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
67 | |||
68 | |||
69 | static u64 __read_mostly boot_pat_state; | 67 | static u64 __read_mostly boot_pat_state; |
70 | 68 | ||
71 | enum { | 69 | enum { |
@@ -132,84 +130,7 @@ void pat_init(void) | |||
132 | 130 | ||
133 | #undef PAT | 131 | #undef PAT |
134 | 132 | ||
135 | static char *cattr_name(unsigned long flags) | 133 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ |
136 | { | ||
137 | switch (flags & _PAGE_CACHE_MASK) { | ||
138 | case _PAGE_CACHE_UC: return "uncached"; | ||
139 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
140 | case _PAGE_CACHE_WB: return "write-back"; | ||
141 | case _PAGE_CACHE_WC: return "write-combining"; | ||
142 | default: return "broken"; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * The global memtype list keeps track of memory type for specific | ||
148 | * physical memory areas. Conflicting memory types in different | ||
149 | * mappings can cause CPU cache corruption. To avoid this we keep track. | ||
150 | * | ||
151 | * The list is sorted based on starting address and can contain multiple | ||
152 | * entries for each address (this allows reference counting for overlapping | ||
153 | * areas). All the aliases have the same cache attributes of course. | ||
154 | * Zero attributes are represented as holes. | ||
155 | * | ||
156 | * The data structure is a list that is also organized as an rbtree | ||
157 | * sorted on the start address of memtype range. | ||
158 | * | ||
159 | * memtype_lock protects both the linear list and rbtree. | ||
160 | */ | ||
161 | |||
162 | struct memtype { | ||
163 | u64 start; | ||
164 | u64 end; | ||
165 | unsigned long type; | ||
166 | struct list_head nd; | ||
167 | struct rb_node rb; | ||
168 | }; | ||
169 | |||
170 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
171 | static LIST_HEAD(memtype_list); | ||
172 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | ||
173 | |||
174 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
175 | { | ||
176 | struct rb_node *node = root->rb_node; | ||
177 | struct memtype *last_lower = NULL; | ||
178 | |||
179 | while (node) { | ||
180 | struct memtype *data = container_of(node, struct memtype, rb); | ||
181 | |||
182 | if (data->start < start) { | ||
183 | last_lower = data; | ||
184 | node = node->rb_right; | ||
185 | } else if (data->start > start) { | ||
186 | node = node->rb_left; | ||
187 | } else | ||
188 | return data; | ||
189 | } | ||
190 | |||
191 | /* Will return NULL if there is no entry with its start <= start */ | ||
192 | return last_lower; | ||
193 | } | ||
194 | |||
195 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
196 | { | ||
197 | struct rb_node **new = &(root->rb_node); | ||
198 | struct rb_node *parent = NULL; | ||
199 | |||
200 | while (*new) { | ||
201 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
202 | |||
203 | parent = *new; | ||
204 | if (data->start <= this->start) | ||
205 | new = &((*new)->rb_left); | ||
206 | else if (data->start > this->start) | ||
207 | new = &((*new)->rb_right); | ||
208 | } | ||
209 | |||
210 | rb_link_node(&data->rb, parent, new); | ||
211 | rb_insert_color(&data->rb, root); | ||
212 | } | ||
213 | 134 | ||
214 | /* | 135 | /* |
215 | * Does intersection of PAT memory type and MTRR memory type and returns | 136 | * Does intersection of PAT memory type and MTRR memory type and returns |
@@ -237,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
237 | return req_type; | 158 | return req_type; |
238 | } | 159 | } |
239 | 160 | ||
240 | static int | ||
241 | chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | ||
242 | { | ||
243 | if (new->type != entry->type) { | ||
244 | if (type) { | ||
245 | new->type = entry->type; | ||
246 | *type = entry->type; | ||
247 | } else | ||
248 | goto conflict; | ||
249 | } | ||
250 | |||
251 | /* check overlaps with more than one entry in the list */ | ||
252 | list_for_each_entry_continue(entry, &memtype_list, nd) { | ||
253 | if (new->end <= entry->start) | ||
254 | break; | ||
255 | else if (new->type != entry->type) | ||
256 | goto conflict; | ||
257 | } | ||
258 | return 0; | ||
259 | |||
260 | conflict: | ||
261 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
262 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | ||
263 | new->end, cattr_name(new->type), cattr_name(entry->type)); | ||
264 | return -EBUSY; | ||
265 | } | ||
266 | |||
267 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 161 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
268 | { | 162 | { |
269 | int ram_page = 0, not_rampage = 0; | 163 | int ram_page = 0, not_rampage = 0; |
@@ -296,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
296 | * Here we do two pass: | 190 | * Here we do two pass: |
297 | * - Find the memtype of all the pages in the range, look for any conflicts | 191 | * - Find the memtype of all the pages in the range, look for any conflicts |
298 | * - In case of no conflicts, set the new memtype for pages in the range | 192 | * - In case of no conflicts, set the new memtype for pages in the range |
299 | * | ||
300 | * Caller must hold memtype_lock for atomicity. | ||
301 | */ | 193 | */ |
302 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 194 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
303 | unsigned long *new_type) | 195 | unsigned long *new_type) |
@@ -364,9 +256,8 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
364 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 256 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
365 | unsigned long *new_type) | 257 | unsigned long *new_type) |
366 | { | 258 | { |
367 | struct memtype *new, *entry; | 259 | struct memtype *new; |
368 | unsigned long actual_type; | 260 | unsigned long actual_type; |
369 | struct list_head *where; | ||
370 | int is_range_ram; | 261 | int is_range_ram; |
371 | int err = 0; | 262 | int err = 0; |
372 | 263 | ||
@@ -404,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
404 | is_range_ram = pat_pagerange_is_ram(start, end); | 295 | is_range_ram = pat_pagerange_is_ram(start, end); |
405 | if (is_range_ram == 1) { | 296 | if (is_range_ram == 1) { |
406 | 297 | ||
407 | spin_lock(&memtype_lock); | ||
408 | err = reserve_ram_pages_type(start, end, req_type, new_type); | 298 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
409 | spin_unlock(&memtype_lock); | ||
410 | 299 | ||
411 | return err; | 300 | return err; |
412 | } else if (is_range_ram < 0) { | 301 | } else if (is_range_ram < 0) { |
@@ -423,42 +312,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
423 | 312 | ||
424 | spin_lock(&memtype_lock); | 313 | spin_lock(&memtype_lock); |
425 | 314 | ||
426 | /* Search for existing mapping that overlaps the current range */ | 315 | err = rbt_memtype_check_insert(new, new_type); |
427 | where = NULL; | ||
428 | list_for_each_entry(entry, &memtype_list, nd) { | ||
429 | if (end <= entry->start) { | ||
430 | where = entry->nd.prev; | ||
431 | break; | ||
432 | } else if (start <= entry->start) { /* end > entry->start */ | ||
433 | err = chk_conflict(new, entry, new_type); | ||
434 | if (!err) { | ||
435 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
436 | entry->start, entry->end); | ||
437 | where = entry->nd.prev; | ||
438 | } | ||
439 | break; | ||
440 | } else if (start < entry->end) { /* start > entry->start */ | ||
441 | err = chk_conflict(new, entry, new_type); | ||
442 | if (!err) { | ||
443 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
444 | entry->start, entry->end); | ||
445 | |||
446 | /* | ||
447 | * Move to right position in the linked | ||
448 | * list to add this new entry | ||
449 | */ | ||
450 | list_for_each_entry_continue(entry, | ||
451 | &memtype_list, nd) { | ||
452 | if (start <= entry->start) { | ||
453 | where = entry->nd.prev; | ||
454 | break; | ||
455 | } | ||
456 | } | ||
457 | } | ||
458 | break; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | if (err) { | 316 | if (err) { |
463 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | 317 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " |
464 | "track %s, req %s\n", | 318 | "track %s, req %s\n", |
@@ -469,13 +323,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
469 | return err; | 323 | return err; |
470 | } | 324 | } |
471 | 325 | ||
472 | if (where) | ||
473 | list_add(&new->nd, where); | ||
474 | else | ||
475 | list_add_tail(&new->nd, &memtype_list); | ||
476 | |||
477 | memtype_rb_insert(&memtype_rbroot, new); | ||
478 | |||
479 | spin_unlock(&memtype_lock); | 326 | spin_unlock(&memtype_lock); |
480 | 327 | ||
481 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 328 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -487,7 +334,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
487 | 334 | ||
488 | int free_memtype(u64 start, u64 end) | 335 | int free_memtype(u64 start, u64 end) |
489 | { | 336 | { |
490 | struct memtype *entry, *saved_entry; | ||
491 | int err = -EINVAL; | 337 | int err = -EINVAL; |
492 | int is_range_ram; | 338 | int is_range_ram; |
493 | 339 | ||
@@ -501,9 +347,7 @@ int free_memtype(u64 start, u64 end) | |||
501 | is_range_ram = pat_pagerange_is_ram(start, end); | 347 | is_range_ram = pat_pagerange_is_ram(start, end); |
502 | if (is_range_ram == 1) { | 348 | if (is_range_ram == 1) { |
503 | 349 | ||
504 | spin_lock(&memtype_lock); | ||
505 | err = free_ram_pages_type(start, end); | 350 | err = free_ram_pages_type(start, end); |
506 | spin_unlock(&memtype_lock); | ||
507 | 351 | ||
508 | return err; | 352 | return err; |
509 | } else if (is_range_ram < 0) { | 353 | } else if (is_range_ram < 0) { |
@@ -511,46 +355,7 @@ int free_memtype(u64 start, u64 end) | |||
511 | } | 355 | } |
512 | 356 | ||
513 | spin_lock(&memtype_lock); | 357 | spin_lock(&memtype_lock); |
514 | 358 | err = rbt_memtype_erase(start, end); | |
515 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
516 | if (unlikely(entry == NULL)) | ||
517 | goto unlock_ret; | ||
518 | |||
519 | /* | ||
520 | * Saved entry points to an entry with start same or less than what | ||
521 | * we searched for. Now go through the list in both directions to look | ||
522 | * for the entry that matches with both start and end, with list stored | ||
523 | * in sorted start address | ||
524 | */ | ||
525 | saved_entry = entry; | ||
526 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
527 | if (entry->start == start && entry->end == end) { | ||
528 | rb_erase(&entry->rb, &memtype_rbroot); | ||
529 | list_del(&entry->nd); | ||
530 | kfree(entry); | ||
531 | err = 0; | ||
532 | break; | ||
533 | } else if (entry->start > start) { | ||
534 | break; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (!err) | ||
539 | goto unlock_ret; | ||
540 | |||
541 | entry = saved_entry; | ||
542 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
543 | if (entry->start == start && entry->end == end) { | ||
544 | rb_erase(&entry->rb, &memtype_rbroot); | ||
545 | list_del(&entry->nd); | ||
546 | kfree(entry); | ||
547 | err = 0; | ||
548 | break; | ||
549 | } else if (entry->start < start) { | ||
550 | break; | ||
551 | } | ||
552 | } | ||
553 | unlock_ret: | ||
554 | spin_unlock(&memtype_lock); | 359 | spin_unlock(&memtype_lock); |
555 | 360 | ||
556 | if (err) { | 361 | if (err) { |
@@ -583,10 +388,8 @@ static unsigned long lookup_memtype(u64 paddr) | |||
583 | 388 | ||
584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 389 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
585 | struct page *page; | 390 | struct page *page; |
586 | spin_lock(&memtype_lock); | ||
587 | page = pfn_to_page(paddr >> PAGE_SHIFT); | 391 | page = pfn_to_page(paddr >> PAGE_SHIFT); |
588 | rettype = get_page_memtype(page); | 392 | rettype = get_page_memtype(page); |
589 | spin_unlock(&memtype_lock); | ||
590 | /* | 393 | /* |
591 | * -1 from get_page_memtype() implies RAM page is in its | 394 | * -1 from get_page_memtype() implies RAM page is in its |
592 | * default state and not reserved, and hence of type WB | 395 | * default state and not reserved, and hence of type WB |
@@ -599,7 +402,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
599 | 402 | ||
600 | spin_lock(&memtype_lock); | 403 | spin_lock(&memtype_lock); |
601 | 404 | ||
602 | entry = memtype_rb_search(&memtype_rbroot, paddr); | 405 | entry = rbt_memtype_lookup(paddr); |
603 | if (entry != NULL) | 406 | if (entry != NULL) |
604 | rettype = entry->type; | 407 | rettype = entry->type; |
605 | else | 408 | else |
@@ -936,29 +739,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); | |||
936 | 739 | ||
937 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 740 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
938 | 741 | ||
939 | /* get Nth element of the linked list */ | ||
940 | static struct memtype *memtype_get_idx(loff_t pos) | 742 | static struct memtype *memtype_get_idx(loff_t pos) |
941 | { | 743 | { |
942 | struct memtype *list_node, *print_entry; | 744 | struct memtype *print_entry; |
943 | int i = 1; | 745 | int ret; |
944 | 746 | ||
945 | print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 747 | print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); |
946 | if (!print_entry) | 748 | if (!print_entry) |
947 | return NULL; | 749 | return NULL; |
948 | 750 | ||
949 | spin_lock(&memtype_lock); | 751 | spin_lock(&memtype_lock); |
950 | list_for_each_entry(list_node, &memtype_list, nd) { | 752 | ret = rbt_memtype_copy_nth_element(print_entry, pos); |
951 | if (pos == i) { | ||
952 | *print_entry = *list_node; | ||
953 | spin_unlock(&memtype_lock); | ||
954 | return print_entry; | ||
955 | } | ||
956 | ++i; | ||
957 | } | ||
958 | spin_unlock(&memtype_lock); | 753 | spin_unlock(&memtype_lock); |
959 | kfree(print_entry); | ||
960 | 754 | ||
961 | return NULL; | 755 | if (!ret) { |
756 | return print_entry; | ||
757 | } else { | ||
758 | kfree(print_entry); | ||
759 | return NULL; | ||
760 | } | ||
962 | } | 761 | } |
963 | 762 | ||
964 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | 763 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h new file mode 100644 index 000000000000..4f39eefa3e61 --- /dev/null +++ b/arch/x86/mm/pat_internal.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef __PAT_INTERNAL_H_ | ||
2 | #define __PAT_INTERNAL_H_ | ||
3 | |||
4 | extern int pat_debug_enable; | ||
5 | |||
6 | #define dprintk(fmt, arg...) \ | ||
7 | do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
8 | |||
9 | struct memtype { | ||
10 | u64 start; | ||
11 | u64 end; | ||
12 | u64 subtree_max_end; | ||
13 | unsigned long type; | ||
14 | struct rb_node rb; | ||
15 | }; | ||
16 | |||
17 | static inline char *cattr_name(unsigned long flags) | ||
18 | { | ||
19 | switch (flags & _PAGE_CACHE_MASK) { | ||
20 | case _PAGE_CACHE_UC: return "uncached"; | ||
21 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
22 | case _PAGE_CACHE_WB: return "write-back"; | ||
23 | case _PAGE_CACHE_WC: return "write-combining"; | ||
24 | default: return "broken"; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | #ifdef CONFIG_X86_PAT | ||
29 | extern int rbt_memtype_check_insert(struct memtype *new, | ||
30 | unsigned long *new_type); | ||
31 | extern int rbt_memtype_erase(u64 start, u64 end); | ||
32 | extern struct memtype *rbt_memtype_lookup(u64 addr); | ||
33 | extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); | ||
34 | #else | ||
35 | static inline int rbt_memtype_check_insert(struct memtype *new, | ||
36 | unsigned long *new_type) | ||
37 | { return 0; } | ||
38 | static inline int rbt_memtype_erase(u64 start, u64 end) | ||
39 | { return 0; } | ||
40 | static inline struct memtype *rbt_memtype_lookup(u64 addr) | ||
41 | { return NULL; } | ||
42 | static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
43 | { return 0; } | ||
44 | #endif | ||
45 | |||
46 | #endif /* __PAT_INTERNAL_H_ */ | ||
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c new file mode 100644 index 000000000000..07de4cb8cc30 --- /dev/null +++ b/arch/x86/mm/pat_rbtree.c | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * Handle caching attributes in page tables (PAT) | ||
3 | * | ||
4 | * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
5 | * Suresh B Siddha <suresh.b.siddha@intel.com> | ||
6 | * | ||
7 | * Interval tree (augmented rbtree) used to store the PAT memory type | ||
8 | * reservations. | ||
9 | */ | ||
10 | |||
11 | #include <linux/seq_file.h> | ||
12 | #include <linux/debugfs.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/gfp.h> | ||
18 | |||
19 | #include <asm/pgtable.h> | ||
20 | #include <asm/pat.h> | ||
21 | |||
22 | #include "pat_internal.h" | ||
23 | |||
24 | /* | ||
25 | * The memtype tree keeps track of memory type for specific | ||
26 | * physical memory areas. Without proper tracking, conflicting memory | ||
27 | * types in different mappings can cause CPU cache corruption. | ||
28 | * | ||
29 | * The tree is an interval tree (augmented rbtree) with tree ordered | ||
30 | * on starting address. Tree can contain multiple entries for | ||
31 | * different regions which overlap. All the aliases have the same | ||
32 | * cache attributes of course. | ||
33 | * | ||
34 | * memtype_lock protects the rbtree. | ||
35 | */ | ||
36 | |||
37 | static void memtype_rb_augment_cb(struct rb_node *node); | ||
38 | static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); | ||
39 | |||
40 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) | ||
41 | { | ||
42 | if (node->start >= end || node->end <= start) | ||
43 | return 0; | ||
44 | |||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | static u64 get_subtree_max_end(struct rb_node *node) | ||
49 | { | ||
50 | u64 ret = 0; | ||
51 | if (node) { | ||
52 | struct memtype *data = container_of(node, struct memtype, rb); | ||
53 | ret = data->subtree_max_end; | ||
54 | } | ||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | /* Update 'subtree_max_end' for a node, based on node and its children */ | ||
59 | static void update_node_max_end(struct rb_node *node) | ||
60 | { | ||
61 | struct memtype *data; | ||
62 | u64 max_end, child_max_end; | ||
63 | |||
64 | if (!node) | ||
65 | return; | ||
66 | |||
67 | data = container_of(node, struct memtype, rb); | ||
68 | max_end = data->end; | ||
69 | |||
70 | child_max_end = get_subtree_max_end(node->rb_right); | ||
71 | if (child_max_end > max_end) | ||
72 | max_end = child_max_end; | ||
73 | |||
74 | child_max_end = get_subtree_max_end(node->rb_left); | ||
75 | if (child_max_end > max_end) | ||
76 | max_end = child_max_end; | ||
77 | |||
78 | data->subtree_max_end = max_end; | ||
79 | } | ||
80 | |||
81 | /* Update 'subtree_max_end' for a node and all its ancestors */ | ||
82 | static void update_path_max_end(struct rb_node *node) | ||
83 | { | ||
84 | u64 old_max_end, new_max_end; | ||
85 | |||
86 | while (node) { | ||
87 | struct memtype *data = container_of(node, struct memtype, rb); | ||
88 | |||
89 | old_max_end = data->subtree_max_end; | ||
90 | update_node_max_end(node); | ||
91 | new_max_end = data->subtree_max_end; | ||
92 | |||
93 | if (new_max_end == old_max_end) | ||
94 | break; | ||
95 | |||
96 | node = rb_parent(node); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* Find the first (lowest start addr) overlapping range from rb tree */ | ||
101 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, | ||
102 | u64 start, u64 end) | ||
103 | { | ||
104 | struct rb_node *node = root->rb_node; | ||
105 | struct memtype *last_lower = NULL; | ||
106 | |||
107 | while (node) { | ||
108 | struct memtype *data = container_of(node, struct memtype, rb); | ||
109 | |||
110 | if (get_subtree_max_end(node->rb_left) > start) { | ||
111 | /* Lowest overlap if any must be on left side */ | ||
112 | node = node->rb_left; | ||
113 | } else if (is_node_overlap(data, start, end)) { | ||
114 | last_lower = data; | ||
115 | break; | ||
116 | } else if (start >= data->start) { | ||
117 | /* Lowest overlap if any must be on right side */ | ||
118 | node = node->rb_right; | ||
119 | } else { | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | return last_lower; /* Returns NULL if there is no overlap */ | ||
124 | } | ||
125 | |||
126 | static struct memtype *memtype_rb_exact_match(struct rb_root *root, | ||
127 | u64 start, u64 end) | ||
128 | { | ||
129 | struct memtype *match; | ||
130 | |||
131 | match = memtype_rb_lowest_match(root, start, end); | ||
132 | while (match != NULL && match->start < end) { | ||
133 | struct rb_node *node; | ||
134 | |||
135 | if (match->start == start && match->end == end) | ||
136 | return match; | ||
137 | |||
138 | node = rb_next(&match->rb); | ||
139 | if (node) | ||
140 | match = container_of(node, struct memtype, rb); | ||
141 | else | ||
142 | match = NULL; | ||
143 | } | ||
144 | |||
145 | return NULL; /* Returns NULL if there is no exact match */ | ||
146 | } | ||
147 | |||
148 | static int memtype_rb_check_conflict(struct rb_root *root, | ||
149 | u64 start, u64 end, | ||
150 | unsigned long reqtype, unsigned long *newtype) | ||
151 | { | ||
152 | struct rb_node *node; | ||
153 | struct memtype *match; | ||
154 | int found_type = reqtype; | ||
155 | |||
156 | match = memtype_rb_lowest_match(&memtype_rbroot, start, end); | ||
157 | if (match == NULL) | ||
158 | goto success; | ||
159 | |||
160 | if (match->type != found_type && newtype == NULL) | ||
161 | goto failure; | ||
162 | |||
163 | dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); | ||
164 | found_type = match->type; | ||
165 | |||
166 | node = rb_next(&match->rb); | ||
167 | while (node) { | ||
168 | match = container_of(node, struct memtype, rb); | ||
169 | |||
170 | if (match->start >= end) /* Checked all possible matches */ | ||
171 | goto success; | ||
172 | |||
173 | if (is_node_overlap(match, start, end) && | ||
174 | match->type != found_type) { | ||
175 | goto failure; | ||
176 | } | ||
177 | |||
178 | node = rb_next(&match->rb); | ||
179 | } | ||
180 | success: | ||
181 | if (newtype) | ||
182 | *newtype = found_type; | ||
183 | |||
184 | return 0; | ||
185 | |||
186 | failure: | ||
187 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
188 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, | ||
189 | end, cattr_name(found_type), cattr_name(match->type)); | ||
190 | return -EBUSY; | ||
191 | } | ||
192 | |||
193 | static void memtype_rb_augment_cb(struct rb_node *node) | ||
194 | { | ||
195 | if (node) | ||
196 | update_path_max_end(node); | ||
197 | } | ||
198 | |||
199 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | ||
200 | { | ||
201 | struct rb_node **node = &(root->rb_node); | ||
202 | struct rb_node *parent = NULL; | ||
203 | |||
204 | while (*node) { | ||
205 | struct memtype *data = container_of(*node, struct memtype, rb); | ||
206 | |||
207 | parent = *node; | ||
208 | if (newdata->start <= data->start) | ||
209 | node = &((*node)->rb_left); | ||
210 | else if (newdata->start > data->start) | ||
211 | node = &((*node)->rb_right); | ||
212 | } | ||
213 | |||
214 | rb_link_node(&newdata->rb, parent, node); | ||
215 | rb_insert_color(&newdata->rb, root); | ||
216 | } | ||
217 | |||
218 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | ||
219 | { | ||
220 | int err = 0; | ||
221 | |||
222 | err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, | ||
223 | new->type, ret_type); | ||
224 | |||
225 | if (!err) { | ||
226 | if (ret_type) | ||
227 | new->type = *ret_type; | ||
228 | |||
229 | memtype_rb_insert(&memtype_rbroot, new); | ||
230 | } | ||
231 | return err; | ||
232 | } | ||
233 | |||
234 | int rbt_memtype_erase(u64 start, u64 end) | ||
235 | { | ||
236 | struct memtype *data; | ||
237 | |||
238 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); | ||
239 | if (!data) | ||
240 | return -EINVAL; | ||
241 | |||
242 | rb_erase(&data->rb, &memtype_rbroot); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | struct memtype *rbt_memtype_lookup(u64 addr) | ||
247 | { | ||
248 | struct memtype *data; | ||
249 | data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); | ||
250 | return data; | ||
251 | } | ||
252 | |||
253 | #if defined(CONFIG_DEBUG_FS) | ||
254 | int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
255 | { | ||
256 | struct rb_node *node; | ||
257 | int i = 1; | ||
258 | |||
259 | node = rb_first(&memtype_rbroot); | ||
260 | while (node && pos != i) { | ||
261 | node = rb_next(node); | ||
262 | i++; | ||
263 | } | ||
264 | |||
265 | if (node) { /* pos == i */ | ||
266 | struct memtype *this = container_of(node, struct memtype, rb); | ||
267 | *out = *this; | ||
268 | return 0; | ||
269 | } else { | ||
270 | return 1; | ||
271 | } | ||
272 | } | ||
273 | #endif | ||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..f9897f7a9ef1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
363 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
364 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
365 | 365 | ||
366 | /* | ||
367 | * Join together blocks on the same node, holes between | ||
368 | * which don't overlap with memory on other nodes. | ||
369 | */ | ||
370 | for (i = 0; i < num_node_memblks; ++i) { | ||
371 | int j, k; | ||
372 | |||
373 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
374 | unsigned long start, end; | ||
375 | |||
376 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
377 | continue; | ||
378 | start = min(node_memblk_range[i].end, | ||
379 | node_memblk_range[j].end); | ||
380 | end = max(node_memblk_range[i].start, | ||
381 | node_memblk_range[j].start); | ||
382 | for (k = 0; k < num_node_memblks; ++k) { | ||
383 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
384 | continue; | ||
385 | if (start < node_memblk_range[k].end && | ||
386 | end > node_memblk_range[k].start) | ||
387 | break; | ||
388 | } | ||
389 | if (k < num_node_memblks) | ||
390 | continue; | ||
391 | start = min(node_memblk_range[i].start, | ||
392 | node_memblk_range[j].start); | ||
393 | end = max(node_memblk_range[i].end, | ||
394 | node_memblk_range[j].end); | ||
395 | printk(KERN_INFO "SRAT: Node %d " | ||
396 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
397 | memblk_nodeid[i], | ||
398 | node_memblk_range[i].start, | ||
399 | node_memblk_range[i].end, | ||
400 | node_memblk_range[j].start, | ||
401 | node_memblk_range[j].end, | ||
402 | start, end); | ||
403 | node_memblk_range[i].start = start; | ||
404 | node_memblk_range[i].end = end; | ||
405 | k = --num_node_memblks - j; | ||
406 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
407 | k * sizeof(*memblk_nodeid)); | ||
408 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
409 | k * sizeof(*node_memblk_range)); | ||
410 | --j; | ||
411 | } | ||
412 | } | ||
413 | |||
366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 414 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
367 | memblk_nodeid); | 415 | memblk_nodeid); |
368 | if (memnode_shift < 0) { | 416 | if (memnode_shift < 0) { |
@@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
461 | * node, it must now point to the fake node ID. | 509 | * node, it must now point to the fake node ID. |
462 | */ | 510 | */ |
463 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 511 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
464 | if (apicid_to_node[j] == nid) | 512 | if (apicid_to_node[j] == nid && |
513 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
465 | fake_apicid_to_node[j] = i; | 514 | fake_apicid_to_node[j] = i; |
466 | } | 515 | } |
467 | for (i = 0; i < num_nodes; i++) | 516 | for (i = 0; i < num_nodes; i++) |