Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/Makefile          1
-rw-r--r--  arch/x86/mm/pat.c           239
-rw-r--r--  arch/x86/mm/pat_internal.h   46
-rw-r--r--  arch/x86/mm/pat_rbtree.c    273
-rw-r--r--  arch/x86/mm/srat_64.c        51
5 files changed, 389 insertions, 221 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 06630d26e56d..a4c768397baa 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_physaddr.o := $(nostackp)
 CFLAGS_setup_nx.o := $(nostackp)
 
+obj-$(CONFIG_X86_PAT) += pat_rbtree.o
 obj-$(CONFIG_SMP) += tlb.o
 
 obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index edc8b95afc1a..bbe5502ee1cb 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -30,6 +30,8 @@
 #include <asm/pat.h>
 #include <asm/io.h>
 
+#include "pat_internal.h"
+
 #ifdef CONFIG_X86_PAT
 int __read_mostly pat_enabled = 1;
 
@@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason)
 #endif
 
 
-static int debug_enable;
+int pat_debug_enable;
 
 static int __init pat_debug_setup(char *str)
 {
-	debug_enable = 1;
+	pat_debug_enable = 1;
 	return 0;
 }
 __setup("debugpat", pat_debug_setup);
 
-#define dprintk(fmt, arg...) \
-	do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
-
-
 static u64 __read_mostly boot_pat_state;
 
 enum {
@@ -132,84 +130,7 @@ void pat_init(void)
 
 #undef PAT
 
-static char *cattr_name(unsigned long flags)
-{
-	switch (flags & _PAGE_CACHE_MASK) {
-	case _PAGE_CACHE_UC:		return "uncached";
-	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
-	case _PAGE_CACHE_WB:		return "write-back";
-	case _PAGE_CACHE_WC:		return "write-combining";
-	default:			return "broken";
-	}
-}
-
-/*
- * The global memtype list keeps track of memory type for specific
- * physical memory areas. Conflicting memory types in different
- * mappings can cause CPU cache corruption. To avoid this we keep track.
- *
- * The list is sorted based on starting address and can contain multiple
- * entries for each address (this allows reference counting for overlapping
- * areas). All the aliases have the same cache attributes of course.
- * Zero attributes are represented as holes.
- *
- * The data structure is a list that is also organized as an rbtree
- * sorted on the start address of memtype range.
- *
- * memtype_lock protects both the linear list and rbtree.
- */
-
-struct memtype {
-	u64			start;
-	u64			end;
-	unsigned long		type;
-	struct list_head	nd;
-	struct rb_node		rb;
-};
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-static LIST_HEAD(memtype_list);
-static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
-
-static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
-{
-	struct rb_node *node = root->rb_node;
-	struct memtype *last_lower = NULL;
-
-	while (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
-
-		if (data->start < start) {
-			last_lower = data;
-			node = node->rb_right;
-		} else if (data->start > start) {
-			node = node->rb_left;
-		} else
-			return data;
-	}
-
-	/* Will return NULL if there is no entry with its start <= start */
-	return last_lower;
-}
-
-static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
-{
-	struct rb_node **new = &(root->rb_node);
-	struct rb_node *parent = NULL;
-
-	while (*new) {
-		struct memtype *this = container_of(*new, struct memtype, rb);
-
-		parent = *new;
-		if (data->start <= this->start)
-			new = &((*new)->rb_left);
-		else if (data->start > this->start)
-			new = &((*new)->rb_right);
-	}
-
-	rb_link_node(&data->rb, parent, new);
-	rb_insert_color(&data->rb, root);
-}
+static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype accesses */
 
 /*
  * Does intersection of PAT memory type and MTRR memory type and returns
@@ -237,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
 	return req_type;
 }
 
-static int
-chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
-{
-	if (new->type != entry->type) {
-		if (type) {
-			new->type = entry->type;
-			*type = entry->type;
-		} else
-			goto conflict;
-	}
-
-	/* check overlaps with more than one entry in the list */
-	list_for_each_entry_continue(entry, &memtype_list, nd) {
-		if (new->end <= entry->start)
-			break;
-		else if (new->type != entry->type)
-			goto conflict;
-	}
-	return 0;
-
- conflict:
-	printk(KERN_INFO "%s:%d conflicting memory types "
-	       "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
-	       new->end, cattr_name(new->type), cattr_name(entry->type));
-	return -EBUSY;
-}
-
 static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
 {
 	int ram_page = 0, not_rampage = 0;
@@ -296,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
  * Here we do two pass:
  * - Find the memtype of all the pages in the range, look for any conflicts
  * - In case of no conflicts, set the new memtype for pages in the range
- *
- * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
@@ -364,9 +256,8 @@ static int free_ram_pages_type(u64 start, u64 end)
 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		    unsigned long *new_type)
 {
-	struct memtype *new, *entry;
+	struct memtype *new;
 	unsigned long actual_type;
-	struct list_head *where;
 	int is_range_ram;
 	int err = 0;
 
@@ -404,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = reserve_ram_pages_type(start, end, req_type, new_type);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -423,42 +312,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 	spin_lock(&memtype_lock);
 
-	/* Search for existing mapping that overlaps the current range */
-	where = NULL;
-	list_for_each_entry(entry, &memtype_list, nd) {
-		if (end <= entry->start) {
-			where = entry->nd.prev;
-			break;
-		} else if (start <= entry->start) { /* end > entry->start */
-			err = chk_conflict(new, entry, new_type);
-			if (!err) {
-				dprintk("Overlap at 0x%Lx-0x%Lx\n",
-					entry->start, entry->end);
-				where = entry->nd.prev;
-			}
-			break;
-		} else if (start < entry->end) { /* start > entry->start */
-			err = chk_conflict(new, entry, new_type);
-			if (!err) {
-				dprintk("Overlap at 0x%Lx-0x%Lx\n",
-					entry->start, entry->end);
-
-				/*
-				 * Move to right position in the linked
-				 * list to add this new entry
-				 */
-				list_for_each_entry_continue(entry,
-						&memtype_list, nd) {
-					if (start <= entry->start) {
-						where = entry->nd.prev;
-						break;
-					}
-				}
-			}
-			break;
-		}
-	}
-
+	err = rbt_memtype_check_insert(new, new_type);
 	if (err) {
 		printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
 			"track %s, req %s\n",
@@ -469,13 +323,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return err;
 	}
 
-	if (where)
-		list_add(&new->nd, where);
-	else
-		list_add_tail(&new->nd, &memtype_list);
-
-	memtype_rb_insert(&memtype_rbroot, new);
-
 	spin_unlock(&memtype_lock);
 
 	dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -487,7 +334,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 int free_memtype(u64 start, u64 end)
 {
-	struct memtype *entry, *saved_entry;
 	int err = -EINVAL;
 	int is_range_ram;
 
@@ -501,9 +347,7 @@ int free_memtype(u64 start, u64 end)
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = free_ram_pages_type(start, end);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -511,46 +355,7 @@ int free_memtype(u64 start, u64 end)
 	}
 
 	spin_lock(&memtype_lock);
-
-	entry = memtype_rb_search(&memtype_rbroot, start);
-	if (unlikely(entry == NULL))
-		goto unlock_ret;
-
-	/*
-	 * Saved entry points to an entry with start same or less than what
-	 * we searched for. Now go through the list in both directions to look
-	 * for the entry that matches with both start and end, with list stored
-	 * in sorted start address
-	 */
-	saved_entry = entry;
-	list_for_each_entry_from(entry, &memtype_list, nd) {
-		if (entry->start == start && entry->end == end) {
-			rb_erase(&entry->rb, &memtype_rbroot);
-			list_del(&entry->nd);
-			kfree(entry);
-			err = 0;
-			break;
-		} else if (entry->start > start) {
-			break;
-		}
-	}
-
-	if (!err)
-		goto unlock_ret;
-
-	entry = saved_entry;
-	list_for_each_entry_reverse(entry, &memtype_list, nd) {
-		if (entry->start == start && entry->end == end) {
-			rb_erase(&entry->rb, &memtype_rbroot);
-			list_del(&entry->nd);
-			kfree(entry);
-			err = 0;
-			break;
-		} else if (entry->start < start) {
-			break;
-		}
-	}
-unlock_ret:
+	err = rbt_memtype_erase(start, end);
 	spin_unlock(&memtype_lock);
 
 	if (err) {
@@ -583,10 +388,8 @@ static unsigned long lookup_memtype(u64 paddr)
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
 		struct page *page;
-		spin_lock(&memtype_lock);
 		page = pfn_to_page(paddr >> PAGE_SHIFT);
 		rettype = get_page_memtype(page);
-		spin_unlock(&memtype_lock);
 		/*
 		 * -1 from get_page_memtype() implies RAM page is in its
 		 * default state and not reserved, and hence of type WB
@@ -599,7 +402,7 @@ static unsigned long lookup_memtype(u64 paddr)
 
 	spin_lock(&memtype_lock);
 
-	entry = memtype_rb_search(&memtype_rbroot, paddr);
+	entry = rbt_memtype_lookup(paddr);
 	if (entry != NULL)
 		rettype = entry->type;
 	else
@@ -936,29 +739,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine);
 
 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
 
-/* get Nth element of the linked list */
 static struct memtype *memtype_get_idx(loff_t pos)
 {
-	struct memtype *list_node, *print_entry;
-	int i = 1;
+	struct memtype *print_entry;
+	int ret;
 
-	print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+	print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!print_entry)
 		return NULL;
 
 	spin_lock(&memtype_lock);
-	list_for_each_entry(list_node, &memtype_list, nd) {
-		if (pos == i) {
-			*print_entry = *list_node;
-			spin_unlock(&memtype_lock);
-			return print_entry;
-		}
-		++i;
-	}
+	ret = rbt_memtype_copy_nth_element(print_entry, pos);
 	spin_unlock(&memtype_lock);
-	kfree(print_entry);
 
-	return NULL;
+	if (!ret) {
+		return print_entry;
+	} else {
+		kfree(print_entry);
+		return NULL;
+	}
 }
 
 static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
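
A note on the memtype_get_idx() rework above: the element is now copied out of the tree while memtype_lock is held, so the debugfs seq_file code can keep using (and later kfree) its private snapshot after the lock is dropped, instead of holding a pointer into a structure that may change. Below is a minimal user-space sketch of that copy-under-lock pattern, with a pthread mutex standing in for the kernel spinlock; 'table', 'rec' and get_idx() are illustrative names, not kernel API.

/*
 * User-space sketch (pthreads, not kernel code) of the copy-out pattern:
 * snapshot the element under the lock, return the private copy, and let
 * the caller read and free it after the lock is gone.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct rec {
	int key;
	char payload[32];
};

static struct rec table[8];	/* stands in for the shared rbtree */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rec *get_idx(int pos)
{
	struct rec *copy = calloc(1, sizeof(*copy));

	if (!copy)
		return NULL;

	pthread_mutex_lock(&table_lock);
	*copy = table[pos];		/* copy out while protected */
	pthread_mutex_unlock(&table_lock);

	return copy;			/* caller owns the snapshot */
}

int main(void)
{
	struct rec *r;

	snprintf(table[3].payload, sizeof(table[3].payload), "hello");
	r = get_idx(3);
	if (r) {
		printf("key=%d payload=%s\n", r->key, r->payload);
		free(r);
	}
	return 0;
}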
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
new file mode 100644
index 000000000000..4f39eefa3e61
--- /dev/null
+++ b/arch/x86/mm/pat_internal.h
@@ -0,0 +1,46 @@
+#ifndef __PAT_INTERNAL_H_
+#define __PAT_INTERNAL_H_
+
+extern int pat_debug_enable;
+
+#define dprintk(fmt, arg...) \
+	do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
+
+struct memtype {
+	u64			start;
+	u64			end;
+	u64			subtree_max_end;
+	unsigned long		type;
+	struct rb_node		rb;
+};
+
+static inline char *cattr_name(unsigned long flags)
+{
+	switch (flags & _PAGE_CACHE_MASK) {
+	case _PAGE_CACHE_UC:		return "uncached";
+	case _PAGE_CACHE_UC_MINUS:	return "uncached-minus";
+	case _PAGE_CACHE_WB:		return "write-back";
+	case _PAGE_CACHE_WC:		return "write-combining";
+	default:			return "broken";
+	}
+}
+
+#ifdef CONFIG_X86_PAT
+extern int rbt_memtype_check_insert(struct memtype *new,
+					unsigned long *new_type);
+extern int rbt_memtype_erase(u64 start, u64 end);
+extern struct memtype *rbt_memtype_lookup(u64 addr);
+extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
+#else
+static inline int rbt_memtype_check_insert(struct memtype *new,
+					unsigned long *new_type)
+{ return 0; }
+static inline int rbt_memtype_erase(u64 start, u64 end)
+{ return 0; }
+static inline struct memtype *rbt_memtype_lookup(u64 addr)
+{ return NULL; }
+static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{ return 0; }
+#endif
+
+#endif /* __PAT_INTERNAL_H_ */
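
pat_internal.h follows a common kernel header idiom: when CONFIG_X86_PAT is configured out, the rbt_* entry points collapse into static inline no-ops, so pat.c can call them unconditionally with no #ifdefs at the call sites. A minimal user-space sketch of the same idiom; FEATURE_FOO and foo_insert() are made-up names for illustration.

/*
 * Stub idiom: same signature in both configurations, so callers
 * never need to know whether the feature was compiled in.
 */
#include <stdio.h>

#define FEATURE_FOO 1			/* flip to 0 to get the stubs */

#if FEATURE_FOO
static int foo_insert(int key)
{
	printf("inserted %d\n", key);	/* real work would happen here */
	return 0;
}
#else
/* Compiled-out variant: does nothing, reports success. */
static inline int foo_insert(int key) { return 0; }
#endif

int main(void)
{
	/* The caller is identical whichever branch was compiled. */
	return foo_insert(42);
}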
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
new file mode 100644
index 000000000000..07de4cb8cc30
--- /dev/null
+++ b/arch/x86/mm/pat_rbtree.c
@@ -0,0 +1,273 @@
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *          Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Interval tree (augmented rbtree) used to store the PAT memory type
+ * reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+
+#include "pat_internal.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) with tree ordered
+ * on starting address. Tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course.
+ *
+ * memtype_lock protects the rbtree.
+ */
+
+static void memtype_rb_augment_cb(struct rb_node *node);
+static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
+
+static int is_node_overlap(struct memtype *node, u64 start, u64 end)
+{
+	if (node->start >= end || node->end <= start)
+		return 0;
+
+	return 1;
+}
+
+static u64 get_subtree_max_end(struct rb_node *node)
+{
+	u64 ret = 0;
+	if (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+		ret = data->subtree_max_end;
+	}
+	return ret;
+}
+
+/* Update 'subtree_max_end' for a node, based on node and its children */
+static void update_node_max_end(struct rb_node *node)
+{
+	struct memtype *data;
+	u64 max_end, child_max_end;
+
+	if (!node)
+		return;
+
+	data = container_of(node, struct memtype, rb);
+	max_end = data->end;
+
+	child_max_end = get_subtree_max_end(node->rb_right);
+	if (child_max_end > max_end)
+		max_end = child_max_end;
+
+	child_max_end = get_subtree_max_end(node->rb_left);
+	if (child_max_end > max_end)
+		max_end = child_max_end;
+
+	data->subtree_max_end = max_end;
+}
+
+/* Update 'subtree_max_end' for a node and all its ancestors */
+static void update_path_max_end(struct rb_node *node)
+{
+	u64 old_max_end, new_max_end;
+
+	while (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+
+		old_max_end = data->subtree_max_end;
+		update_node_max_end(node);
+		new_max_end = data->subtree_max_end;
+
+		if (new_max_end == old_max_end)
+			break;
+
+		node = rb_parent(node);
+	}
+}
+
+/* Find the first (lowest start addr) overlapping range from rb tree */
+static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
+				u64 start, u64 end)
+{
+	struct rb_node *node = root->rb_node;
+	struct memtype *last_lower = NULL;
+
+	while (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+
+		if (get_subtree_max_end(node->rb_left) > start) {
+			/* Lowest overlap if any must be on left side */
+			node = node->rb_left;
+		} else if (is_node_overlap(data, start, end)) {
+			last_lower = data;
+			break;
+		} else if (start >= data->start) {
+			/* Lowest overlap if any must be on right side */
+			node = node->rb_right;
+		} else {
+			break;
+		}
+	}
+	return last_lower; /* Returns NULL if there is no overlap */
+}
+
+static struct memtype *memtype_rb_exact_match(struct rb_root *root,
+				u64 start, u64 end)
+{
+	struct memtype *match;
+
+	match = memtype_rb_lowest_match(root, start, end);
+	while (match != NULL && match->start < end) {
+		struct rb_node *node;
+
+		if (match->start == start && match->end == end)
+			return match;
+
+		node = rb_next(&match->rb);
+		if (node)
+			match = container_of(node, struct memtype, rb);
+		else
+			match = NULL;
+	}
+
+	return NULL; /* Returns NULL if there is no exact match */
+}
+
+static int memtype_rb_check_conflict(struct rb_root *root,
+				u64 start, u64 end,
+				unsigned long reqtype, unsigned long *newtype)
+{
+	struct rb_node *node;
+	struct memtype *match;
+	int found_type = reqtype;
+
+	match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
+	if (match == NULL)
+		goto success;
+
+	if (match->type != found_type && newtype == NULL)
+		goto failure;
+
+	dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
+	found_type = match->type;
+
+	node = rb_next(&match->rb);
+	while (node) {
+		match = container_of(node, struct memtype, rb);
+
+		if (match->start >= end) /* Checked all possible matches */
+			goto success;
+
+		if (is_node_overlap(match, start, end) &&
+		    match->type != found_type) {
+			goto failure;
+		}
+
+		node = rb_next(&match->rb);
+	}
+success:
+	if (newtype)
+		*newtype = found_type;
+
+	return 0;
+
+failure:
+	printk(KERN_INFO "%s:%d conflicting memory types "
+		"%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
+		end, cattr_name(found_type), cattr_name(match->type));
+	return -EBUSY;
+}
+
+static void memtype_rb_augment_cb(struct rb_node *node)
+{
+	if (node)
+		update_path_max_end(node);
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
+{
+	struct rb_node **node = &(root->rb_node);
+	struct rb_node *parent = NULL;
+
+	while (*node) {
+		struct memtype *data = container_of(*node, struct memtype, rb);
+
+		parent = *node;
+		if (newdata->start <= data->start)
+			node = &((*node)->rb_left);
+		else if (newdata->start > data->start)
+			node = &((*node)->rb_right);
+	}
+
+	rb_link_node(&newdata->rb, parent, node);
+	rb_insert_color(&newdata->rb, root);
+}
+
+int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
+{
+	int err = 0;
+
+	err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
+						new->type, ret_type);
+
+	if (!err) {
+		if (ret_type)
+			new->type = *ret_type;
+
+		memtype_rb_insert(&memtype_rbroot, new);
+	}
+	return err;
+}
+
+int rbt_memtype_erase(u64 start, u64 end)
+{
+	struct memtype *data;
+
+	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
+	if (!data)
+		return -EINVAL;
+
+	rb_erase(&data->rb, &memtype_rbroot);
+	return 0;
+}
+
+struct memtype *rbt_memtype_lookup(u64 addr)
+{
+	struct memtype *data;
+	data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
+	return data;
+}
+
+#if defined(CONFIG_DEBUG_FS)
+int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{
+	struct rb_node *node;
+	int i = 1;
+
+	node = rb_first(&memtype_rbroot);
+	while (node && pos != i) {
+		node = rb_next(node);
+		i++;
+	}
+
+	if (node) { /* pos == i */
+		struct memtype *this = container_of(node, struct memtype, rb);
+		*out = *this;
+		return 0;
+	} else {
+		return 1;
+	}
+}
+#endif
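
The core idea in pat_rbtree.c is the subtree_max_end augmentation: each node caches the maximum end address anywhere in its subtree, which lets memtype_rb_lowest_match() prune whole subtrees while hunting for the lowest-starting overlap. Below is a self-contained user-space sketch of that search on a plain (unbalanced) BST keyed on start. It mirrors the pruning logic but omits rebalancing and the kernel's RB_AUGMENT_ROOT callback; all names are illustrative, not kernel API.

/*
 * Augmented-BST sketch: each node carries subtree_max_end so an
 * overlap query for half-open [start, end) can skip subtrees whose
 * ranges all end at or before 'start'.
 */
#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long long start, end;	/* half-open [start, end) */
	unsigned long long subtree_max_end;
	struct node *left, *right;
};

static unsigned long long max_end(struct node *n)
{
	return n ? n->subtree_max_end : 0;
}

static struct node *insert(struct node *root, unsigned long long s,
			   unsigned long long e)
{
	if (!root) {
		struct node *n = calloc(1, sizeof(*n));
		n->start = s;
		n->end = e;
		n->subtree_max_end = e;
		return n;
	}
	if (s <= root->start)
		root->left = insert(root->left, s, e);
	else
		root->right = insert(root->right, s, e);
	if (e > root->subtree_max_end)	/* maintain the augmentation */
		root->subtree_max_end = e;
	return root;
}

/* Mirrors memtype_rb_lowest_match(): first range overlapping [s, e) */
static struct node *lowest_match(struct node *n, unsigned long long s,
				 unsigned long long e)
{
	while (n) {
		if (max_end(n->left) > s)
			n = n->left;	/* lowest overlap, if any, is left */
		else if (n->start < e && n->end > s)
			return n;	/* this node overlaps */
		else if (s >= n->start)
			n = n->right;	/* only the right side can overlap */
		else
			break;
	}
	return NULL;
}

int main(void)
{
	struct node *root = NULL, *m;

	root = insert(root, 0x1000, 0x3000);
	root = insert(root, 0x5000, 0x6000);
	root = insert(root, 0x2000, 0x7000);

	m = lowest_match(root, 0x2800, 0x2900);
	if (m)	/* prints the lowest-starting overlap, [0x1000, 0x3000) */
		printf("lowest overlap: [0x%llx, 0x%llx)\n", m->start, m->end);
	return 0;
}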
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 28c68762648f..f9897f7a9ef1 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 	for (i = 0; i < MAX_NUMNODES; i++)
 		cutoff_node(i, start, end);
 
+	/*
+	 * Join together blocks on the same node, holes between
+	 * which don't overlap with memory on other nodes.
+	 */
+	for (i = 0; i < num_node_memblks; ++i) {
+		int j, k;
+
+		for (j = i + 1; j < num_node_memblks; ++j) {
+			unsigned long start, end;
+
+			if (memblk_nodeid[i] != memblk_nodeid[j])
+				continue;
+			start = min(node_memblk_range[i].end,
+				    node_memblk_range[j].end);
+			end = max(node_memblk_range[i].start,
+				  node_memblk_range[j].start);
+			for (k = 0; k < num_node_memblks; ++k) {
+				if (memblk_nodeid[i] == memblk_nodeid[k])
+					continue;
+				if (start < node_memblk_range[k].end &&
+				    end > node_memblk_range[k].start)
+					break;
+			}
+			if (k < num_node_memblks)
+				continue;
+			start = min(node_memblk_range[i].start,
+				    node_memblk_range[j].start);
+			end = max(node_memblk_range[i].end,
+				  node_memblk_range[j].end);
+			printk(KERN_INFO "SRAT: Node %d "
+			       "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n",
+			       memblk_nodeid[i],
+			       node_memblk_range[i].start,
+			       node_memblk_range[i].end,
+			       node_memblk_range[j].start,
+			       node_memblk_range[j].end,
+			       start, end);
+			node_memblk_range[i].start = start;
+			node_memblk_range[i].end = end;
+			k = --num_node_memblks - j;
+			memmove(memblk_nodeid + j, memblk_nodeid + j+1,
+				k * sizeof(*memblk_nodeid));
+			memmove(node_memblk_range + j, node_memblk_range + j+1,
+				k * sizeof(*node_memblk_range));
+			--j;
+		}
+	}
+
 	memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks,
 					   memblk_nodeid);
 	if (memnode_shift < 0) {
@@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
 	 * node, it must now point to the fake node ID.
 	 */
 	for (j = 0; j < MAX_LOCAL_APIC; j++)
-		if (apicid_to_node[j] == nid)
+		if (apicid_to_node[j] == nid &&
+		    fake_apicid_to_node[j] == NUMA_NO_NODE)
 			fake_apicid_to_node[j] = i;
 	}
 	for (i = 0; i < num_nodes; i++)
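
For the srat_64.c merge loop above, the test is pure interval arithmetic: two blocks of the same node may be joined only if the hole between them, [min(end_i, end_j), max(start_i, start_j)), overlaps no block belonging to a different node. A stand-alone sketch of just that predicate follows; the names and the sample data are illustrative, not the kernel code.

/*
 * can_merge(): same-node blocks a and b may be joined iff no block of
 * another node intersects the span between them.
 */
#include <stdio.h>

struct memblk { unsigned long start, end; int nid; };

static int can_merge(const struct memblk *a, const struct memblk *b,
		     const struct memblk *all, int n)
{
	/* Hole between the two blocks (empty if they touch/overlap). */
	unsigned long hole_s = a->end < b->end ? a->end : b->end;
	unsigned long hole_e = a->start > b->start ? a->start : b->start;
	int k;

	if (a->nid != b->nid)
		return 0;
	for (k = 0; k < n; k++) {
		if (all[k].nid == a->nid)
			continue;
		if (hole_s < all[k].end && hole_e > all[k].start)
			return 0;	/* foreign block sits in the hole */
	}
	return 1;
}

int main(void)
{
	struct memblk blk[] = {
		{ 0x0000, 0x1000, 0 },
		{ 0x3000, 0x4000, 0 },
		{ 0x1000, 0x2000, 1 },	/* node 1 inside node 0's hole */
	};

	/* Prints "no": node 1 memory lies between the two node 0 blocks. */
	printf("merge blk0+blk1: %s\n",
	       can_merge(&blk[0], &blk[1], blk, 3) ? "yes" : "no");
	return 0;
}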