aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPallipadi, Venkatesh <venkatesh.pallipadi@intel.com>2010-02-10 18:26:07 -0500
committerH. Peter Anvin <hpa@zytor.com>2010-02-18 18:41:36 -0500
commit9e41a49aab88a5a6c8f4875bf10a5543bc321f2d (patch)
tree65a4921f5a7973f017e66b9ca00b6427eb3a6c83
parentbe5a0c126ad1dea2128dc5aef12c87083518d1ab (diff)
x86, pat: Migrate to rbtree only backend for pat memtype management
Move pat backend to fully rbtree based implementation from the existing rbtree and linked list hybrid. New rbtree based solution uses interval trees (augmented rbtrees) in order to store the PAT ranges. The new code seprates out the pat backend to pat_rbtree.c file, making is cleaner. The change also makes the PAT lookup, reserve and free operations more optimal, as we don't have to traverse linear linked list of few tens of entries in normal case. Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> LKML-Reference: <20100210232607.GB11465@linux-os.sc.intel.com> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/pat.c209
-rw-r--r--arch/x86/mm/pat_internal.h20
-rw-r--r--arch/x86/mm/pat_rbtree.c271
4 files changed, 296 insertions, 205 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 06630d26e56d..a4c768397baa 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
6CFLAGS_physaddr.o := $(nostackp) 6CFLAGS_physaddr.o := $(nostackp)
7CFLAGS_setup_nx.o := $(nostackp) 7CFLAGS_setup_nx.o := $(nostackp)
8 8
9obj-$(CONFIG_X86_PAT) += pat_rbtree.o
9obj-$(CONFIG_SMP) += tlb.o 10obj-$(CONFIG_SMP) += tlb.o
10 11
11obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o 12obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 628e507b7936..951011166ef5 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -130,65 +130,7 @@ void pat_init(void)
130 130
131#undef PAT 131#undef PAT
132 132
133/* 133static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */
134 * The global memtype list keeps track of memory type for specific
135 * physical memory areas. Conflicting memory types in different
136 * mappings can cause CPU cache corruption. To avoid this we keep track.
137 *
138 * The list is sorted based on starting address and can contain multiple
139 * entries for each address (this allows reference counting for overlapping
140 * areas). All the aliases have the same cache attributes of course.
141 * Zero attributes are represented as holes.
142 *
143 * The data structure is a list that is also organized as an rbtree
144 * sorted on the start address of memtype range.
145 *
146 * memtype_lock protects both the linear list and rbtree.
147 */
148
149static struct rb_root memtype_rbroot = RB_ROOT;
150static LIST_HEAD(memtype_list);
151static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
152
153static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
154{
155 struct rb_node *node = root->rb_node;
156 struct memtype *last_lower = NULL;
157
158 while (node) {
159 struct memtype *data = container_of(node, struct memtype, rb);
160
161 if (data->start < start) {
162 last_lower = data;
163 node = node->rb_right;
164 } else if (data->start > start) {
165 node = node->rb_left;
166 } else
167 return data;
168 }
169
170 /* Will return NULL if there is no entry with its start <= start */
171 return last_lower;
172}
173
174static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
175{
176 struct rb_node **new = &(root->rb_node);
177 struct rb_node *parent = NULL;
178
179 while (*new) {
180 struct memtype *this = container_of(*new, struct memtype, rb);
181
182 parent = *new;
183 if (data->start <= this->start)
184 new = &((*new)->rb_left);
185 else if (data->start > this->start)
186 new = &((*new)->rb_right);
187 }
188
189 rb_link_node(&data->rb, parent, new);
190 rb_insert_color(&data->rb, root);
191}
192 134
193/* 135/*
194 * Does intersection of PAT memory type and MTRR memory type and returns 136 * Does intersection of PAT memory type and MTRR memory type and returns
@@ -216,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
216 return req_type; 158 return req_type;
217} 159}
218 160
219static int
220chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
221{
222 if (new->type != entry->type) {
223 if (type) {
224 new->type = entry->type;
225 *type = entry->type;
226 } else
227 goto conflict;
228 }
229
230 /* check overlaps with more than one entry in the list */
231 list_for_each_entry_continue(entry, &memtype_list, nd) {
232 if (new->end <= entry->start)
233 break;
234 else if (new->type != entry->type)
235 goto conflict;
236 }
237 return 0;
238
239 conflict:
240 printk(KERN_INFO "%s:%d conflicting memory types "
241 "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
242 new->end, cattr_name(new->type), cattr_name(entry->type));
243 return -EBUSY;
244}
245
246static int pat_pagerange_is_ram(unsigned long start, unsigned long end) 161static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
247{ 162{
248 int ram_page = 0, not_rampage = 0; 163 int ram_page = 0, not_rampage = 0;
@@ -328,64 +243,6 @@ static int free_ram_pages_type(u64 start, u64 end)
328 return 0; 243 return 0;
329} 244}
330 245
331static int memtype_check_insert(struct memtype *new, unsigned long *new_type)
332{
333 struct memtype *entry;
334 u64 start, end;
335 unsigned long actual_type;
336 struct list_head *where;
337 int err = 0;
338
339 start = new->start;
340 end = new->end;
341 actual_type = new->type;
342
343 /* Search for existing mapping that overlaps the current range */
344 where = NULL;
345 list_for_each_entry(entry, &memtype_list, nd) {
346 if (end <= entry->start) {
347 where = entry->nd.prev;
348 break;
349 } else if (start <= entry->start) { /* end > entry->start */
350 err = chk_conflict(new, entry, new_type);
351 if (!err) {
352 dprintk("Overlap at 0x%Lx-0x%Lx\n",
353 entry->start, entry->end);
354 where = entry->nd.prev;
355 }
356 break;
357 } else if (start < entry->end) { /* start > entry->start */
358 err = chk_conflict(new, entry, new_type);
359 if (!err) {
360 dprintk("Overlap at 0x%Lx-0x%Lx\n",
361 entry->start, entry->end);
362
363 /*
364 * Move to right position in the linked
365 * list to add this new entry
366 */
367 list_for_each_entry_continue(entry,
368 &memtype_list, nd) {
369 if (start <= entry->start) {
370 where = entry->nd.prev;
371 break;
372 }
373 }
374 }
375 break;
376 }
377 }
378 if (!err) {
379 if (where)
380 list_add(&new->nd, where);
381 else
382 list_add_tail(&new->nd, &memtype_list);
383
384 memtype_rb_insert(&memtype_rbroot, new);
385 }
386 return err;
387}
388
389/* 246/*
390 * req_type typically has one of the: 247 * req_type typically has one of the:
391 * - _PAGE_CACHE_WB 248 * - _PAGE_CACHE_WB
@@ -459,7 +316,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
459 316
460 spin_lock(&memtype_lock); 317 spin_lock(&memtype_lock);
461 318
462 err = memtype_check_insert(new, new_type); 319 err = rbt_memtype_check_insert(new, new_type);
463 if (err) { 320 if (err) {
464 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " 321 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
465 "track %s, req %s\n", 322 "track %s, req %s\n",
@@ -481,7 +338,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
481 338
482int free_memtype(u64 start, u64 end) 339int free_memtype(u64 start, u64 end)
483{ 340{
484 struct memtype *entry, *saved_entry;
485 int err = -EINVAL; 341 int err = -EINVAL;
486 int is_range_ram; 342 int is_range_ram;
487 343
@@ -505,46 +361,7 @@ int free_memtype(u64 start, u64 end)
505 } 361 }
506 362
507 spin_lock(&memtype_lock); 363 spin_lock(&memtype_lock);
508 364 err = rbt_memtype_erase(start, end);
509 entry = memtype_rb_search(&memtype_rbroot, start);
510 if (unlikely(entry == NULL))
511 goto unlock_ret;
512
513 /*
514 * Saved entry points to an entry with start same or less than what
515 * we searched for. Now go through the list in both directions to look
516 * for the entry that matches with both start and end, with list stored
517 * in sorted start address
518 */
519 saved_entry = entry;
520 list_for_each_entry_from(entry, &memtype_list, nd) {
521 if (entry->start == start && entry->end == end) {
522 rb_erase(&entry->rb, &memtype_rbroot);
523 list_del(&entry->nd);
524 kfree(entry);
525 err = 0;
526 break;
527 } else if (entry->start > start) {
528 break;
529 }
530 }
531
532 if (!err)
533 goto unlock_ret;
534
535 entry = saved_entry;
536 list_for_each_entry_reverse(entry, &memtype_list, nd) {
537 if (entry->start == start && entry->end == end) {
538 rb_erase(&entry->rb, &memtype_rbroot);
539 list_del(&entry->nd);
540 kfree(entry);
541 err = 0;
542 break;
543 } else if (entry->start < start) {
544 break;
545 }
546 }
547unlock_ret:
548 spin_unlock(&memtype_lock); 365 spin_unlock(&memtype_lock);
549 366
550 if (err) { 367 if (err) {
@@ -593,7 +410,7 @@ static unsigned long lookup_memtype(u64 paddr)
593 410
594 spin_lock(&memtype_lock); 411 spin_lock(&memtype_lock);
595 412
596 entry = memtype_rb_search(&memtype_rbroot, paddr); 413 entry = rbt_memtype_lookup(paddr);
597 if (entry != NULL) 414 if (entry != NULL)
598 rettype = entry->type; 415 rettype = entry->type;
599 else 416 else
@@ -930,22 +747,6 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine);
930 747
931#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 748#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
932 749
933/* get Nth element of the linked list */
934static int copy_memtype_nth_element(struct memtype *out, loff_t pos)
935{
936 struct memtype *list_node;
937 int i = 1;
938
939 list_for_each_entry(list_node, &memtype_list, nd) {
940 if (pos == i) {
941 *out = *list_node;
942 return 0;
943 }
944 ++i;
945 }
946 return 1;
947}
948
949static struct memtype *memtype_get_idx(loff_t pos) 750static struct memtype *memtype_get_idx(loff_t pos)
950{ 751{
951 struct memtype *print_entry; 752 struct memtype *print_entry;
@@ -956,7 +757,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
956 return NULL; 757 return NULL;
957 758
958 spin_lock(&memtype_lock); 759 spin_lock(&memtype_lock);
959 ret = copy_memtype_nth_element(print_entry, pos); 760 ret = rbt_memtype_copy_nth_element(print_entry, pos);
960 spin_unlock(&memtype_lock); 761 spin_unlock(&memtype_lock);
961 762
962 if (!ret) { 763 if (!ret) {
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index 6c98780eb731..4f39eefa3e61 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -9,8 +9,8 @@ extern int pat_debug_enable;
9struct memtype { 9struct memtype {
10 u64 start; 10 u64 start;
11 u64 end; 11 u64 end;
12 u64 subtree_max_end;
12 unsigned long type; 13 unsigned long type;
13 struct list_head nd;
14 struct rb_node rb; 14 struct rb_node rb;
15}; 15};
16 16
@@ -25,4 +25,22 @@ static inline char *cattr_name(unsigned long flags)
25 } 25 }
26} 26}
27 27
28#ifdef CONFIG_X86_PAT
29extern int rbt_memtype_check_insert(struct memtype *new,
30 unsigned long *new_type);
31extern int rbt_memtype_erase(u64 start, u64 end);
32extern struct memtype *rbt_memtype_lookup(u64 addr);
33extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
34#else
35static inline int rbt_memtype_check_insert(struct memtype *new,
36 unsigned long *new_type)
37{ return 0; }
38static inline int rbt_memtype_erase(u64 start, u64 end)
39{ return 0; }
40static inline struct memtype *rbt_memtype_lookup(u64 addr)
41{ return NULL; }
42static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
43{ return 0; }
44#endif
45
28#endif /* __PAT_INTERNAL_H_ */ 46#endif /* __PAT_INTERNAL_H_ */
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
new file mode 100644
index 000000000000..9063f40b638b
--- /dev/null
+++ b/arch/x86/mm/pat_rbtree.c
@@ -0,0 +1,271 @@
1/*
2 * Handle caching attributes in page tables (PAT)
3 *
4 * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
5 * Suresh B Siddha <suresh.b.siddha@intel.com>
6 *
7 * Interval tree (augmented rbtree) used to store the PAT memory type
8 * reservations.
9 */
10
11#include <linux/seq_file.h>
12#include <linux/debugfs.h>
13#include <linux/kernel.h>
14#include <linux/module.h>
15#include <linux/rbtree.h>
16#include <linux/sched.h>
17#include <linux/gfp.h>
18
19#include <asm/pgtable.h>
20#include <asm/pat.h>
21
22#include "pat_internal.h"
23
24/*
25 * The memtype tree keeps track of memory type for specific
26 * physical memory areas. Without proper tracking, conflicting memory
27 * types in different mappings can cause CPU cache corruption.
28 *
29 * The tree is an interval tree (augmented rbtree) with tree ordered
30 * on starting address. Tree can contain multiple entries for
31 * different regions which overlap. All the aliases have the same
32 * cache attributes of course.
33 *
34 * memtype_lock protects the rbtree.
35 */
36
37static void memtype_rb_augment_cb(struct rb_node *node);
38static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
39
40static int is_node_overlap(struct memtype *node, u64 start, u64 end)
41{
42 if (node->start >= end || node->end <= start)
43 return 0;
44
45 return 1;
46}
47
48static u64 get_subtree_max_end(struct rb_node *node)
49{
50 u64 ret = 0;
51 if (node) {
52 struct memtype *data = container_of(node, struct memtype, rb);
53 ret = data->subtree_max_end;
54 }
55 return ret;
56}
57
58/* Update 'subtree_max_end' for a node, based on node and its children */
59static void update_node_max_end(struct rb_node *node)
60{
61 struct memtype *data;
62 u64 max_end, child_max_end;
63
64 if (!node)
65 return;
66
67 data = container_of(node, struct memtype, rb);
68 max_end = data->end;
69
70 child_max_end = get_subtree_max_end(node->rb_right);
71 if (child_max_end > max_end)
72 max_end = child_max_end;
73
74 child_max_end = get_subtree_max_end(node->rb_left);
75 if (child_max_end > max_end)
76 max_end = child_max_end;
77
78 data->subtree_max_end = max_end;
79}
80
81/* Update 'subtree_max_end' for a node and all its ancestors */
82static void update_path_max_end(struct rb_node *node)
83{
84 u64 old_max_end, new_max_end;
85
86 while (node) {
87 struct memtype *data = container_of(node, struct memtype, rb);
88
89 old_max_end = data->subtree_max_end;
90 update_node_max_end(node);
91 new_max_end = data->subtree_max_end;
92
93 if (new_max_end == old_max_end)
94 break;
95
96 node = rb_parent(node);
97 }
98}
99
100/* Find the first (lowest start addr) overlapping range from rb tree */
101static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
102 u64 start, u64 end)
103{
104 struct rb_node *node = root->rb_node;
105 struct memtype *last_lower = NULL;
106
107 while (node) {
108 struct memtype *data = container_of(node, struct memtype, rb);
109
110 if (get_subtree_max_end(node->rb_left) > start) {
111 /* Lowest overlap if any must be on left side */
112 node = node->rb_left;
113 } else if (is_node_overlap(data, start, end)) {
114 last_lower = data;
115 break;
116 } else if (start >= data->start) {
117 /* Lowest overlap if any must be on right side */
118 node = node->rb_right;
119 } else {
120 break;
121 }
122 }
123 return last_lower; /* Returns NULL if there is no overlap */
124}
125
126static struct memtype *memtype_rb_exact_match(struct rb_root *root,
127 u64 start, u64 end)
128{
129 struct memtype *match;
130
131 match = memtype_rb_lowest_match(root, start, end);
132 while (match != NULL && match->start < end) {
133 struct rb_node *node;
134
135 if (match->start == start && match->end == end)
136 return match;
137
138 node = rb_next(&match->rb);
139 if (node)
140 match = container_of(node, struct memtype, rb);
141 else
142 match = NULL;
143 }
144
145 return NULL; /* Returns NULL if there is no exact match */
146}
147
148static int memtype_rb_check_conflict(struct rb_root *root,
149 u64 start, u64 end,
150 unsigned long reqtype, unsigned long *newtype)
151{
152 struct rb_node *node;
153 struct memtype *match;
154 int found_type = reqtype;
155
156 match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
157 if (match == NULL)
158 goto success;
159
160 if (match->type != found_type && newtype == NULL)
161 goto failure;
162
163 dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
164 found_type = match->type;
165
166 node = rb_next(&match->rb);
167 while (node) {
168 match = container_of(node, struct memtype, rb);
169
170 if (match->start >= end) /* Checked all possible matches */
171 goto success;
172
173 if (is_node_overlap(match, start, end) &&
174 match->type != found_type) {
175 goto failure;
176 }
177
178 node = rb_next(&match->rb);
179 }
180success:
181 if (newtype)
182 *newtype = found_type;
183
184 return 0;
185
186failure:
187 printk(KERN_INFO "%s:%d conflicting memory types "
188 "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start,
189 end, cattr_name(found_type), cattr_name(match->type));
190 return -EBUSY;
191}
192
193static void memtype_rb_augment_cb(struct rb_node *node)
194{
195 if (node)
196 update_path_max_end(node);
197}
198
199static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
200{
201 struct rb_node **node = &(root->rb_node);
202 struct rb_node *parent = NULL;
203
204 while (*node) {
205 struct memtype *data = container_of(*node, struct memtype, rb);
206
207 parent = *node;
208 if (newdata->start <= data->start)
209 node = &((*node)->rb_left);
210 else if (newdata->start > data->start)
211 node = &((*node)->rb_right);
212 }
213
214 rb_link_node(&newdata->rb, parent, node);
215 rb_insert_color(&newdata->rb, root);
216}
217
218int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
219{
220 int err = 0;
221
222 err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
223 new->type, ret_type);
224
225 if (!err) {
226 new->type = *ret_type;
227 memtype_rb_insert(&memtype_rbroot, new);
228 }
229 return err;
230}
231
232int rbt_memtype_erase(u64 start, u64 end)
233{
234 struct memtype *data;
235
236 data = memtype_rb_exact_match(&memtype_rbroot, start, end);
237 if (!data)
238 return -EINVAL;
239
240 rb_erase(&data->rb, &memtype_rbroot);
241 return 0;
242}
243
244struct memtype *rbt_memtype_lookup(u64 addr)
245{
246 struct memtype *data;
247 data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
248 return data;
249}
250
251#if defined(CONFIG_DEBUG_FS)
252int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
253{
254 struct rb_node *node;
255 int i = 1;
256
257 node = rb_first(&memtype_rbroot);
258 while (node && pos != i) {
259 node = rb_next(node);
260 i++;
261 }
262
263 if (node) { /* pos == i */
264 struct memtype *this = container_of(node, struct memtype, rb);
265 *out = *this;
266 return 0;
267 } else {
268 return 1;
269 }
270}
271#endif