author		Hugh Dickins <hugh@veritas.com>		2005-10-29 21:16:40 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-30 00:40:42 -0400
commit		4c21e2f2441dc5fbb957b030333f5a3f2d02dea7
tree		1f76d33bb1d76221c6424bc5fed080a4f91349a6 /mm/page_alloc.c
parent		b38c6845b695141259019e2b7c0fe6c32a6e720d
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with
a many-threaded application which concurrently initializes different parts of
a large anonymous area.
This patch corrects that, by using a separate spinlock per page table page, to
guard the page table entries in that page, instead of using the mm's single
page_table_lock. (But even then, page_table_lock is still used to guard page
table allocation, and anon_vma allocation.)
In this implementation, the spinlock is tucked inside the struct page of the
page table page: with a BUILD_BUG_ON in case it overflows - which it would in
the case of 32-bit PA-RISC with spinlock debugging enabled.
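Roughly, the layout and the overflow check take the following shape. This is a sketch only: the field name ptl matches later kernels, but the exact union layout and where the BUILD_BUG_ON lands are assumptions.

struct page {
        unsigned long flags;
        atomic_t _count;
        union {
                unsigned long private; /* unused while the page holds ptes */
                spinlock_t ptl;        /* split page table lock (assumed layout) */
        };
        /* ... */
};

static inline void pte_lock_size_check(void) /* hypothetical placement */
{
        /* Fails the build where a debug-bloated spinlock_t outgrows its
         * slot, e.g. 32-bit PA-RISC with spinlock debugging enabled. */
        BUILD_BUG_ON(sizeof(spinlock_t) > sizeof(unsigned long));
}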
Splitting the lock is not quite for free: another cacheline access. Ideally,
I suppose we would use split ptlock only for multi-threaded processes on
multi-cpu machines; but deciding that dynamically would have its own costs.
So for now enable it by config, at some number of cpus - since the Kconfig
language doesn't support inequalities, let preprocessor compare that with
NR_CPUS. But I don't think it's worth being user-configurable: for good
testing of both split and unsplit configs, split now at 4 cpus, and perhaps
change that to 8 later.
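Concretely, the option is a plain integer and the comparison happens in the preprocessor; CONFIG_SPLIT_PTLOCK_CPUS is the real option name, while the pte_lock_init switch below is a sketch:

/* mm/Kconfig supplies only a number (default "4"), since the Kconfig
 * language cannot express "NR_CPUS >= 4"; cpp does the comparison: */
#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#define pte_lock_init(page)	spin_lock_init(&(page)->ptl)
#else
#define pte_lock_init(page)	do {} while (0)
#endif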
There is a benefit even for singly threaded processes: kswapd can be attacking
one part of the mm while another part is busy faulting.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0541288ebf4b..a2995a5d012c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -154,7 +154,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
                 struct page *p = page + i;
 
                 SetPageCompound(p);
-                p->private = (unsigned long)page;
+                set_page_private(p, (unsigned long)page);
         }
 }
 
@@ -174,7 +174,7 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 
                 if (!PageCompound(p))
                         bad_page(__FUNCTION__, page);
-                if (p->private != (unsigned long)page)
+                if (page_private(p) != (unsigned long)page)
                         bad_page(__FUNCTION__, page);
                 ClearPageCompound(p);
         }
@@ -187,18 +187,18 @@ static void destroy_compound_page(struct page *page, unsigned long order)
  * So, we don't need atomic page->flags operations here.
  */
 static inline unsigned long page_order(struct page *page) {
-        return page->private;
+        return page_private(page);
 }
 
 static inline void set_page_order(struct page *page, int order) {
-        page->private = order;
+        set_page_private(page, order);
         __SetPagePrivate(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
         __ClearPagePrivate(page);
-        page->private = 0;
+        set_page_private(page, 0);
 }
 
 /*
@@ -238,7 +238,7 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
  * (a) the buddy is free &&
  * (b) the buddy is on the buddy system &&
  * (c) a page and its buddy have the same order.
- * for recording page's order, we use page->private and PG_private.
+ * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
@@ -264,7 +264,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
  * free pages of length of (1 << order) and marked with PG_Private.Page's
- * order is recorded in page->private field.
+ * order is recorded in page_private(page) field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other. That is, if we allocate a small block, and both were
  * free, the remainder of the region must be split into blocks.
@@ -463,7 +463,7 @@ static void prep_new_page(struct page *page, int order)
         page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                         1 << PG_referenced | 1 << PG_arch_1 |
                         1 << PG_checked | 1 << PG_mappedtodisk);
-        page->private = 0;
+        set_page_private(page, 0);
         set_page_refs(page, order);
         kernel_map_pages(page, 1 << order, 1);
 }
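For reference, the page_private()/set_page_private() accessors this diff switches to simply wrap the same struct page field, so its storage can later share space with the new ptl without touching every user; a minimal sketch of their shape, assuming the definition used by later kernels:

/* Sketch (assumed shape, not quoted from the patch): */
#define page_private(page)		((page)->private)
#define set_page_private(page, v)	((page)->private = (v))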