aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Nick Piggin <piggin@cyberone.com.au> 2006-04-09 21:21:48 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-04-10 13:16:37 -0400
commit 676165a8af7167f488abdcce6851a9bc36e83254 (patch)
tree a9b2b8dc155b48ce073b5ada31f2ac0694118e69 /mm
parent c3a9d6541f84ac3ff566982d08389b87c1c36b4e (diff)
[PATCH] Fix buddy list race that could lead to page lru list corruptions
Rohit found an obscure bug causing buddy list corruption. page_is_buddy is using a non-atomic test (PagePrivate && page_count == 0) to determine whether or not a free page's buddy is itself free and in the buddy lists. Each of the conjuncts may be true at different times due to unrelated conditions, so the non-atomic page_is_buddy test may find each conjunct to be true even if they were not both true at the same time (i.e. the page was not on the buddy lists).

Signed-off-by: Martin Bligh <mbligh@google.com>
Signed-off-by: Rohit Seth <rohitseth@google.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/page_alloc.c 31
1 files changed, 18 insertions, 13 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..b8165e037dee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -151,7 +151,8 @@ static void bad_page(struct page *page)
151 1 << PG_reclaim | 151 1 << PG_reclaim |
152 1 << PG_slab | 152 1 << PG_slab |
153 1 << PG_swapcache | 153 1 << PG_swapcache |
154 1 << PG_writeback ); 154 1 << PG_writeback |
155 1 << PG_buddy );
155 set_page_count(page, 0); 156 set_page_count(page, 0);
156 reset_page_mapcount(page); 157 reset_page_mapcount(page);
157 page->mapping = NULL; 158 page->mapping = NULL;
@@ -236,12 +237,12 @@ static inline unsigned long page_order(struct page *page) {
236 237
237static inline void set_page_order(struct page *page, int order) { 238static inline void set_page_order(struct page *page, int order) {
238 set_page_private(page, order); 239 set_page_private(page, order);
239 __SetPagePrivate(page); 240 __SetPageBuddy(page);
240} 241}
241 242
242static inline void rmv_page_order(struct page *page) 243static inline void rmv_page_order(struct page *page)
243{ 244{
244 __ClearPagePrivate(page); 245 __ClearPageBuddy(page);
245 set_page_private(page, 0); 246 set_page_private(page, 0);
246} 247}
247 248
@@ -280,11 +281,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
280 * This function checks whether a page is free && is the buddy 281 * This function checks whether a page is free && is the buddy
281 * we can do coalesce a page and its buddy if 282 * we can do coalesce a page and its buddy if
282 * (a) the buddy is not in a hole && 283 * (a) the buddy is not in a hole &&
283 * (b) the buddy is free && 284 * (b) the buddy is in the buddy system &&
284 * (c) the buddy is on the buddy system && 285 * (c) a page and its buddy have the same order.
285 * (d) a page and its buddy have the same order. 286 *
286 * for recording page's order, we use page_private(page) and PG_private. 287 * For recording whether a page is in the buddy system, we use PG_buddy.
288 * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
287 * 289 *
290 * For recording page's order, we use page_private(page).
288 */ 291 */
289static inline int page_is_buddy(struct page *page, int order) 292static inline int page_is_buddy(struct page *page, int order)
290{ 293{
@@ -293,10 +296,10 @@ static inline int page_is_buddy(struct page *page, int order)
293 return 0; 296 return 0;
294#endif 297#endif
295 298
296 if (PagePrivate(page) && 299 if (PageBuddy(page) && page_order(page) == order) {
297 (page_order(page) == order) && 300 BUG_ON(page_count(page) != 0);
298 page_count(page) == 0)
299 return 1; 301 return 1;
302 }
300 return 0; 303 return 0;
301} 304}
302 305
@@ -313,7 +316,7 @@ static inline int page_is_buddy(struct page *page, int order)
313 * as necessary, plus some accounting needed to play nicely with other 316 * as necessary, plus some accounting needed to play nicely with other
314 * parts of the VM system. 317 * parts of the VM system.
315 * At each level, we keep a list of pages, which are heads of continuous 318 * At each level, we keep a list of pages, which are heads of continuous
316 * free pages of length of (1 << order) and marked with PG_Private.Page's 319 * free pages of length of (1 << order) and marked with PG_buddy. Page's
317 * order is recorded in page_private(page) field. 320 * order is recorded in page_private(page) field.
318 * So when we are allocating or freeing one, we can derive the state of the 321 * So when we are allocating or freeing one, we can derive the state of the
319 * other. That is, if we allocate a small block, and both were 322 * other. That is, if we allocate a small block, and both were
@@ -376,7 +379,8 @@ static inline int free_pages_check(struct page *page)
376 1 << PG_slab | 379 1 << PG_slab |
377 1 << PG_swapcache | 380 1 << PG_swapcache |
378 1 << PG_writeback | 381 1 << PG_writeback |
379 1 << PG_reserved )))) 382 1 << PG_reserved |
383 1 << PG_buddy ))))
380 bad_page(page); 384 bad_page(page);
381 if (PageDirty(page)) 385 if (PageDirty(page))
382 __ClearPageDirty(page); 386 __ClearPageDirty(page);
@@ -524,7 +528,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
524 1 << PG_slab | 528 1 << PG_slab |
525 1 << PG_swapcache | 529 1 << PG_swapcache |
526 1 << PG_writeback | 530 1 << PG_writeback |
527 1 << PG_reserved )))) 531 1 << PG_reserved |
532 1 << PG_buddy ))))
528 bad_page(page); 533 bad_page(page);
529 534
530 /* 535 /*