aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nick Piggin <piggin@cyberone.com.au> 2006-04-09 21:21:48 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-04-10 13:16:37 -0400
commit 676165a8af7167f488abdcce6851a9bc36e83254 (patch)
tree a9b2b8dc155b48ce073b5ada31f2ac0694118e69
parent c3a9d6541f84ac3ff566982d08389b87c1c36b4e (diff)
[PATCH] Fix buddy list race that could lead to page lru list corruptions
Rohit found an obscure bug causing buddy list corruption.

page_is_buddy is using a non-atomic test (PagePrivate && page_count == 0) to determine whether or not a free page's buddy is itself free and in the buddy lists.

Each of the conjuncts may be true at different times due to unrelated conditions, so the non-atomic page_is_buddy test may find each conjunct to be true even if they were not both true at the same time (i.e. the page was not on the buddy lists).

Signed-off-by: Martin Bligh <mbligh@google.com>
Signed-off-by: Rohit Seth <rohitseth@google.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  include/linux/mm.h          |  5
-rw-r--r--  include/linux/page-flags.h  |  8
-rw-r--r--  mm/page_alloc.c             | 31
3 files changed, 27 insertions, 17 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6aa016f1d3ae..1154684209a4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -229,10 +229,9 @@ struct page {
229 unsigned long private; /* Mapping-private opaque data: 229 unsigned long private; /* Mapping-private opaque data:
230 * usually used for buffer_heads 230 * usually used for buffer_heads
231 * if PagePrivate set; used for 231 * if PagePrivate set; used for
232 * swp_entry_t if PageSwapCache. 232 * swp_entry_t if PageSwapCache;
233 * When page is free, this
234 * indicates order in the buddy 233 * indicates order in the buddy
235 * system. 234 * system if PG_buddy is set.
236 */ 235 */
237 struct address_space *mapping; /* If low bit clear, points to 236 struct address_space *mapping; /* If low bit clear, points to
238 * inode address_space, or NULL. 237 * inode address_space, or NULL.
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 9ea629c02a4b..547aac7696cd 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -74,7 +74,9 @@
74#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ 74#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
75#define PG_reclaim 17 /* To be reclaimed asap */ 75#define PG_reclaim 17 /* To be reclaimed asap */
76#define PG_nosave_free 18 /* Free, should not be written */ 76#define PG_nosave_free 18 /* Free, should not be written */
77#define PG_uncached 19 /* Page has been mapped as uncached */ 77#define PG_buddy 19 /* Page is free, on buddy lists */
78
79#define PG_uncached 20 /* Page has been mapped as uncached */
78 80
79/* 81/*
80 * Global page accounting. One instance per CPU. Only unsigned longs are 82 * Global page accounting. One instance per CPU. Only unsigned longs are
@@ -317,6 +319,10 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
317#define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags) 319#define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags)
318#define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags) 320#define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags)
319 321
322#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags)
323#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags)
324#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)
325
320#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) 326#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
321#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) 327#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
322#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) 328#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..b8165e037dee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -151,7 +151,8 @@ static void bad_page(struct page *page)
151 1 << PG_reclaim | 151 1 << PG_reclaim |
152 1 << PG_slab | 152 1 << PG_slab |
153 1 << PG_swapcache | 153 1 << PG_swapcache |
154 1 << PG_writeback ); 154 1 << PG_writeback |
155 1 << PG_buddy );
155 set_page_count(page, 0); 156 set_page_count(page, 0);
156 reset_page_mapcount(page); 157 reset_page_mapcount(page);
157 page->mapping = NULL; 158 page->mapping = NULL;
@@ -236,12 +237,12 @@ static inline unsigned long page_order(struct page *page) {
236 237
237static inline void set_page_order(struct page *page, int order) { 238static inline void set_page_order(struct page *page, int order) {
238 set_page_private(page, order); 239 set_page_private(page, order);
239 __SetPagePrivate(page); 240 __SetPageBuddy(page);
240} 241}
241 242
242static inline void rmv_page_order(struct page *page) 243static inline void rmv_page_order(struct page *page)
243{ 244{
244 __ClearPagePrivate(page); 245 __ClearPageBuddy(page);
245 set_page_private(page, 0); 246 set_page_private(page, 0);
246} 247}
247 248
@@ -280,11 +281,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
280 * This function checks whether a page is free && is the buddy 281 * This function checks whether a page is free && is the buddy
281 * we can do coalesce a page and its buddy if 282 * we can do coalesce a page and its buddy if
282 * (a) the buddy is not in a hole && 283 * (a) the buddy is not in a hole &&
283 * (b) the buddy is free && 284 * (b) the buddy is in the buddy system &&
284 * (c) the buddy is on the buddy system && 285 * (c) a page and its buddy have the same order.
285 * (d) a page and its buddy have the same order. 286 *
286 * for recording page's order, we use page_private(page) and PG_private. 287 * For recording whether a page is in the buddy system, we use PG_buddy.
288 * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
287 * 289 *
290 * For recording page's order, we use page_private(page).
288 */ 291 */
289static inline int page_is_buddy(struct page *page, int order) 292static inline int page_is_buddy(struct page *page, int order)
290{ 293{
@@ -293,10 +296,10 @@ static inline int page_is_buddy(struct page *page, int order)
293 return 0; 296 return 0;
294#endif 297#endif
295 298
296 if (PagePrivate(page) && 299 if (PageBuddy(page) && page_order(page) == order) {
297 (page_order(page) == order) && 300 BUG_ON(page_count(page) != 0);
298 page_count(page) == 0)
299 return 1; 301 return 1;
302 }
300 return 0; 303 return 0;
301} 304}
302 305
@@ -313,7 +316,7 @@ static inline int page_is_buddy(struct page *page, int order)
313 * as necessary, plus some accounting needed to play nicely with other 316 * as necessary, plus some accounting needed to play nicely with other
314 * parts of the VM system. 317 * parts of the VM system.
315 * At each level, we keep a list of pages, which are heads of continuous 318 * At each level, we keep a list of pages, which are heads of continuous
316 * free pages of length of (1 << order) and marked with PG_Private.Page's 319 * free pages of length of (1 << order) and marked with PG_buddy. Page's
317 * order is recorded in page_private(page) field. 320 * order is recorded in page_private(page) field.
318 * So when we are allocating or freeing one, we can derive the state of the 321 * So when we are allocating or freeing one, we can derive the state of the
319 * other. That is, if we allocate a small block, and both were 322 * other. That is, if we allocate a small block, and both were
@@ -376,7 +379,8 @@ static inline int free_pages_check(struct page *page)
376 1 << PG_slab | 379 1 << PG_slab |
377 1 << PG_swapcache | 380 1 << PG_swapcache |
378 1 << PG_writeback | 381 1 << PG_writeback |
379 1 << PG_reserved )))) 382 1 << PG_reserved |
383 1 << PG_buddy ))))
380 bad_page(page); 384 bad_page(page);
381 if (PageDirty(page)) 385 if (PageDirty(page))
382 __ClearPageDirty(page); 386 __ClearPageDirty(page);
@@ -524,7 +528,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
524 1 << PG_slab | 528 1 << PG_slab |
525 1 << PG_swapcache | 529 1 << PG_swapcache |
526 1 << PG_writeback | 530 1 << PG_writeback |
527 1 << PG_reserved )))) 531 1 << PG_reserved |
532 1 << PG_buddy ))))
528 bad_page(page); 533 bad_page(page);
529 534
530 /* 535 /*