aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Hugh Dickins <hugh@veritas.com> 2008-03-04 17:29:07 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-04 19:35:15 -0500
commit 9442ec9df40d952b0de185ae5638a74970388e01 (patch)
tree 14b06d71203be119d93736464ca49f37ce402c1c
parent 98837c7f82ef78aa38f40462aa2fcac68fd3acbf (diff)
memcg: bad page if page_cgroup when free
Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it were set here, it'd likely cause corruption when the page is reused.

Don't use page_assign_page_cgroup to clear it: that should be private to memcontrol.c, and always called with the lock taken; and memmap_init_zone doesn't need it either - like page->mapping and other pointers throughout the kernel, Linux assumes pointers in zeroed structures are NULL pointers.

Instead use page_reset_bad_cgroup, added to memcontrol.h for this only.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/memcontrol.h 8
-rw-r--r-- mm/memcontrol.c 27
-rw-r--r-- mm/page_alloc.c 18
3 files changed, 28 insertions, 25 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 56432ff8d4e3..70789df7dab4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,8 +29,9 @@ struct mm_struct;
29 29
30extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); 30extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
31extern void mm_free_cgroup(struct mm_struct *mm); 31extern void mm_free_cgroup(struct mm_struct *mm);
32extern void page_assign_page_cgroup(struct page *page, 32
33 struct page_cgroup *pc); 33#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0)
34
34extern struct page_cgroup *page_get_page_cgroup(struct page *page); 35extern struct page_cgroup *page_get_page_cgroup(struct page *page);
35extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, 36extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
36 gfp_t gfp_mask); 37 gfp_t gfp_mask);
@@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm)
82{ 83{
83} 84}
84 85
85static inline void page_assign_page_cgroup(struct page *page, 86static inline void page_reset_bad_cgroup(struct page *page)
86 struct page_cgroup *pc)
87{ 87{
88} 88}
89 89
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index afdd406f618a..9e170d3c71e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -140,11 +140,17 @@ struct mem_cgroup {
140 140
141/* 141/*
142 * We use the lower bit of the page->page_cgroup pointer as a bit spin 142 * We use the lower bit of the page->page_cgroup pointer as a bit spin
143 * lock. We need to ensure that page->page_cgroup is atleast two 143 * lock. We need to ensure that page->page_cgroup is at least two
144 * byte aligned (based on comments from Nick Piggin) 144 * byte aligned (based on comments from Nick Piggin). But since
145 * bit_spin_lock doesn't actually set that lock bit in a non-debug
146 * uniprocessor kernel, we should avoid setting it here too.
145 */ 147 */
146#define PAGE_CGROUP_LOCK_BIT 0x0 148#define PAGE_CGROUP_LOCK_BIT 0x0
147#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) 149#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
150#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT)
151#else
152#define PAGE_CGROUP_LOCK 0x0
153#endif
148 154
149/* 155/*
150 * A page_cgroup page is associated with every page descriptor. The 156 * A page_cgroup page is associated with every page descriptor. The
@@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page)
271 &page->page_cgroup); 277 &page->page_cgroup);
272} 278}
273 279
274void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) 280static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
275{ 281{
276 int locked; 282 VM_BUG_ON(!page_cgroup_locked(page));
277 283 page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
278 /*
279 * While resetting the page_cgroup we might not hold the
280 * page_cgroup lock. free_hot_cold_page() is an example
281 * of such a scenario
282 */
283 if (pc)
284 VM_BUG_ON(!page_cgroup_locked(page));
285 locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
286 page->page_cgroup = ((unsigned long)pc | locked);
287} 284}
288 285
289struct page_cgroup *page_get_page_cgroup(struct page *page) 286struct page_cgroup *page_get_page_cgroup(struct page *page)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e76cf94725c9..402a504f1228 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page)
222 222
223static void bad_page(struct page *page) 223static void bad_page(struct page *page)
224{ 224{
225 printk(KERN_EMERG "Bad page state in process '%s'\n" 225 void *pc = page_get_page_cgroup(page);
226 KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" 226
227 KERN_EMERG "Trying to fix it up, but a reboot is needed\n" 227 printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
228 KERN_EMERG "Backtrace:\n", 228 "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
229 current->comm, page, (int)(2*sizeof(unsigned long)), 229 current->comm, page, (int)(2*sizeof(unsigned long)),
230 (unsigned long)page->flags, page->mapping, 230 (unsigned long)page->flags, page->mapping,
231 page_mapcount(page), page_count(page)); 231 page_mapcount(page), page_count(page));
232 if (pc) {
233 printk(KERN_EMERG "cgroup:%p\n", pc);
234 page_reset_bad_cgroup(page);
235 }
236 printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
237 KERN_EMERG "Backtrace:\n");
232 dump_stack(); 238 dump_stack();
233 page->flags &= ~(1 << PG_lru | 239 page->flags &= ~(1 << PG_lru |
234 1 << PG_private | 240 1 << PG_private |
@@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page)
454{ 460{
455 if (unlikely(page_mapcount(page) | 461 if (unlikely(page_mapcount(page) |
456 (page->mapping != NULL) | 462 (page->mapping != NULL) |
463 (page_get_page_cgroup(page) != NULL) |
457 (page_count(page) != 0) | 464 (page_count(page) != 0) |
458 (page->flags & ( 465 (page->flags & (
459 1 << PG_lru | 466 1 << PG_lru |
@@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
603{ 610{
604 if (unlikely(page_mapcount(page) | 611 if (unlikely(page_mapcount(page) |
605 (page->mapping != NULL) | 612 (page->mapping != NULL) |
613 (page_get_page_cgroup(page) != NULL) |
606 (page_count(page) != 0) | 614 (page_count(page) != 0) |
607 (page->flags & ( 615 (page->flags & (
608 1 << PG_lru | 616 1 << PG_lru |
@@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold)
989 997
990 if (!PageHighMem(page)) 998 if (!PageHighMem(page))
991 debug_check_no_locks_freed(page_address(page), PAGE_SIZE); 999 debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
992 VM_BUG_ON(page_get_page_cgroup(page));
993 arch_free_page(page, 0); 1000 arch_free_page(page, 0);
994 kernel_map_pages(page, 1, 0); 1001 kernel_map_pages(page, 1, 0);
995 1002
@@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
2528 set_page_links(page, zone, nid, pfn); 2535 set_page_links(page, zone, nid, pfn);
2529 init_page_count(page); 2536 init_page_count(page);
2530 reset_page_mapcount(page); 2537 reset_page_mapcount(page);
2531 page_assign_page_cgroup(page, NULL);
2532 SetPageReserved(page); 2538 SetPageReserved(page);
2533 2539
2534 /* 2540 /*