diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-03-04 17:29:07 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-03-04 19:35:15 -0500 |
commit | 9442ec9df40d952b0de185ae5638a74970388e01 (patch) | |
tree | 14b06d71203be119d93736464ca49f37ce402c1c | |
parent | 98837c7f82ef78aa38f40462aa2fcac68fd3acbf (diff) |
memcg: bad page if page_cgroup when free
Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) with a "Bad
page state" report that also clears the pointer: most users don't have
CONFIG_DEBUG_VM on, and if page_cgroup were still set here, it would likely
cause corruption when the page is reused.
Don't use page_assign_page_cgroup to clear it: that should be private to
memcontrol.c, and always called with the lock taken; and memmap_init_zone
doesn't need it either - like page->mapping and other pointers throughout the
kernel, Linux assumes pointers in zeroed structures are NULL pointers.
Instead use page_reset_bad_cgroup, added to memcontrol.h for this only.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 8 | ||||
-rw-r--r-- | mm/memcontrol.c | 27 | ||||
-rw-r--r-- | mm/page_alloc.c | 18 |
3 files changed, 28 insertions, 25 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56432ff8d4e3..70789df7dab4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -29,8 +29,9 @@ struct mm_struct; | |||
29 | 29 | ||
30 | extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); | 30 | extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); |
31 | extern void mm_free_cgroup(struct mm_struct *mm); | 31 | extern void mm_free_cgroup(struct mm_struct *mm); |
32 | extern void page_assign_page_cgroup(struct page *page, | 32 | |
33 | struct page_cgroup *pc); | 33 | #define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) |
34 | |||
34 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); | 35 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); |
35 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, | 36 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
36 | gfp_t gfp_mask); | 37 | gfp_t gfp_mask); |
@@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm) | |||
82 | { | 83 | { |
83 | } | 84 | } |
84 | 85 | ||
85 | static inline void page_assign_page_cgroup(struct page *page, | 86 | static inline void page_reset_bad_cgroup(struct page *page) |
86 | struct page_cgroup *pc) | ||
87 | { | 87 | { |
88 | } | 88 | } |
89 | 89 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index afdd406f618a..9e170d3c71e5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -140,11 +140,17 @@ struct mem_cgroup { | |||
140 | 140 | ||
141 | /* | 141 | /* |
142 | * We use the lower bit of the page->page_cgroup pointer as a bit spin | 142 | * We use the lower bit of the page->page_cgroup pointer as a bit spin |
143 | * lock. We need to ensure that page->page_cgroup is atleast two | 143 | * lock. We need to ensure that page->page_cgroup is at least two |
144 | * byte aligned (based on comments from Nick Piggin) | 144 | * byte aligned (based on comments from Nick Piggin). But since |
145 | * bit_spin_lock doesn't actually set that lock bit in a non-debug | ||
146 | * uniprocessor kernel, we should avoid setting it here too. | ||
145 | */ | 147 | */ |
146 | #define PAGE_CGROUP_LOCK_BIT 0x0 | 148 | #define PAGE_CGROUP_LOCK_BIT 0x0 |
147 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | 149 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) |
150 | #define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) | ||
151 | #else | ||
152 | #define PAGE_CGROUP_LOCK 0x0 | ||
153 | #endif | ||
148 | 154 | ||
149 | /* | 155 | /* |
150 | * A page_cgroup page is associated with every page descriptor. The | 156 | * A page_cgroup page is associated with every page descriptor. The |
@@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page) | |||
271 | &page->page_cgroup); | 277 | &page->page_cgroup); |
272 | } | 278 | } |
273 | 279 | ||
274 | void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) | 280 | static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) |
275 | { | 281 | { |
276 | int locked; | 282 | VM_BUG_ON(!page_cgroup_locked(page)); |
277 | 283 | page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); | |
278 | /* | ||
279 | * While resetting the page_cgroup we might not hold the | ||
280 | * page_cgroup lock. free_hot_cold_page() is an example | ||
281 | * of such a scenario | ||
282 | */ | ||
283 | if (pc) | ||
284 | VM_BUG_ON(!page_cgroup_locked(page)); | ||
285 | locked = (page->page_cgroup & PAGE_CGROUP_LOCK); | ||
286 | page->page_cgroup = ((unsigned long)pc | locked); | ||
287 | } | 284 | } |
288 | 285 | ||
289 | struct page_cgroup *page_get_page_cgroup(struct page *page) | 286 | struct page_cgroup *page_get_page_cgroup(struct page *page) |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e76cf94725c9..402a504f1228 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) | |||
222 | 222 | ||
223 | static void bad_page(struct page *page) | 223 | static void bad_page(struct page *page) |
224 | { | 224 | { |
225 | printk(KERN_EMERG "Bad page state in process '%s'\n" | 225 | void *pc = page_get_page_cgroup(page); |
226 | KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" | 226 | |
227 | KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | 227 | printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG |
228 | KERN_EMERG "Backtrace:\n", | 228 | "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", |
229 | current->comm, page, (int)(2*sizeof(unsigned long)), | 229 | current->comm, page, (int)(2*sizeof(unsigned long)), |
230 | (unsigned long)page->flags, page->mapping, | 230 | (unsigned long)page->flags, page->mapping, |
231 | page_mapcount(page), page_count(page)); | 231 | page_mapcount(page), page_count(page)); |
232 | if (pc) { | ||
233 | printk(KERN_EMERG "cgroup:%p\n", pc); | ||
234 | page_reset_bad_cgroup(page); | ||
235 | } | ||
236 | printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" | ||
237 | KERN_EMERG "Backtrace:\n"); | ||
232 | dump_stack(); | 238 | dump_stack(); |
233 | page->flags &= ~(1 << PG_lru | | 239 | page->flags &= ~(1 << PG_lru | |
234 | 1 << PG_private | | 240 | 1 << PG_private | |
@@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page) | |||
454 | { | 460 | { |
455 | if (unlikely(page_mapcount(page) | | 461 | if (unlikely(page_mapcount(page) | |
456 | (page->mapping != NULL) | | 462 | (page->mapping != NULL) | |
463 | (page_get_page_cgroup(page) != NULL) | | ||
457 | (page_count(page) != 0) | | 464 | (page_count(page) != 0) | |
458 | (page->flags & ( | 465 | (page->flags & ( |
459 | 1 << PG_lru | | 466 | 1 << PG_lru | |
@@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) | |||
603 | { | 610 | { |
604 | if (unlikely(page_mapcount(page) | | 611 | if (unlikely(page_mapcount(page) | |
605 | (page->mapping != NULL) | | 612 | (page->mapping != NULL) | |
613 | (page_get_page_cgroup(page) != NULL) | | ||
606 | (page_count(page) != 0) | | 614 | (page_count(page) != 0) | |
607 | (page->flags & ( | 615 | (page->flags & ( |
608 | 1 << PG_lru | | 616 | 1 << PG_lru | |
@@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
989 | 997 | ||
990 | if (!PageHighMem(page)) | 998 | if (!PageHighMem(page)) |
991 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); | 999 | debug_check_no_locks_freed(page_address(page), PAGE_SIZE); |
992 | VM_BUG_ON(page_get_page_cgroup(page)); | ||
993 | arch_free_page(page, 0); | 1000 | arch_free_page(page, 0); |
994 | kernel_map_pages(page, 1, 0); | 1001 | kernel_map_pages(page, 1, 0); |
995 | 1002 | ||
@@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
2528 | set_page_links(page, zone, nid, pfn); | 2535 | set_page_links(page, zone, nid, pfn); |
2529 | init_page_count(page); | 2536 | init_page_count(page); |
2530 | reset_page_mapcount(page); | 2537 | reset_page_mapcount(page); |
2531 | page_assign_page_cgroup(page, NULL); | ||
2532 | SetPageReserved(page); | 2538 | SetPageReserved(page); |
2533 | 2539 | ||
2534 | /* | 2540 | /* |