author    Hugh Dickins <hugh@veritas.com>                 2009-01-06 17:40:08 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2009-01-06 18:59:07 -0500
commit    3dc147414ccad81dc33edb80774b1fed12a38c08 (patch)
tree      bdcfd37fa47d6617490fa276ddfcc5a6ab0c731b
parent    8cc3b39221b0ecbd83a338948a8396df097fc656 (diff)
badpage: replace page_remove_rmap Eeek and BUG
Now that bad pages are kept out of circulation, there is no need for the infamous page_remove_rmap() BUG() - once that page is freed, its negative mapcount will issue a "Bad page state" message and the page won't be freed. Removing the BUG() allows more info, on subsequent pages, to be gathered.

We do have more info about the page at this point than bad_page() can know - notably, what the pmd is, which might pinpoint something like low 64kB corruption - but page_remove_rmap() isn't given the address to find that.

In practice, there is only one call to page_remove_rmap() which has ever reported anything, that from zap_pte_range() (usually on exit, sometimes on munmap). It has all the info, so remove page_remove_rmap()'s "Eeek" message and leave it all to zap_pte_range().

mm/memory.c already has a hardly used print_bad_pte() function, showing some of the appropriate info: extend it to show what we want for the rmap case: pte info, page info (when there is a page) and vma info to compare. zap_pte_range() already knows the pmd, but print_bad_pte() is easier to use if it works that out for itself.

Some of this info is also shown in bad_page()'s "Bad page state" message. Keep them separate, but adjust them to match each other as far as possible. Say "Bad page map" in print_bad_pte(), and add a TAINT_BAD_PAGE there too.

print_bad_pte() shows current->comm unconditionally (though it should get repeated in the usually irrelevant stack trace): sorry, I misled Nick Piggin to make it conditional on vm_mm == current->mm, but current->mm is already NULL in the exit case. Usually current->comm is good, though exceptionally it may not be that of the mm (when "swapoff" for example).

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  mm/memory.c      52
-rw-r--r--  mm/page_alloc.c  16
-rw-r--r--  mm/rmap.c        16
3 files changed, 48 insertions, 36 deletions
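A note before the diff: as the changelog says, callers no longer hand print_bad_pte() the pmd; it works that out for itself from the vma and the faulting address, using the standard page-table walk helpers (visible in the mm/memory.c hunk below). A minimal sketch of that lookup follows, just to make the walk explicit - the helper name is hypothetical and not part of the patch, and error handling is omitted:

/*
 * Sketch only: pmd_for_address() is a made-up name, not code from this patch.
 * It condenses the walk print_bad_pte() now performs inline, assuming the
 * usual pgtable helpers (pgd_offset/pud_offset/pmd_offset) are available.
 */
static pmd_t *pmd_for_address(struct vm_area_struct *vma, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(vma->vm_mm, addr);      /* page global directory entry */
        pud_t *pud = pud_offset(pgd, addr);             /* page upper directory entry */
        return pmd_offset(pud, addr);                   /* page middle directory entry */
}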
diff --git a/mm/memory.c b/mm/memory.c
index 89339c61f8e5..cda04b19f733 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,9 @@
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/swapops.h>
+#include <linux/elf.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -59,9 +62,6 @@
 #include <asm/tlbflush.h>
 #include <asm/pgtable.h>
 
-#include <linux/swapops.h>
-#include <linux/elf.h>
-
 #include "internal.h"
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -375,15 +375,41 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
  *
  * The calling function must still handle the error.
  */
-static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
-                          unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
+                          pte_t pte, struct page *page)
 {
-        printk(KERN_ERR "Bad pte = %08llx, process = %s, "
-                        "vm_flags = %lx, vaddr = %lx\n",
-                (long long)pte_val(pte),
-                (vma->vm_mm == current->mm ? current->comm : "???"),
-                vma->vm_flags, vaddr);
+        pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
+        pud_t *pud = pud_offset(pgd, addr);
+        pmd_t *pmd = pmd_offset(pud, addr);
+        struct address_space *mapping;
+        pgoff_t index;
+
+        mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL;
+        index = linear_page_index(vma, addr);
+
+        printk(KERN_EMERG "Bad page map in process %s pte:%08llx pmd:%08llx\n",
+                current->comm,
+                (long long)pte_val(pte), (long long)pmd_val(*pmd));
+        if (page) {
+                printk(KERN_EMERG
+                "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+                page, (void *)page->flags, page_count(page),
+                page_mapcount(page), page->mapping, page->index);
+        }
+        printk(KERN_EMERG
+                "addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
+                (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
+        /*
+         * Choose text because data symbols depend on CONFIG_KALLSYMS_ALL=y
+         */
+        if (vma->vm_ops)
+                print_symbol(KERN_EMERG "vma->vm_ops->fault: %s\n",
+                                (unsigned long)vma->vm_ops->fault);
+        if (vma->vm_file && vma->vm_file->f_op)
+                print_symbol(KERN_EMERG "vma->vm_file->f_op->mmap: %s\n",
+                                (unsigned long)vma->vm_file->f_op->mmap);
         dump_stack();
+        add_taint(TAINT_BAD_PAGE);
 }
 
 static inline int is_cow_mapping(unsigned int flags)
@@ -773,6 +799,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                                 file_rss--;
                         }
                         page_remove_rmap(page, vma);
+                        if (unlikely(page_mapcount(page) < 0))
+                                print_bad_pte(vma, addr, ptent, page);
                         tlb_remove_page(tlb, page);
                         continue;
                 }
@@ -2684,7 +2712,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 /*
                  * Page table corrupted: show pte and kill process.
                  */
-                print_bad_pte(vma, orig_pte, address);
+                print_bad_pte(vma, address, orig_pte, NULL);
                 return VM_FAULT_OOM;
         }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd330252fc77..3acb216e9a78 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -222,14 +222,14 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-        printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
-                "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-                current->comm, page, (int)(2*sizeof(unsigned long)),
-                (unsigned long)page->flags, page->mapping,
-                page_mapcount(page), page_count(page));
-
-        printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
-                KERN_EMERG "Backtrace:\n");
+        printk(KERN_EMERG "Bad page state in process %s pfn:%05lx\n",
+                current->comm, page_to_pfn(page));
+        printk(KERN_EMERG
+                "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
+                page, (void *)page->flags, page_count(page),
+                page_mapcount(page), page->mapping, page->index);
+        printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
+
         dump_stack();
 
         /* Leave bad fields for debug, except PageBuddy could make trouble */
diff --git a/mm/rmap.c b/mm/rmap.c
index b1770b11a571..32098255082e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -47,7 +47,6 @@
 #include <linux/rmap.h>
 #include <linux/rcupdate.h>
 #include <linux/module.h>
-#include <linux/kallsyms.h>
 #include <linux/memcontrol.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
@@ -725,21 +724,6 @@ void page_dup_rmap(struct page *page, struct vm_area_struct *vma, unsigned long
 void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 {
         if (atomic_add_negative(-1, &page->_mapcount)) {
-                if (unlikely(page_mapcount(page) < 0)) {
-                        printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
-                        printk (KERN_EMERG "  page pfn = %lx\n", page_to_pfn(page));
-                        printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
-                        printk (KERN_EMERG "  page->count = %x\n", page_count(page));
-                        printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
-                        print_symbol (KERN_EMERG "  vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
-                        if (vma->vm_ops) {
-                                print_symbol (KERN_EMERG "  vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
-                        }
-                        if (vma->vm_file && vma->vm_file->f_op)
-                                print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
-                        BUG();
-                }
-
                 /*
                  * Now that the last pte has gone, s390 must transfer dirty
                  * flag from storage key to struct page. We can usually skip