Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig            2
-rw-r--r--  mm/filemap.c          2
-rw-r--r--  mm/hugetlb.c          1
-rw-r--r--  mm/memory.c          89
-rw-r--r--  mm/mmap.c             4
-rw-r--r--  mm/nommu.c            4
-rw-r--r--  mm/page-writeback.c   1
-rw-r--r--  mm/page_alloc.c     249
-rw-r--r--  mm/readahead.c       31
-rw-r--r--  mm/slab.c           131
-rw-r--r--  mm/swap.c             1
-rw-r--r--  mm/swap_state.c       1
-rw-r--r--  mm/swapfile.c         2
-rw-r--r--  mm/vmalloc.c          4
-rw-r--r--  mm/vmscan.c           6
15 files changed, 276 insertions, 252 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 1a4473fcb2ca..ae9ce6b73e8a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -126,9 +126,11 @@ comment "Memory hotplug is currently incompatible with Software Suspend"
126# Default to 4 for wider testing, though 8 might be more appropriate. 126# Default to 4 for wider testing, though 8 might be more appropriate.
127# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. 127# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
128# PA-RISC's debug spinlock_t is too large for the 32-bit struct page. 128# PA-RISC's debug spinlock_t is too large for the 32-bit struct page.
129# ARM26 and SPARC32 and PPC64 may use one page for multiple page tables.
129# 130#
130config SPLIT_PTLOCK_CPUS 131config SPLIT_PTLOCK_CPUS
131 int 132 int
132 default "4096" if ARM && !CPU_CACHE_VIPT 133 default "4096" if ARM && !CPU_CACHE_VIPT
133 default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT 134 default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT
135 default "4096" if ARM26 || SPARC32 || PPC64
134 default "4" 136 default "4"
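The new Kconfig defaults keep SPLIT_PTLOCK_CPUS effectively disabled (value 4096) on the architectures listed in the comments and at 4 everywhere else. As a rough illustration of how such a CPU-count threshold is typically consumed, here is a minimal user-space sketch; the macro and helper names other than CONFIG_SPLIT_PTLOCK_CPUS and NR_CPUS are simplified stand-ins, not the kernel's real mm.h definitions.

/*
 * Illustrative sketch: a CPU-count threshold like CONFIG_SPLIT_PTLOCK_CPUS
 * is compared against NR_CPUS at build time to choose between one mm-wide
 * page_table_lock and a per-page-table-page lock.  Names below are
 * stand-ins for illustration only.
 */
#include <stdio.h>

#define NR_CPUS                  8   /* build-time CPU limit (assumed) */
#define CONFIG_SPLIT_PTLOCK_CPUS 4   /* value selected by mm/Kconfig   */

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
#define USE_SPLIT_PTLOCKS 1          /* lock per page-table page        */
#else
#define USE_SPLIT_PTLOCKS 0          /* fall back to mm-wide lock       */
#endif

int main(void)
{
    printf("split pte locks: %s\n", USE_SPLIT_PTLOCKS ? "yes" : "no");
    return 0;
}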
diff --git a/mm/filemap.c b/mm/filemap.c
index 5d6e4c2000dc..33a28bfde158 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -134,7 +134,7 @@ static int sync_page(void *word)
134 struct address_space *mapping; 134 struct address_space *mapping;
135 struct page *page; 135 struct page *page;
136 136
137 page = container_of((page_flags_t *)word, struct page, flags); 137 page = container_of((unsigned long *)word, struct page, flags);
138 138
139 /* 139 /*
140 * page_mapping() is being called without PG_locked held. 140 * page_mapping() is being called without PG_locked held.
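The hunk above only changes the cast (page->flags is now a plain unsigned long rather than page_flags_t), but the container_of() pattern it relies on is worth seeing in isolation: from a pointer to the flags word handed back by the bit-wait machinery, sync_page() recovers the enclosing struct page. A minimal user-space sketch, with a toy struct page:

/*
 * Minimal sketch of the container_of() pattern used by sync_page():
 * given a pointer to the 'flags' member, recover the enclosing struct.
 * 'struct page' here is a toy stand-in, not the kernel's.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct page {
    unsigned long flags;
    void *mapping;
};

int main(void)
{
    struct page pg = { .flags = 0x5, .mapping = NULL };
    unsigned long *word = &pg.flags;  /* what the wait-on-bit path hands back */
    struct page *p = container_of(word, struct page, flags);

    printf("recovered page: %p (expected %p)\n", (void *)p, (void *)&pg);
    return 0;
}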
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9a565808da3f..728e9bda12ea 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -237,7 +237,6 @@ unsigned long hugetlb_total_pages(void)
237{ 237{
238 return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE); 238 return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
239} 239}
240EXPORT_SYMBOL(hugetlb_total_pages);
241 240
242/* 241/*
243 * We cannot handle pagefaults against hugetlb pages at all. They cause 242 * We cannot handle pagefaults against hugetlb pages at all. They cause
diff --git a/mm/memory.c b/mm/memory.c
index 0f60baf6f69b..2998cfc12f5b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -549,10 +549,10 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
549 return 0; 549 return 0;
550} 550}
551 551
552static void zap_pte_range(struct mmu_gather *tlb, 552static unsigned long zap_pte_range(struct mmu_gather *tlb,
553 struct vm_area_struct *vma, pmd_t *pmd, 553 struct vm_area_struct *vma, pmd_t *pmd,
554 unsigned long addr, unsigned long end, 554 unsigned long addr, unsigned long end,
555 struct zap_details *details) 555 long *zap_work, struct zap_details *details)
556{ 556{
557 struct mm_struct *mm = tlb->mm; 557 struct mm_struct *mm = tlb->mm;
558 pte_t *pte; 558 pte_t *pte;
@@ -563,10 +563,15 @@ static void zap_pte_range(struct mmu_gather *tlb,
563 pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 563 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
564 do { 564 do {
565 pte_t ptent = *pte; 565 pte_t ptent = *pte;
566 if (pte_none(ptent)) 566 if (pte_none(ptent)) {
567 (*zap_work)--;
567 continue; 568 continue;
569 }
568 if (pte_present(ptent)) { 570 if (pte_present(ptent)) {
569 struct page *page = NULL; 571 struct page *page = NULL;
572
573 (*zap_work) -= PAGE_SIZE;
574
570 if (!(vma->vm_flags & VM_RESERVED)) { 575 if (!(vma->vm_flags & VM_RESERVED)) {
571 unsigned long pfn = pte_pfn(ptent); 576 unsigned long pfn = pte_pfn(ptent);
572 if (unlikely(!pfn_valid(pfn))) 577 if (unlikely(!pfn_valid(pfn)))
@@ -624,16 +629,18 @@ static void zap_pte_range(struct mmu_gather *tlb,
624 if (!pte_file(ptent)) 629 if (!pte_file(ptent))
625 free_swap_and_cache(pte_to_swp_entry(ptent)); 630 free_swap_and_cache(pte_to_swp_entry(ptent));
626 pte_clear_full(mm, addr, pte, tlb->fullmm); 631 pte_clear_full(mm, addr, pte, tlb->fullmm);
627 } while (pte++, addr += PAGE_SIZE, addr != end); 632 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
628 633
629 add_mm_rss(mm, file_rss, anon_rss); 634 add_mm_rss(mm, file_rss, anon_rss);
630 pte_unmap_unlock(pte - 1, ptl); 635 pte_unmap_unlock(pte - 1, ptl);
636
637 return addr;
631} 638}
632 639
633static inline void zap_pmd_range(struct mmu_gather *tlb, 640static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
634 struct vm_area_struct *vma, pud_t *pud, 641 struct vm_area_struct *vma, pud_t *pud,
635 unsigned long addr, unsigned long end, 642 unsigned long addr, unsigned long end,
636 struct zap_details *details) 643 long *zap_work, struct zap_details *details)
637{ 644{
638 pmd_t *pmd; 645 pmd_t *pmd;
639 unsigned long next; 646 unsigned long next;
@@ -641,16 +648,21 @@ static inline void zap_pmd_range(struct mmu_gather *tlb,
641 pmd = pmd_offset(pud, addr); 648 pmd = pmd_offset(pud, addr);
642 do { 649 do {
643 next = pmd_addr_end(addr, end); 650 next = pmd_addr_end(addr, end);
644 if (pmd_none_or_clear_bad(pmd)) 651 if (pmd_none_or_clear_bad(pmd)) {
652 (*zap_work)--;
645 continue; 653 continue;
646 zap_pte_range(tlb, vma, pmd, addr, next, details); 654 }
647 } while (pmd++, addr = next, addr != end); 655 next = zap_pte_range(tlb, vma, pmd, addr, next,
656 zap_work, details);
657 } while (pmd++, addr = next, (addr != end && *zap_work > 0));
658
659 return addr;
648} 660}
649 661
650static inline void zap_pud_range(struct mmu_gather *tlb, 662static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
651 struct vm_area_struct *vma, pgd_t *pgd, 663 struct vm_area_struct *vma, pgd_t *pgd,
652 unsigned long addr, unsigned long end, 664 unsigned long addr, unsigned long end,
653 struct zap_details *details) 665 long *zap_work, struct zap_details *details)
654{ 666{
655 pud_t *pud; 667 pud_t *pud;
656 unsigned long next; 668 unsigned long next;
@@ -658,15 +670,21 @@ static inline void zap_pud_range(struct mmu_gather *tlb,
658 pud = pud_offset(pgd, addr); 670 pud = pud_offset(pgd, addr);
659 do { 671 do {
660 next = pud_addr_end(addr, end); 672 next = pud_addr_end(addr, end);
661 if (pud_none_or_clear_bad(pud)) 673 if (pud_none_or_clear_bad(pud)) {
674 (*zap_work)--;
662 continue; 675 continue;
663 zap_pmd_range(tlb, vma, pud, addr, next, details); 676 }
664 } while (pud++, addr = next, addr != end); 677 next = zap_pmd_range(tlb, vma, pud, addr, next,
678 zap_work, details);
679 } while (pud++, addr = next, (addr != end && *zap_work > 0));
680
681 return addr;
665} 682}
666 683
667static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, 684static unsigned long unmap_page_range(struct mmu_gather *tlb,
685 struct vm_area_struct *vma,
668 unsigned long addr, unsigned long end, 686 unsigned long addr, unsigned long end,
669 struct zap_details *details) 687 long *zap_work, struct zap_details *details)
670{ 688{
671 pgd_t *pgd; 689 pgd_t *pgd;
672 unsigned long next; 690 unsigned long next;
@@ -679,11 +697,16 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
679 pgd = pgd_offset(vma->vm_mm, addr); 697 pgd = pgd_offset(vma->vm_mm, addr);
680 do { 698 do {
681 next = pgd_addr_end(addr, end); 699 next = pgd_addr_end(addr, end);
682 if (pgd_none_or_clear_bad(pgd)) 700 if (pgd_none_or_clear_bad(pgd)) {
701 (*zap_work)--;
683 continue; 702 continue;
684 zap_pud_range(tlb, vma, pgd, addr, next, details); 703 }
685 } while (pgd++, addr = next, addr != end); 704 next = zap_pud_range(tlb, vma, pgd, addr, next,
705 zap_work, details);
706 } while (pgd++, addr = next, (addr != end && *zap_work > 0));
686 tlb_end_vma(tlb, vma); 707 tlb_end_vma(tlb, vma);
708
709 return addr;
687} 710}
688 711
689#ifdef CONFIG_PREEMPT 712#ifdef CONFIG_PREEMPT
@@ -724,7 +747,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
724 unsigned long end_addr, unsigned long *nr_accounted, 747 unsigned long end_addr, unsigned long *nr_accounted,
725 struct zap_details *details) 748 struct zap_details *details)
726{ 749{
727 unsigned long zap_bytes = ZAP_BLOCK_SIZE; 750 long zap_work = ZAP_BLOCK_SIZE;
728 unsigned long tlb_start = 0; /* For tlb_finish_mmu */ 751 unsigned long tlb_start = 0; /* For tlb_finish_mmu */
729 int tlb_start_valid = 0; 752 int tlb_start_valid = 0;
730 unsigned long start = start_addr; 753 unsigned long start = start_addr;
@@ -745,27 +768,25 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
745 *nr_accounted += (end - start) >> PAGE_SHIFT; 768 *nr_accounted += (end - start) >> PAGE_SHIFT;
746 769
747 while (start != end) { 770 while (start != end) {
748 unsigned long block;
749
750 if (!tlb_start_valid) { 771 if (!tlb_start_valid) {
751 tlb_start = start; 772 tlb_start = start;
752 tlb_start_valid = 1; 773 tlb_start_valid = 1;
753 } 774 }
754 775
755 if (is_vm_hugetlb_page(vma)) { 776 if (unlikely(is_vm_hugetlb_page(vma))) {
756 block = end - start;
757 unmap_hugepage_range(vma, start, end); 777 unmap_hugepage_range(vma, start, end);
758 } else { 778 zap_work -= (end - start) /
759 block = min(zap_bytes, end - start); 779 (HPAGE_SIZE / PAGE_SIZE);
760 unmap_page_range(*tlbp, vma, start, 780 start = end;
761 start + block, details); 781 } else
782 start = unmap_page_range(*tlbp, vma,
783 start, end, &zap_work, details);
784
785 if (zap_work > 0) {
786 BUG_ON(start != end);
787 break;
762 } 788 }
763 789
764 start += block;
765 zap_bytes -= block;
766 if ((long)zap_bytes > 0)
767 continue;
768
769 tlb_finish_mmu(*tlbp, tlb_start, start); 790 tlb_finish_mmu(*tlbp, tlb_start, start);
770 791
771 if (need_resched() || 792 if (need_resched() ||
@@ -779,7 +800,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
779 800
780 *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); 801 *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
781 tlb_start_valid = 0; 802 tlb_start_valid = 0;
782 zap_bytes = ZAP_BLOCK_SIZE; 803 zap_work = ZAP_BLOCK_SIZE;
783 } 804 }
784 } 805 }
785out: 806out:
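The memory.c changes replace the fixed zap_bytes byte counter with a signed zap_work budget that every level of the page-table walk decrements (by PAGE_SIZE for each present pte, by one for each empty entry it merely skips) and that each loop condition checks, so unmap_vmas() regains control often enough to call tlb_finish_mmu() and reschedule even across large sparse ranges. The following is a simplified, self-contained sketch of that budget-driven loop shape; the array "page table", the constants and the batch handling are all invented for illustration.

/*
 * Simplified sketch of the zap_work idea: walk a range with a signed
 * work budget, charge more for real work than for skipping empty slots,
 * and bail out of the inner loop so the caller can flush and resched.
 */
#include <stdio.h>

#define PAGE_SIZE      4096UL
#define ZAP_BLOCK_SIZE (8 * PAGE_SIZE)   /* per-batch budget (illustrative) */
#define NPTES          64

static int present[NPTES];               /* 1 = populated entry */

/* Returns the index it stopped at, like zap_pte_range() returns 'addr'. */
static unsigned long zap_range(unsigned long idx, unsigned long end,
                               long *zap_work)
{
    do {
        if (!present[idx]) {
            (*zap_work)--;               /* cheap: entry was empty   */
            continue;
        }
        present[idx] = 0;                /* "unmap" the entry        */
        *zap_work -= PAGE_SIZE;          /* expensive: real teardown */
    } while (++idx != end && *zap_work > 0);

    return idx;
}

int main(void)
{
    unsigned long idx = 0, end = NPTES;
    long zap_work = ZAP_BLOCK_SIZE;
    int batches = 0;

    for (int i = 0; i < NPTES; i += 2)
        present[i] = 1;                  /* half the entries populated */

    while (idx != end) {
        idx = zap_range(idx, end, &zap_work);
        batches++;                       /* flush TLB batch, cond_resched() here */
        zap_work = ZAP_BLOCK_SIZE;
    }
    printf("finished in %d batches\n", batches);
    return 0;
}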
diff --git a/mm/mmap.c b/mm/mmap.c
index 320dda1778c3..6c997b159600 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -155,10 +155,6 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
155 return -ENOMEM; 155 return -ENOMEM;
156} 156}
157 157
158EXPORT_SYMBOL(sysctl_overcommit_memory);
159EXPORT_SYMBOL(sysctl_overcommit_ratio);
160EXPORT_SYMBOL(sysctl_max_map_count);
161EXPORT_SYMBOL(vm_committed_space);
162EXPORT_SYMBOL(__vm_enough_memory); 158EXPORT_SYMBOL(__vm_enough_memory);
163 159
164/* 160/*
diff --git a/mm/nommu.c b/mm/nommu.c
index d1e076a487cb..6deb6ab3d6ad 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -44,10 +44,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
44int heap_stack_gap = 0; 44int heap_stack_gap = 0;
45 45
46EXPORT_SYMBOL(mem_map); 46EXPORT_SYMBOL(mem_map);
47EXPORT_SYMBOL(sysctl_max_map_count);
48EXPORT_SYMBOL(sysctl_overcommit_memory);
49EXPORT_SYMBOL(sysctl_overcommit_ratio);
50EXPORT_SYMBOL(vm_committed_space);
51EXPORT_SYMBOL(__vm_enough_memory); 47EXPORT_SYMBOL(__vm_enough_memory);
52 48
53/* list of shareable VMAs */ 49/* list of shareable VMAs */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0166ea15c9ee..74138c9a22b9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -750,7 +750,6 @@ int clear_page_dirty_for_io(struct page *page)
750 } 750 }
751 return TestClearPageDirty(page); 751 return TestClearPageDirty(page);
752} 752}
753EXPORT_SYMBOL(clear_page_dirty_for_io);
754 753
755int test_clear_page_writeback(struct page *page) 754int test_clear_page_writeback(struct page *page)
756{ 755{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2dbdd98426fd..bd4de592dc23 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -60,11 +60,13 @@ long nr_swap_pages;
60 * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA 60 * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
61 * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL 61 * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
62 * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA 62 * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
63 *
64 * TBD: should special case ZONE_DMA32 machines here - in those we normally
65 * don't need any ZONE_NORMAL reservation
63 */ 66 */
64int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; 67int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
65 68
66EXPORT_SYMBOL(totalram_pages); 69EXPORT_SYMBOL(totalram_pages);
67EXPORT_SYMBOL(nr_swap_pages);
68 70
69/* 71/*
70 * Used by page_zone() to look up the address of the struct zone whose 72 * Used by page_zone() to look up the address of the struct zone whose
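The figures in the comment above follow from one rule: the reserve a lower zone keeps against allocations aimed at a higher zone is the total size of the zones above it (up to the target zone) divided by that lower zone's sysctl_lowmem_reserve_ratio entry, which this patch extends to { 256, 256, 32 } for the new DMA32 slot. A worked sketch of that arithmetic using the zone sizes quoted in the comment; this is an illustration, not the kernel's setup_per_zone_lowmem_reserve().

/*
 * Worked example of the lowmem_reserve arithmetic: reserve kept in a
 * lower zone against a higher-zone allocation is (pages above the lower
 * zone, up to the target) / ratio[lower zone].  Pre-DMA32 values.
 */
#include <stdio.h>

enum zone_id { DMA, NORMAL, HIGHMEM, NR_ZONES };

static const char *name[NR_ZONES]       = { "DMA", "Normal", "HighMem" };
static const long  size_mb[NR_ZONES]    = { 16, 784, 224 };  /* ~1G x86 box */
static const long  ratio[NR_ZONES - 1]  = { 256, 32 };

int main(void)
{
    for (int lower = 0; lower < NR_ZONES - 1; lower++) {
        long above = 0;
        for (int target = lower + 1; target < NR_ZONES; target++) {
            above += size_mb[target];
            printf("%-7s allocation reserves %ldM/%ld = %ldM in ZONE_%s\n",
                   name[target], above, ratio[lower],
                   above / ratio[lower], name[lower]);
        }
    }
    return 0;
}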
@@ -73,7 +75,7 @@ EXPORT_SYMBOL(nr_swap_pages);
73struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly; 75struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
74EXPORT_SYMBOL(zone_table); 76EXPORT_SYMBOL(zone_table);
75 77
76static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; 78static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
77int min_free_kbytes = 1024; 79int min_free_kbytes = 1024;
78 80
79unsigned long __initdata nr_kernel_pages; 81unsigned long __initdata nr_kernel_pages;
@@ -125,7 +127,7 @@ static void bad_page(const char *function, struct page *page)
125 printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n", 127 printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
126 function, current->comm, page); 128 function, current->comm, page);
127 printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", 129 printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
128 (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags, 130 (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
129 page->mapping, page_mapcount(page), page_count(page)); 131 page->mapping, page_mapcount(page), page_count(page));
130 printk(KERN_EMERG "Backtrace:\n"); 132 printk(KERN_EMERG "Backtrace:\n");
131 dump_stack(); 133 dump_stack();
@@ -733,9 +735,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
733 } 735 }
734 local_irq_restore(flags); 736 local_irq_restore(flags);
735 put_cpu(); 737 put_cpu();
736 } 738 } else {
737
738 if (page == NULL) {
739 spin_lock_irqsave(&zone->lock, flags); 739 spin_lock_irqsave(&zone->lock, flags);
740 page = __rmqueue(zone, order); 740 page = __rmqueue(zone, order);
741 spin_unlock_irqrestore(&zone->lock, flags); 741 spin_unlock_irqrestore(&zone->lock, flags);
@@ -755,20 +755,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
755 return page; 755 return page;
756} 756}
757 757
758#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
759#define ALLOC_HARDER 0x02 /* try to alloc harder */
760#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
761#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
762
758/* 763/*
759 * Return 1 if free pages are above 'mark'. This takes into account the order 764 * Return 1 if free pages are above 'mark'. This takes into account the order
760 * of the allocation. 765 * of the allocation.
761 */ 766 */
762int zone_watermark_ok(struct zone *z, int order, unsigned long mark, 767int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
763 int classzone_idx, int can_try_harder, gfp_t gfp_high) 768 int classzone_idx, int alloc_flags)
764{ 769{
765 /* free_pages my go negative - that's OK */ 770 /* free_pages my go negative - that's OK */
766 long min = mark, free_pages = z->free_pages - (1 << order) + 1; 771 long min = mark, free_pages = z->free_pages - (1 << order) + 1;
767 int o; 772 int o;
768 773
769 if (gfp_high) 774 if (alloc_flags & ALLOC_HIGH)
770 min -= min / 2; 775 min -= min / 2;
771 if (can_try_harder) 776 if (alloc_flags & ALLOC_HARDER)
772 min -= min / 4; 777 min -= min / 4;
773 778
774 if (free_pages <= min + z->lowmem_reserve[classzone_idx]) 779 if (free_pages <= min + z->lowmem_reserve[classzone_idx])
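With this hunk, zone_watermark_ok() takes a single ALLOC_ flag mask instead of separate can_try_harder/gfp_high arguments: ALLOC_HIGH halves the effective minimum, ALLOC_HARDER shaves off another quarter, and the remaining free pages must still exceed that minimum plus the classzone's lowmem reserve. A condensed user-space sketch of the order-0 case (the real function also walks the per-order free lists, omitted here); the struct is a stand-in, not the kernel's struct zone.

/*
 * Condensed sketch of the reworked watermark check for an order-0
 * allocation: ALLOC_HIGH halves the threshold, ALLOC_HARDER removes
 * another quarter, then free pages must beat threshold + lowmem reserve.
 */
#include <stdio.h>

#define ALLOC_NO_WATERMARKS 0x01
#define ALLOC_HARDER        0x02
#define ALLOC_HIGH          0x04
#define ALLOC_CPUSET        0x08

struct zone_sketch {
    long free_pages;
    long lowmem_reserve;   /* already selected for the classzone index */
};

static int watermark_ok(struct zone_sketch *z, long mark, int alloc_flags)
{
    long min = mark;
    long free_pages = z->free_pages;   /* order 0: no (1 << order) - 1 term */

    if (alloc_flags & ALLOC_HIGH)
        min -= min / 2;
    if (alloc_flags & ALLOC_HARDER)
        min -= min / 4;

    return free_pages > min + z->lowmem_reserve;
}

int main(void)
{
    struct zone_sketch z = { .free_pages = 90, .lowmem_reserve = 20 };

    printf("normal request:  %d\n", watermark_ok(&z, 100, 0));
    printf("atomic-ish:      %d\n", watermark_ok(&z, 100, ALLOC_HIGH | ALLOC_HARDER));
    return 0;
}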
@@ -786,14 +791,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
786 return 1; 791 return 1;
787} 792}
788 793
789static inline int 794/*
790should_reclaim_zone(struct zone *z, gfp_t gfp_mask) 795 * get_page_from_freeliest goes through the zonelist trying to allocate
796 * a page.
797 */
798static struct page *
799get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
800 struct zonelist *zonelist, int alloc_flags)
791{ 801{
792 if (!z->reclaim_pages) 802 struct zone **z = zonelist->zones;
793 return 0; 803 struct page *page = NULL;
794 if (gfp_mask & __GFP_NORECLAIM) 804 int classzone_idx = zone_idx(*z);
795 return 0; 805
796 return 1; 806 /*
807 * Go through the zonelist once, looking for a zone with enough free.
808 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
809 */
810 do {
811 if ((alloc_flags & ALLOC_CPUSET) &&
812 !cpuset_zone_allowed(*z, gfp_mask))
813 continue;
814
815 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
816 if (!zone_watermark_ok(*z, order, (*z)->pages_low,
817 classzone_idx, alloc_flags))
818 continue;
819 }
820
821 page = buffered_rmqueue(*z, order, gfp_mask);
822 if (page) {
823 zone_statistics(zonelist, *z);
824 break;
825 }
826 } while (*(++z) != NULL);
827 return page;
797} 828}
798 829
799/* 830/*
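get_page_from_freelist(), added above, centralizes the loop that __alloc_pages() previously open-coded in several places: walk the NULL-terminated zonelist, skip zones the cpuset disallows, skip zones below their watermark unless ALLOC_NO_WATERMARKS is set, and take the first page that comes back. A stripped-down sketch of that walk, with toy stand-ins for the cpuset check, the watermark check and buffered_rmqueue():

/*
 * Stripped-down sketch of the zonelist walk in get_page_from_freelist().
 * Types and helper behaviour are illustrative stand-ins only.
 */
#include <stddef.h>
#include <stdio.h>

#define ALLOC_NO_WATERMARKS 0x01
#define ALLOC_CPUSET        0x08

struct zone_sketch {
    const char *name;
    int allowed;           /* stand-in for cpuset_zone_allowed() */
    long free_pages;
    long pages_low;
};

static void *try_alloc(struct zone_sketch *z)
{
    return z->free_pages > 0 ? (void *)z : NULL;   /* stand-in for buffered_rmqueue() */
}

static void *get_page_from_freelist(struct zone_sketch **zonelist, int alloc_flags)
{
    for (struct zone_sketch **z = zonelist; *z != NULL; z++) {
        if ((alloc_flags & ALLOC_CPUSET) && !(*z)->allowed)
            continue;
        if (!(alloc_flags & ALLOC_NO_WATERMARKS) &&
            (*z)->free_pages <= (*z)->pages_low)
            continue;
        void *page = try_alloc(*z);
        if (page) {
            printf("allocated from %s\n", (*z)->name);
            return page;
        }
    }
    return NULL;
}

int main(void)
{
    struct zone_sketch high = { "HighMem", 1, 10, 50 };   /* below watermark */
    struct zone_sketch norm = { "Normal",  1, 500, 100 };
    struct zone_sketch *zonelist[] = { &high, &norm, NULL };

    get_page_from_freelist(zonelist, ALLOC_CPUSET);
    return 0;
}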
@@ -804,105 +835,76 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
804 struct zonelist *zonelist) 835 struct zonelist *zonelist)
805{ 836{
806 const gfp_t wait = gfp_mask & __GFP_WAIT; 837 const gfp_t wait = gfp_mask & __GFP_WAIT;
807 struct zone **zones, *z; 838 struct zone **z;
808 struct page *page; 839 struct page *page;
809 struct reclaim_state reclaim_state; 840 struct reclaim_state reclaim_state;
810 struct task_struct *p = current; 841 struct task_struct *p = current;
811 int i;
812 int classzone_idx;
813 int do_retry; 842 int do_retry;
814 int can_try_harder; 843 int alloc_flags;
815 int did_some_progress; 844 int did_some_progress;
816 845
817 might_sleep_if(wait); 846 might_sleep_if(wait);
818 847
819 /* 848restart:
820 * The caller may dip into page reserves a bit more if the caller 849 z = zonelist->zones; /* the list of zones suitable for gfp_mask */
821 * cannot run direct reclaim, or is the caller has realtime scheduling
822 * policy
823 */
824 can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
825
826 zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
827 850
828 if (unlikely(zones[0] == NULL)) { 851 if (unlikely(*z == NULL)) {
829 /* Should this ever happen?? */ 852 /* Should this ever happen?? */
830 return NULL; 853 return NULL;
831 } 854 }
832 855
833 classzone_idx = zone_idx(zones[0]); 856 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
857 zonelist, ALLOC_CPUSET);
858 if (page)
859 goto got_pg;
860
861 do {
862 wakeup_kswapd(*z, order);
863 } while (*(++z));
834 864
835restart:
836 /* 865 /*
837 * Go through the zonelist once, looking for a zone with enough free. 866 * OK, we're below the kswapd watermark and have kicked background
838 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 867 * reclaim. Now things get more complex, so set up alloc_flags according
868 * to how we want to proceed.
869 *
870 * The caller may dip into page reserves a bit more if the caller
871 * cannot run direct reclaim, or if the caller has realtime scheduling
872 * policy.
839 */ 873 */
840 for (i = 0; (z = zones[i]) != NULL; i++) { 874 alloc_flags = 0;
841 int do_reclaim = should_reclaim_zone(z, gfp_mask); 875 if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
842 876 alloc_flags |= ALLOC_HARDER;
843 if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) 877 if (gfp_mask & __GFP_HIGH)
844 continue; 878 alloc_flags |= ALLOC_HIGH;
845 879 if (wait)
846 /* 880 alloc_flags |= ALLOC_CPUSET;
847 * If the zone is to attempt early page reclaim then this loop
848 * will try to reclaim pages and check the watermark a second
849 * time before giving up and falling back to the next zone.
850 */
851zone_reclaim_retry:
852 if (!zone_watermark_ok(z, order, z->pages_low,
853 classzone_idx, 0, 0)) {
854 if (!do_reclaim)
855 continue;
856 else {
857 zone_reclaim(z, gfp_mask, order);
858 /* Only try reclaim once */
859 do_reclaim = 0;
860 goto zone_reclaim_retry;
861 }
862 }
863
864 page = buffered_rmqueue(z, order, gfp_mask);
865 if (page)
866 goto got_pg;
867 }
868
869 for (i = 0; (z = zones[i]) != NULL; i++)
870 wakeup_kswapd(z, order);
871 881
872 /* 882 /*
873 * Go through the zonelist again. Let __GFP_HIGH and allocations 883 * Go through the zonelist again. Let __GFP_HIGH and allocations
874 * coming from realtime tasks to go deeper into reserves 884 * coming from realtime tasks go deeper into reserves.
875 * 885 *
876 * This is the last chance, in general, before the goto nopage. 886 * This is the last chance, in general, before the goto nopage.
877 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. 887 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
878 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 888 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
879 */ 889 */
880 for (i = 0; (z = zones[i]) != NULL; i++) { 890 page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
881 if (!zone_watermark_ok(z, order, z->pages_min, 891 if (page)
882 classzone_idx, can_try_harder, 892 goto got_pg;
883 gfp_mask & __GFP_HIGH))
884 continue;
885
886 if (wait && !cpuset_zone_allowed(z, gfp_mask))
887 continue;
888
889 page = buffered_rmqueue(z, order, gfp_mask);
890 if (page)
891 goto got_pg;
892 }
893 893
894 /* This allocation should allow future memory freeing. */ 894 /* This allocation should allow future memory freeing. */
895 895
896 if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) 896 if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
897 && !in_interrupt()) { 897 && !in_interrupt()) {
898 if (!(gfp_mask & __GFP_NOMEMALLOC)) { 898 if (!(gfp_mask & __GFP_NOMEMALLOC)) {
899nofail_alloc:
899 /* go through the zonelist yet again, ignoring mins */ 900 /* go through the zonelist yet again, ignoring mins */
900 for (i = 0; (z = zones[i]) != NULL; i++) { 901 page = get_page_from_freelist(gfp_mask, order,
901 if (!cpuset_zone_allowed(z, gfp_mask)) 902 zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
902 continue; 903 if (page)
903 page = buffered_rmqueue(z, order, gfp_mask); 904 goto got_pg;
904 if (page) 905 if (gfp_mask & __GFP_NOFAIL) {
905 goto got_pg; 906 blk_congestion_wait(WRITE, HZ/50);
907 goto nofail_alloc;
906 } 908 }
907 } 909 }
908 goto nopage; 910 goto nopage;
@@ -920,7 +922,7 @@ rebalance:
920 reclaim_state.reclaimed_slab = 0; 922 reclaim_state.reclaimed_slab = 0;
921 p->reclaim_state = &reclaim_state; 923 p->reclaim_state = &reclaim_state;
922 924
923 did_some_progress = try_to_free_pages(zones, gfp_mask); 925 did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
924 926
925 p->reclaim_state = NULL; 927 p->reclaim_state = NULL;
926 p->flags &= ~PF_MEMALLOC; 928 p->flags &= ~PF_MEMALLOC;
@@ -928,19 +930,10 @@ rebalance:
928 cond_resched(); 930 cond_resched();
929 931
930 if (likely(did_some_progress)) { 932 if (likely(did_some_progress)) {
931 for (i = 0; (z = zones[i]) != NULL; i++) { 933 page = get_page_from_freelist(gfp_mask, order,
932 if (!zone_watermark_ok(z, order, z->pages_min, 934 zonelist, alloc_flags);
933 classzone_idx, can_try_harder, 935 if (page)
934 gfp_mask & __GFP_HIGH)) 936 goto got_pg;
935 continue;
936
937 if (!cpuset_zone_allowed(z, gfp_mask))
938 continue;
939
940 page = buffered_rmqueue(z, order, gfp_mask);
941 if (page)
942 goto got_pg;
943 }
944 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { 937 } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
945 /* 938 /*
946 * Go through the zonelist yet one more time, keep 939 * Go through the zonelist yet one more time, keep
@@ -948,18 +941,10 @@ rebalance:
948 * a parallel oom killing, we must fail if we're still 941 * a parallel oom killing, we must fail if we're still
949 * under heavy pressure. 942 * under heavy pressure.
950 */ 943 */
951 for (i = 0; (z = zones[i]) != NULL; i++) { 944 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
952 if (!zone_watermark_ok(z, order, z->pages_high, 945 zonelist, ALLOC_CPUSET);
953 classzone_idx, 0, 0)) 946 if (page)
954 continue; 947 goto got_pg;
955
956 if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
957 continue;
958
959 page = buffered_rmqueue(z, order, gfp_mask);
960 if (page)
961 goto got_pg;
962 }
963 948
964 out_of_memory(gfp_mask, order); 949 out_of_memory(gfp_mask, order);
965 goto restart; 950 goto restart;
@@ -992,9 +977,7 @@ nopage:
992 dump_stack(); 977 dump_stack();
993 show_mem(); 978 show_mem();
994 } 979 }
995 return NULL;
996got_pg: 980got_pg:
997 zone_statistics(zonelist, z);
998 return page; 981 return page;
999} 982}
1000 983
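The restructured slow path derives one alloc_flags mask up front, per the comment in the hunk: callers that cannot reclaim (no __GFP_WAIT) or are realtime tasks outside interrupt context get ALLOC_HARDER, __GFP_HIGH maps to ALLOC_HIGH, and only sleeping allocations keep honouring cpusets. A small sketch of that mapping; the gfp bits and task predicates are simplified stand-ins.

/*
 * Sketch of the alloc_flags derivation used by the reworked
 * __alloc_pages() slow path.  Bit values are illustrative.
 */
#include <stdio.h>

#define __GFP_WAIT 0x10u
#define __GFP_HIGH 0x20u

#define ALLOC_NO_WATERMARKS 0x01
#define ALLOC_HARDER        0x02
#define ALLOC_HIGH          0x04
#define ALLOC_CPUSET        0x08

static int derive_alloc_flags(unsigned int gfp_mask, int rt_task, int in_interrupt)
{
    int can_wait = gfp_mask & __GFP_WAIT;
    int alloc_flags = 0;

    if ((rt_task && !in_interrupt) || !can_wait)
        alloc_flags |= ALLOC_HARDER;     /* may dig deeper into reserves */
    if (gfp_mask & __GFP_HIGH)
        alloc_flags |= ALLOC_HIGH;
    if (can_wait)
        alloc_flags |= ALLOC_CPUSET;     /* only sleepers respect cpusets */

    return alloc_flags;
}

int main(void)
{
    /* GFP_ATOMIC-like: __GFP_HIGH set, cannot wait */
    printf("atomic:   0x%x\n", derive_alloc_flags(__GFP_HIGH, 0, 1));
    /* GFP_KERNEL-like: may sleep */
    printf("sleeping: 0x%x\n", derive_alloc_flags(__GFP_WAIT, 0, 0));
    return 0;
}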
@@ -1331,7 +1314,7 @@ void show_free_areas(void)
1331 } else 1314 } else
1332 printk("\n"); 1315 printk("\n");
1333 1316
1334 for_each_cpu(cpu) { 1317 for_each_online_cpu(cpu) {
1335 struct per_cpu_pageset *pageset; 1318 struct per_cpu_pageset *pageset;
1336 1319
1337 pageset = zone_pcp(zone, cpu); 1320 pageset = zone_pcp(zone, cpu);
@@ -1442,6 +1425,10 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
1442 zone = pgdat->node_zones + ZONE_NORMAL; 1425 zone = pgdat->node_zones + ZONE_NORMAL;
1443 if (zone->present_pages) 1426 if (zone->present_pages)
1444 zonelist->zones[j++] = zone; 1427 zonelist->zones[j++] = zone;
1428 case ZONE_DMA32:
1429 zone = pgdat->node_zones + ZONE_DMA32;
1430 if (zone->present_pages)
1431 zonelist->zones[j++] = zone;
1445 case ZONE_DMA: 1432 case ZONE_DMA:
1446 zone = pgdat->node_zones + ZONE_DMA; 1433 zone = pgdat->node_zones + ZONE_DMA;
1447 if (zone->present_pages) 1434 if (zone->present_pages)
@@ -1456,6 +1443,8 @@ static inline int highest_zone(int zone_bits)
1456 int res = ZONE_NORMAL; 1443 int res = ZONE_NORMAL;
1457 if (zone_bits & (__force int)__GFP_HIGHMEM) 1444 if (zone_bits & (__force int)__GFP_HIGHMEM)
1458 res = ZONE_HIGHMEM; 1445 res = ZONE_HIGHMEM;
1446 if (zone_bits & (__force int)__GFP_DMA32)
1447 res = ZONE_DMA32;
1459 if (zone_bits & (__force int)__GFP_DMA) 1448 if (zone_bits & (__force int)__GFP_DMA)
1460 res = ZONE_DMA; 1449 res = ZONE_DMA;
1461 return res; 1450 return res;
@@ -1867,11 +1856,10 @@ static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
1867 if (process_zones(cpu)) 1856 if (process_zones(cpu))
1868 ret = NOTIFY_BAD; 1857 ret = NOTIFY_BAD;
1869 break; 1858 break;
1870#ifdef CONFIG_HOTPLUG_CPU 1859 case CPU_UP_CANCELED:
1871 case CPU_DEAD: 1860 case CPU_DEAD:
1872 free_zone_pagesets(cpu); 1861 free_zone_pagesets(cpu);
1873 break; 1862 break;
1874#endif
1875 default: 1863 default:
1876 break; 1864 break;
1877 } 1865 }
@@ -1976,7 +1964,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
1976 if (zholes_size) 1964 if (zholes_size)
1977 realsize -= zholes_size[j]; 1965 realsize -= zholes_size[j];
1978 1966
1979 if (j == ZONE_DMA || j == ZONE_NORMAL) 1967 if (j < ZONE_HIGHMEM)
1980 nr_kernel_pages += realsize; 1968 nr_kernel_pages += realsize;
1981 nr_all_pages += realsize; 1969 nr_all_pages += realsize;
1982 1970
@@ -2418,13 +2406,18 @@ void setup_per_zone_pages_min(void)
2418 } 2406 }
2419 2407
2420 for_each_zone(zone) { 2408 for_each_zone(zone) {
2409 unsigned long tmp;
2421 spin_lock_irqsave(&zone->lru_lock, flags); 2410 spin_lock_irqsave(&zone->lru_lock, flags);
2411 tmp = (pages_min * zone->present_pages) / lowmem_pages;
2422 if (is_highmem(zone)) { 2412 if (is_highmem(zone)) {
2423 /* 2413 /*
2424 * Often, highmem doesn't need to reserve any pages. 2414 * __GFP_HIGH and PF_MEMALLOC allocations usually don't
2425 * But the pages_min/low/high values are also used for 2415 * need highmem pages, so cap pages_min to a small
2426 * batching up page reclaim activity so we need a 2416 * value here.
2427 * decent value here. 2417 *
2418 * The (pages_high-pages_low) and (pages_low-pages_min)
2419 * deltas controls asynch page reclaim, and so should
2420 * not be capped for highmem.
2428 */ 2421 */
2429 int min_pages; 2422 int min_pages;
2430 2423
@@ -2435,19 +2428,15 @@ void setup_per_zone_pages_min(void)
2435 min_pages = 128; 2428 min_pages = 128;
2436 zone->pages_min = min_pages; 2429 zone->pages_min = min_pages;
2437 } else { 2430 } else {
2438 /* if it's a lowmem zone, reserve a number of pages 2431 /*
2432 * If it's a lowmem zone, reserve a number of pages
2439 * proportionate to the zone's size. 2433 * proportionate to the zone's size.
2440 */ 2434 */
2441 zone->pages_min = (pages_min * zone->present_pages) / 2435 zone->pages_min = tmp;
2442 lowmem_pages;
2443 } 2436 }
2444 2437
2445 /* 2438 zone->pages_low = zone->pages_min + tmp / 4;
2446 * When interpreting these watermarks, just keep in mind that: 2439 zone->pages_high = zone->pages_min + tmp / 2;
2447 * zone->pages_min == (zone->pages_min * 4) / 4;
2448 */
2449 zone->pages_low = (zone->pages_min * 5) / 4;
2450 zone->pages_high = (zone->pages_min * 6) / 4;
2451 spin_unlock_irqrestore(&zone->lru_lock, flags); 2440 spin_unlock_irqrestore(&zone->lru_lock, flags);
2452 } 2441 }
2453} 2442}
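After this hunk, each zone's proportional share of pages_min is computed once as tmp = pages_min * present_pages / lowmem_pages; lowmem zones use tmp directly as pages_min, highmem caps pages_min to a small value, and both derive pages_low = pages_min + tmp/4 and pages_high = pages_min + tmp/2, so the reclaim deltas stay proportional even where pages_min is capped. A short worked example with invented zone sizes; the highmem clamp bounds mirror the real code only loosely.

/*
 * Worked example of the new watermark derivation in
 * setup_per_zone_pages_min().  Zone sizes and the 32..128 clamp are
 * illustrative.
 */
#include <stdio.h>

struct zone_sketch {
    const char   *name;
    int           highmem;
    unsigned long present_pages;
    unsigned long pages_min, pages_low, pages_high;
};

int main(void)
{
    struct zone_sketch zones[] = {
        { "Normal",  0, 200000 },
        { "HighMem", 1,  56000 },
    };
    unsigned long pages_min = 256;        /* min_free_kbytes=1024, 4K pages */
    unsigned long lowmem_pages = 200000;  /* highmem not counted            */

    for (int i = 0; i < 2; i++) {
        struct zone_sketch *z = &zones[i];
        unsigned long tmp = pages_min * z->present_pages / lowmem_pages;

        if (z->highmem) {
            unsigned long min = z->present_pages / 1024;
            if (min < 32)  min = 32;      /* SWAP_CLUSTER_MAX stand-in */
            if (min > 128) min = 128;
            z->pages_min = min;           /* capped: highmem rarely needs reserves */
        } else {
            z->pages_min = tmp;           /* proportional to zone size */
        }
        z->pages_low  = z->pages_min + tmp / 4;
        z->pages_high = z->pages_min + tmp / 2;

        printf("%-7s min=%lu low=%lu high=%lu\n", z->name,
               z->pages_min, z->pages_low, z->pages_high);
    }
    return 0;
}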
diff --git a/mm/readahead.c b/mm/readahead.c
index d0b50034e245..72e7adbb87c7 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -254,7 +254,7 @@ out:
254 */ 254 */
255static int 255static int
256__do_page_cache_readahead(struct address_space *mapping, struct file *filp, 256__do_page_cache_readahead(struct address_space *mapping, struct file *filp,
257 unsigned long offset, unsigned long nr_to_read) 257 pgoff_t offset, unsigned long nr_to_read)
258{ 258{
259 struct inode *inode = mapping->host; 259 struct inode *inode = mapping->host;
260 struct page *page; 260 struct page *page;
@@ -274,7 +274,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
274 */ 274 */
275 read_lock_irq(&mapping->tree_lock); 275 read_lock_irq(&mapping->tree_lock);
276 for (page_idx = 0; page_idx < nr_to_read; page_idx++) { 276 for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
277 unsigned long page_offset = offset + page_idx; 277 pgoff_t page_offset = offset + page_idx;
278 278
279 if (page_offset > end_index) 279 if (page_offset > end_index)
280 break; 280 break;
@@ -311,7 +311,7 @@ out:
311 * memory at once. 311 * memory at once.
312 */ 312 */
313int force_page_cache_readahead(struct address_space *mapping, struct file *filp, 313int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
314 unsigned long offset, unsigned long nr_to_read) 314 pgoff_t offset, unsigned long nr_to_read)
315{ 315{
316 int ret = 0; 316 int ret = 0;
317 317
@@ -368,7 +368,7 @@ static inline int check_ra_success(struct file_ra_state *ra,
368 * request queues. 368 * request queues.
369 */ 369 */
370int do_page_cache_readahead(struct address_space *mapping, struct file *filp, 370int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
371 unsigned long offset, unsigned long nr_to_read) 371 pgoff_t offset, unsigned long nr_to_read)
372{ 372{
373 if (bdi_read_congested(mapping->backing_dev_info)) 373 if (bdi_read_congested(mapping->backing_dev_info))
374 return -1; 374 return -1;
@@ -385,7 +385,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
385 */ 385 */
386static int 386static int
387blockable_page_cache_readahead(struct address_space *mapping, struct file *filp, 387blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
388 unsigned long offset, unsigned long nr_to_read, 388 pgoff_t offset, unsigned long nr_to_read,
389 struct file_ra_state *ra, int block) 389 struct file_ra_state *ra, int block)
390{ 390{
391 int actual; 391 int actual;
@@ -430,14 +430,27 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp,
430 return ret; 430 return ret;
431} 431}
432 432
433/* 433/**
434 * page_cache_readahead is the main function. If performs the adaptive 434 * page_cache_readahead - generic adaptive readahead
435 * @mapping: address_space which holds the pagecache and I/O vectors
436 * @ra: file_ra_state which holds the readahead state
437 * @filp: passed on to ->readpage() and ->readpages()
438 * @offset: start offset into @mapping, in PAGE_CACHE_SIZE units
439 * @req_size: hint: total size of the read which the caller is performing in
440 * PAGE_CACHE_SIZE units
441 *
442 * page_cache_readahead() is the main function. If performs the adaptive
435 * readahead window size management and submits the readahead I/O. 443 * readahead window size management and submits the readahead I/O.
444 *
445 * Note that @filp is purely used for passing on to the ->readpage[s]()
446 * handler: it may refer to a different file from @mapping (so we may not use
447 * @filp->f_mapping or @filp->f_dentry->d_inode here).
448 * Also, @ra may not be equal to &@filp->f_ra.
449 *
436 */ 450 */
437unsigned long 451unsigned long
438page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, 452page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
439 struct file *filp, unsigned long offset, 453 struct file *filp, pgoff_t offset, unsigned long req_size)
440 unsigned long req_size)
441{ 454{
442 unsigned long max, newsize; 455 unsigned long max, newsize;
443 int sequential; 456 int sequential;
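The readahead changes switch these offsets to pgoff_t, making explicit that they are page-cache indices in PAGE_CACHE_SIZE units rather than byte offsets, as the new kerneldoc spells out. A tiny illustration of that convention and of the end-of-file clamp visible in __do_page_cache_readahead() above; the shift value and file size are invented and the clamp is mirrored only loosely.

/*
 * Illustration: page-cache indices are file positions shifted down by
 * PAGE_CACHE_SHIFT, and a readahead window stops at the index of the
 * last page backed by i_size.
 */
#include <stdio.h>

typedef unsigned long pgoff_t;

#define PAGE_CACHE_SHIFT 12
#define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

int main(void)
{
    unsigned long long isize = 10 * PAGE_CACHE_SIZE + 100;  /* ~10.02 pages */
    pgoff_t end_index = (isize - 1) >> PAGE_CACHE_SHIFT;    /* last valid index */
    pgoff_t offset = 8;                                     /* caller's start page */
    unsigned long nr_to_read = 16;

    for (unsigned long i = 0; i < nr_to_read; i++) {
        pgoff_t page_offset = offset + i;
        if (page_offset > end_index)
            break;                                          /* don't read past EOF */
        printf("would read page index %lu\n", page_offset);
    }
    return 0;
}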
diff --git a/mm/slab.c b/mm/slab.c
index 22bfb0b2ac8b..e5ec26e0c460 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -368,7 +368,7 @@ static inline void kmem_list3_init(struct kmem_list3 *parent)
368 * manages a cache. 368 * manages a cache.
369 */ 369 */
370 370
371struct kmem_cache_s { 371struct kmem_cache {
372/* 1) per-cpu data, touched during every alloc/free */ 372/* 1) per-cpu data, touched during every alloc/free */
373 struct array_cache *array[NR_CPUS]; 373 struct array_cache *array[NR_CPUS];
374 unsigned int batchcount; 374 unsigned int batchcount;
@@ -434,7 +434,7 @@ struct kmem_cache_s {
434/* Optimization question: fewer reaps means less 434/* Optimization question: fewer reaps means less
435 * probability for unnessary cpucache drain/refill cycles. 435 * probability for unnessary cpucache drain/refill cycles.
436 * 436 *
437 * OTHO the cpuarrays can contain lots of objects, 437 * OTOH the cpuarrays can contain lots of objects,
438 * which could lock up otherwise freeable slabs. 438 * which could lock up otherwise freeable slabs.
439 */ 439 */
440#define REAPTIMEOUT_CPUC (2*HZ) 440#define REAPTIMEOUT_CPUC (2*HZ)
@@ -565,14 +565,29 @@ static void **dbg_userword(kmem_cache_t *cachep, void *objp)
565#define BREAK_GFP_ORDER_LO 0 565#define BREAK_GFP_ORDER_LO 0
566static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; 566static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
567 567
568/* Macros for storing/retrieving the cachep and or slab from the 568/* Functions for storing/retrieving the cachep and or slab from the
569 * global 'mem_map'. These are used to find the slab an obj belongs to. 569 * global 'mem_map'. These are used to find the slab an obj belongs to.
570 * With kfree(), these are used to find the cache which an obj belongs to. 570 * With kfree(), these are used to find the cache which an obj belongs to.
571 */ 571 */
572#define SET_PAGE_CACHE(pg,x) ((pg)->lru.next = (struct list_head *)(x)) 572static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
573#define GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->lru.next) 573{
574#define SET_PAGE_SLAB(pg,x) ((pg)->lru.prev = (struct list_head *)(x)) 574 page->lru.next = (struct list_head *)cache;
575#define GET_PAGE_SLAB(pg) ((struct slab *)(pg)->lru.prev) 575}
576
577static inline struct kmem_cache *page_get_cache(struct page *page)
578{
579 return (struct kmem_cache *)page->lru.next;
580}
581
582static inline void page_set_slab(struct page *page, struct slab *slab)
583{
584 page->lru.prev = (struct list_head *)slab;
585}
586
587static inline struct slab *page_get_slab(struct page *page)
588{
589 return (struct slab *)page->lru.prev;
590}
576 591
577/* These are the default caches for kmalloc. Custom caches can have other sizes. */ 592/* These are the default caches for kmalloc. Custom caches can have other sizes. */
578struct cache_sizes malloc_sizes[] = { 593struct cache_sizes malloc_sizes[] = {
@@ -1190,11 +1205,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
1190 int i; 1205 int i;
1191 1206
1192 flags |= cachep->gfpflags; 1207 flags |= cachep->gfpflags;
1193 if (likely(nodeid == -1)) { 1208 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1194 page = alloc_pages(flags, cachep->gfporder);
1195 } else {
1196 page = alloc_pages_node(nodeid, flags, cachep->gfporder);
1197 }
1198 if (!page) 1209 if (!page)
1199 return NULL; 1210 return NULL;
1200 addr = page_address(page); 1211 addr = page_address(page);
@@ -1368,7 +1379,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
1368 /* Print some data about the neighboring objects, if they 1379 /* Print some data about the neighboring objects, if they
1369 * exist: 1380 * exist:
1370 */ 1381 */
1371 struct slab *slabp = GET_PAGE_SLAB(virt_to_page(objp)); 1382 struct slab *slabp = page_get_slab(virt_to_page(objp));
1372 int objnr; 1383 int objnr;
1373 1384
1374 objnr = (objp-slabp->s_mem)/cachep->objsize; 1385 objnr = (objp-slabp->s_mem)/cachep->objsize;
@@ -1502,6 +1513,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1502{ 1513{
1503 size_t left_over, slab_size, ralign; 1514 size_t left_over, slab_size, ralign;
1504 kmem_cache_t *cachep = NULL; 1515 kmem_cache_t *cachep = NULL;
1516 struct list_head *p;
1505 1517
1506 /* 1518 /*
1507 * Sanity checks... these are all serious usage bugs. 1519 * Sanity checks... these are all serious usage bugs.
@@ -1516,6 +1528,35 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1516 BUG(); 1528 BUG();
1517 } 1529 }
1518 1530
1531 down(&cache_chain_sem);
1532
1533 list_for_each(p, &cache_chain) {
1534 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
1535 mm_segment_t old_fs = get_fs();
1536 char tmp;
1537 int res;
1538
1539 /*
1540 * This happens when the module gets unloaded and doesn't
1541 * destroy its slab cache and no-one else reuses the vmalloc
1542 * area of the module. Print a warning.
1543 */
1544 set_fs(KERNEL_DS);
1545 res = __get_user(tmp, pc->name);
1546 set_fs(old_fs);
1547 if (res) {
1548 printk("SLAB: cache with size %d has lost its name\n",
1549 pc->objsize);
1550 continue;
1551 }
1552
1553 if (!strcmp(pc->name,name)) {
1554 printk("kmem_cache_create: duplicate cache %s\n", name);
1555 dump_stack();
1556 goto oops;
1557 }
1558 }
1559
1519#if DEBUG 1560#if DEBUG
1520 WARN_ON(strchr(name, ' ')); /* It confuses parsers */ 1561 WARN_ON(strchr(name, ' ')); /* It confuses parsers */
1521 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) { 1562 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
@@ -1592,7 +1633,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1592 /* Get cache's description obj. */ 1633 /* Get cache's description obj. */
1593 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL); 1634 cachep = (kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
1594 if (!cachep) 1635 if (!cachep)
1595 goto opps; 1636 goto oops;
1596 memset(cachep, 0, sizeof(kmem_cache_t)); 1637 memset(cachep, 0, sizeof(kmem_cache_t));
1597 1638
1598#if DEBUG 1639#if DEBUG
@@ -1686,7 +1727,7 @@ next:
1686 printk("kmem_cache_create: couldn't create cache %s.\n", name); 1727 printk("kmem_cache_create: couldn't create cache %s.\n", name);
1687 kmem_cache_free(&cache_cache, cachep); 1728 kmem_cache_free(&cache_cache, cachep);
1688 cachep = NULL; 1729 cachep = NULL;
1689 goto opps; 1730 goto oops;
1690 } 1731 }
1691 slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t) 1732 slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
1692 + sizeof(struct slab), align); 1733 + sizeof(struct slab), align);
@@ -1781,43 +1822,14 @@ next:
1781 cachep->limit = BOOT_CPUCACHE_ENTRIES; 1822 cachep->limit = BOOT_CPUCACHE_ENTRIES;
1782 } 1823 }
1783 1824
1784 /* Need the semaphore to access the chain. */
1785 down(&cache_chain_sem);
1786 {
1787 struct list_head *p;
1788 mm_segment_t old_fs;
1789
1790 old_fs = get_fs();
1791 set_fs(KERNEL_DS);
1792 list_for_each(p, &cache_chain) {
1793 kmem_cache_t *pc = list_entry(p, kmem_cache_t, next);
1794 char tmp;
1795 /* This happens when the module gets unloaded and doesn't
1796 destroy its slab cache and noone else reuses the vmalloc
1797 area of the module. Print a warning. */
1798 if (__get_user(tmp,pc->name)) {
1799 printk("SLAB: cache with size %d has lost its name\n",
1800 pc->objsize);
1801 continue;
1802 }
1803 if (!strcmp(pc->name,name)) {
1804 printk("kmem_cache_create: duplicate cache %s\n",name);
1805 up(&cache_chain_sem);
1806 unlock_cpu_hotplug();
1807 BUG();
1808 }
1809 }
1810 set_fs(old_fs);
1811 }
1812
1813 /* cache setup completed, link it into the list */ 1825 /* cache setup completed, link it into the list */
1814 list_add(&cachep->next, &cache_chain); 1826 list_add(&cachep->next, &cache_chain);
1815 up(&cache_chain_sem);
1816 unlock_cpu_hotplug(); 1827 unlock_cpu_hotplug();
1817opps: 1828oops:
1818 if (!cachep && (flags & SLAB_PANIC)) 1829 if (!cachep && (flags & SLAB_PANIC))
1819 panic("kmem_cache_create(): failed to create slab `%s'\n", 1830 panic("kmem_cache_create(): failed to create slab `%s'\n",
1820 name); 1831 name);
1832 up(&cache_chain_sem);
1821 return cachep; 1833 return cachep;
1822} 1834}
1823EXPORT_SYMBOL(kmem_cache_create); 1835EXPORT_SYMBOL(kmem_cache_create);
@@ -2137,8 +2149,8 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
2137 i = 1 << cachep->gfporder; 2149 i = 1 << cachep->gfporder;
2138 page = virt_to_page(objp); 2150 page = virt_to_page(objp);
2139 do { 2151 do {
2140 SET_PAGE_CACHE(page, cachep); 2152 page_set_cache(page, cachep);
2141 SET_PAGE_SLAB(page, slabp); 2153 page_set_slab(page, slabp);
2142 page++; 2154 page++;
2143 } while (--i); 2155 } while (--i);
2144} 2156}
@@ -2268,14 +2280,14 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
2268 kfree_debugcheck(objp); 2280 kfree_debugcheck(objp);
2269 page = virt_to_page(objp); 2281 page = virt_to_page(objp);
2270 2282
2271 if (GET_PAGE_CACHE(page) != cachep) { 2283 if (page_get_cache(page) != cachep) {
2272 printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n", 2284 printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n",
2273 GET_PAGE_CACHE(page),cachep); 2285 page_get_cache(page),cachep);
2274 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); 2286 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
2275 printk(KERN_ERR "%p is %s.\n", GET_PAGE_CACHE(page), GET_PAGE_CACHE(page)->name); 2287 printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name);
2276 WARN_ON(1); 2288 WARN_ON(1);
2277 } 2289 }
2278 slabp = GET_PAGE_SLAB(page); 2290 slabp = page_get_slab(page);
2279 2291
2280 if (cachep->flags & SLAB_RED_ZONE) { 2292 if (cachep->flags & SLAB_RED_ZONE) {
2281 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { 2293 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
@@ -2627,7 +2639,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n
2627 struct slab *slabp; 2639 struct slab *slabp;
2628 unsigned int objnr; 2640 unsigned int objnr;
2629 2641
2630 slabp = GET_PAGE_SLAB(virt_to_page(objp)); 2642 slabp = page_get_slab(virt_to_page(objp));
2631 l3 = cachep->nodelists[node]; 2643 l3 = cachep->nodelists[node];
2632 list_del(&slabp->list); 2644 list_del(&slabp->list);
2633 objnr = (objp - slabp->s_mem) / cachep->objsize; 2645 objnr = (objp - slabp->s_mem) / cachep->objsize;
@@ -2743,7 +2755,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
2743#ifdef CONFIG_NUMA 2755#ifdef CONFIG_NUMA
2744 { 2756 {
2745 struct slab *slabp; 2757 struct slab *slabp;
2746 slabp = GET_PAGE_SLAB(virt_to_page(objp)); 2758 slabp = page_get_slab(virt_to_page(objp));
2747 if (unlikely(slabp->nodeid != numa_node_id())) { 2759 if (unlikely(slabp->nodeid != numa_node_id())) {
2748 struct array_cache *alien = NULL; 2760 struct array_cache *alien = NULL;
2749 int nodeid = slabp->nodeid; 2761 int nodeid = slabp->nodeid;
@@ -2829,7 +2841,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
2829 page = virt_to_page(ptr); 2841 page = virt_to_page(ptr);
2830 if (unlikely(!PageSlab(page))) 2842 if (unlikely(!PageSlab(page)))
2831 goto out; 2843 goto out;
2832 if (unlikely(GET_PAGE_CACHE(page) != cachep)) 2844 if (unlikely(page_get_cache(page) != cachep))
2833 goto out; 2845 goto out;
2834 return 1; 2846 return 1;
2835out: 2847out:
@@ -3025,7 +3037,7 @@ void kfree(const void *objp)
3025 return; 3037 return;
3026 local_irq_save(flags); 3038 local_irq_save(flags);
3027 kfree_debugcheck(objp); 3039 kfree_debugcheck(objp);
3028 c = GET_PAGE_CACHE(virt_to_page(objp)); 3040 c = page_get_cache(virt_to_page(objp));
3029 __cache_free(c, (void*)objp); 3041 __cache_free(c, (void*)objp);
3030 local_irq_restore(flags); 3042 local_irq_restore(flags);
3031} 3043}
@@ -3262,6 +3274,7 @@ static void drain_array_locked(kmem_cache_t *cachep,
3262 3274
3263/** 3275/**
3264 * cache_reap - Reclaim memory from caches. 3276 * cache_reap - Reclaim memory from caches.
3277 * @unused: unused parameter
3265 * 3278 *
3266 * Called from workqueue/eventd every few seconds. 3279 * Called from workqueue/eventd every few seconds.
3267 * Purpose: 3280 * Purpose:
@@ -3278,7 +3291,7 @@ static void cache_reap(void *unused)
3278 3291
3279 if (down_trylock(&cache_chain_sem)) { 3292 if (down_trylock(&cache_chain_sem)) {
3280 /* Give up. Setup the next iteration. */ 3293 /* Give up. Setup the next iteration. */
3281 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id()); 3294 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
3282 return; 3295 return;
3283 } 3296 }
3284 3297
@@ -3347,7 +3360,7 @@ next:
3347 up(&cache_chain_sem); 3360 up(&cache_chain_sem);
3348 drain_remote_pages(); 3361 drain_remote_pages();
3349 /* Setup the next iteration */ 3362 /* Setup the next iteration */
3350 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id()); 3363 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
3351} 3364}
3352 3365
3353#ifdef CONFIG_PROC_FS 3366#ifdef CONFIG_PROC_FS
@@ -3594,7 +3607,7 @@ unsigned int ksize(const void *objp)
3594 if (unlikely(objp == NULL)) 3607 if (unlikely(objp == NULL))
3595 return 0; 3608 return 0;
3596 3609
3597 return obj_reallen(GET_PAGE_CACHE(virt_to_page(objp))); 3610 return obj_reallen(page_get_cache(virt_to_page(objp)));
3598} 3611}
3599 3612
3600 3613
diff --git a/mm/swap.c b/mm/swap.c
index 154ae13d8b7e..d09cf7f03e76 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -413,7 +413,6 @@ void vm_acct_memory(long pages)
413 } 413 }
414 preempt_enable(); 414 preempt_enable();
415} 415}
416EXPORT_SYMBOL(vm_acct_memory);
417 416
418#ifdef CONFIG_HOTPLUG_CPU 417#ifdef CONFIG_HOTPLUG_CPU
419static void lru_drain_cache(unsigned int cpu) 418static void lru_drain_cache(unsigned int cpu)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index dfd9a46755b8..0df9a57b1de8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -40,7 +40,6 @@ struct address_space swapper_space = {
40 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), 40 .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
41 .backing_dev_info = &swap_backing_dev_info, 41 .backing_dev_info = &swap_backing_dev_info,
42}; 42};
43EXPORT_SYMBOL(swapper_space);
44 43
45#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) 44#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
46 45
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8970c0b74194..edafeace301f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -36,8 +36,6 @@ unsigned int nr_swapfiles;
36long total_swap_pages; 36long total_swap_pages;
37static int swap_overflow; 37static int swap_overflow;
38 38
39EXPORT_SYMBOL(total_swap_pages);
40
41static const char Bad_file[] = "Bad swap file entry "; 39static const char Bad_file[] = "Bad swap file entry ";
42static const char Unused_file[] = "Unused swap file entry "; 40static const char Unused_file[] = "Unused swap file entry ";
43static const char Bad_offset[] = "Bad swap offset entry "; 41static const char Bad_offset[] = "Bad swap offset entry ";
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 54a90e83cb31..729eb3eec75f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -457,7 +457,7 @@ void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
457 * @size: allocation size 457 * @size: allocation size
458 * @gfp_mask: flags for the page level allocator 458 * @gfp_mask: flags for the page level allocator
459 * @prot: protection mask for the allocated pages 459 * @prot: protection mask for the allocated pages
460 * @node node to use for allocation or -1 460 * @node: node to use for allocation or -1
461 * 461 *
462 * Allocate enough pages to cover @size from the page level 462 * Allocate enough pages to cover @size from the page level
463 * allocator with @gfp_mask flags. Map them into contiguous 463 * allocator with @gfp_mask flags. Map them into contiguous
@@ -507,7 +507,7 @@ EXPORT_SYMBOL(vmalloc);
507 * vmalloc_node - allocate memory on a specific node 507 * vmalloc_node - allocate memory on a specific node
508 * 508 *
509 * @size: allocation size 509 * @size: allocation size
510 * @node; numa node 510 * @node: numa node
511 * 511 *
512 * Allocate enough pages to cover @size from the page level 512 * Allocate enough pages to cover @size from the page level
513 * allocator and map them into contiguous kernel virtual space. 513 * allocator and map them into contiguous kernel virtual space.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 135bf8ca96ee..28130541270f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1074,7 +1074,7 @@ loop_again:
1074 continue; 1074 continue;
1075 1075
1076 if (!zone_watermark_ok(zone, order, 1076 if (!zone_watermark_ok(zone, order,
1077 zone->pages_high, 0, 0, 0)) { 1077 zone->pages_high, 0, 0)) {
1078 end_zone = i; 1078 end_zone = i;
1079 goto scan; 1079 goto scan;
1080 } 1080 }
@@ -1111,7 +1111,7 @@ scan:
1111 1111
1112 if (nr_pages == 0) { /* Not software suspend */ 1112 if (nr_pages == 0) { /* Not software suspend */
1113 if (!zone_watermark_ok(zone, order, 1113 if (!zone_watermark_ok(zone, order,
1114 zone->pages_high, end_zone, 0, 0)) 1114 zone->pages_high, end_zone, 0))
1115 all_zones_ok = 0; 1115 all_zones_ok = 0;
1116 } 1116 }
1117 zone->temp_priority = priority; 1117 zone->temp_priority = priority;
@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
1259 return; 1259 return;
1260 1260
1261 pgdat = zone->zone_pgdat; 1261 pgdat = zone->zone_pgdat;
1262 if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0)) 1262 if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
1263 return; 1263 return;
1264 if (pgdat->kswapd_max_order < order) 1264 if (pgdat->kswapd_max_order < order)
1265 pgdat->kswapd_max_order = order; 1265 pgdat->kswapd_max_order = order;