path: root/mm
Diffstat (limited to 'mm')
-rw-r--r--	mm/Kconfig	17
-rw-r--r--	mm/filemap.c	12
-rw-r--r--	mm/filemap_xip.c	9
-rw-r--r--	mm/hugetlb.c	88
-rw-r--r--	mm/memory.c	47
-rw-r--r--	mm/mmap.c	7
-rw-r--r--	mm/oom_kill.c	2
-rw-r--r--	mm/page-writeback.c	9
-rw-r--r--	mm/page_alloc.c	15
-rw-r--r--	mm/quicklist.c	12
-rw-r--r--	mm/slab.c	68
-rw-r--r--	mm/slob.c	2
-rw-r--r--	mm/slub.c	139
-rw-r--r--	mm/sparse.c	20
14 files changed, 336 insertions, 111 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index c070ec0c15bf..0016ebd4dcba 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -112,18 +112,17 @@ config SPARSEMEM_EXTREME
 	def_bool y
 	depends on SPARSEMEM && !SPARSEMEM_STATIC
 
-#
-# SPARSEMEM_VMEMMAP uses a virtually mapped mem_map to optimise pfn_to_page
-# and page_to_pfn. The most efficient option where kernel virtual space is
-# not under pressure.
-#
 config SPARSEMEM_VMEMMAP_ENABLE
 	def_bool n
 
 config SPARSEMEM_VMEMMAP
-	bool
-	depends on SPARSEMEM
-	default y if (SPARSEMEM_VMEMMAP_ENABLE)
+	bool "Sparse Memory virtual memmap"
+	depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
+	default y
+	help
+	 SPARSEMEM_VMEMMAP uses a virtually mapped memmap to optimise
+	 pfn_to_page and page_to_pfn operations. This is the most
+	 efficient option when sufficient kernel resources are available.
 
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
@@ -188,7 +187,7 @@ config BOUNCE
 config NR_QUICK
 	int
 	depends on QUICKLIST
-	default "2" if (SUPERH && !SUPERH64)
+	default "2" if SUPERH
 	default "1"
 
 config VIRT_TO_BUS
diff --git a/mm/filemap.c b/mm/filemap.c
index 455119cc7f40..89ce6fe5f8be 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -124,6 +124,18 @@ void __remove_from_page_cache(struct page *page)
 	mapping->nrpages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	BUG_ON(page_mapped(page));
+
+	/*
+	 * Some filesystems seem to re-dirty the page even after
+	 * the VM has canceled the dirty bit (eg ext3 journaling).
+	 *
+	 * Fix it up by doing a final dirty accounting check after
+	 * having removed the page entirely.
+	 */
+	if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+		dec_zone_page_state(page, NR_FILE_DIRTY);
+		dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+	}
 }
 
 void remove_from_page_cache(struct page *page)
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index e233fff61b4b..f874ae818ad3 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -25,14 +25,15 @@ static struct page *__xip_sparse_page;
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
-		unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
-		if (zeroes) {
+		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+
+		if (page) {
 			static DEFINE_SPINLOCK(xip_alloc_lock);
 			spin_lock(&xip_alloc_lock);
 			if (!__xip_sparse_page)
-				__xip_sparse_page = virt_to_page(zeroes);
+				__xip_sparse_page = page;
 			else
-				free_page(zeroes);
+				__free_page(page);
 			spin_unlock(&xip_alloc_lock);
 		}
 	}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6121b57bbe96..db861d8b6c28 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,7 +31,7 @@ static unsigned int free_huge_pages_node[MAX_NUMNODES];
 static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-int hugetlb_dynamic_pool;
+unsigned long nr_overcommit_huge_pages;
 static int hugetlb_next_nid;
 
 /*
@@ -227,22 +227,58 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 						unsigned long address)
 {
 	struct page *page;
+	unsigned int nid;
 
-	/* Check if the dynamic pool is enabled */
-	if (!hugetlb_dynamic_pool)
+	/*
+	 * Assume we will successfully allocate the surplus page to
+	 * prevent racing processes from causing the surplus to exceed
+	 * overcommit
+	 *
+	 * This however introduces a different race, where a process B
+	 * tries to grow the static hugepage pool while alloc_pages() is
+	 * called by process A. B will only examine the per-node
+	 * counters in determining if surplus huge pages can be
+	 * converted to normal huge pages in adjust_pool_surplus(). A
+	 * won't be able to increment the per-node counter, until the
+	 * lock is dropped by B, but B doesn't drop hugetlb_lock until
+	 * no more huge pages can be converted from surplus to normal
+	 * state (and doesn't try to convert again). Thus, we have a
+	 * case where a surplus huge page exists, the pool is grown, and
+	 * the surplus huge page still exists after, even though it
+	 * should just have been converted to a normal huge page. This
+	 * does not leak memory, though, as the hugepage will be freed
+	 * once it is out of use. It also does not allow the counters to
+	 * go out of whack in adjust_pool_surplus() as we don't modify
+	 * the node values until we've gotten the hugepage and only the
+	 * per-node value is checked there.
+	 */
+	spin_lock(&hugetlb_lock);
+	if (surplus_huge_pages >= nr_overcommit_huge_pages) {
+		spin_unlock(&hugetlb_lock);
 		return NULL;
+	} else {
+		nr_huge_pages++;
+		surplus_huge_pages++;
+	}
+	spin_unlock(&hugetlb_lock);
 
 	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
 					HUGETLB_PAGE_ORDER);
+
+	spin_lock(&hugetlb_lock);
 	if (page) {
+		nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
-		spin_lock(&hugetlb_lock);
-		nr_huge_pages++;
-		nr_huge_pages_node[page_to_nid(page)]++;
-		surplus_huge_pages++;
-		surplus_huge_pages_node[page_to_nid(page)]++;
-		spin_unlock(&hugetlb_lock);
+		/*
+		 * We incremented the global counters already
+		 */
+		nr_huge_pages_node[nid]++;
+		surplus_huge_pages_node[nid]++;
+	} else {
+		nr_huge_pages--;
+		surplus_huge_pages--;
 	}
+	spin_unlock(&hugetlb_lock);
 
 	return page;
 }
@@ -382,9 +418,14 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
 	if (free_huge_pages > resv_huge_pages)
 		page = dequeue_huge_page(vma, addr);
 	spin_unlock(&hugetlb_lock);
-	if (!page)
+	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
-	return page ? page : ERR_PTR(-VM_FAULT_OOM);
+		if (!page) {
+			hugetlb_put_quota(vma->vm_file->f_mapping, 1);
+			return ERR_PTR(-VM_FAULT_OOM);
+		}
+	}
+	return page;
 }
 
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
@@ -481,6 +522,12 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * Increase the pool size
 	 * First take pages out of surplus state. Then make up the
 	 * remaining difference by allocating fresh huge pages.
+	 *
+	 * We might race with alloc_buddy_huge_page() here and be unable
+	 * to convert a surplus huge page to a normal huge page. That is
+	 * not critical, though, it just means the overall size of the
+	 * pool might be one hugepage larger than it needs to be, but
+	 * within all the constraints specified by the sysctls.
 	 */
 	spin_lock(&hugetlb_lock);
 	while (surplus_huge_pages && count > persistent_huge_pages) {
@@ -509,6 +556,14 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	 * to keep enough around to satisfy reservations). Then place
 	 * pages into surplus state as needed so the pool will shrink
 	 * to the desired size as pages become free.
+	 *
+	 * By placing pages into the surplus state independent of the
+	 * overcommit value, we are allowing the surplus pool size to
+	 * exceed overcommit. There are few sane options here. Since
+	 * alloc_buddy_huge_page() is checking the global counter,
+	 * though, we'll note that we're not allowed to exceed surplus
+	 * and won't grow the pool anywhere else. Not until one of the
+	 * sysctls are changed, or the surplus pages go out of use.
 	 */
 	min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
 	min_count = max(count, min_count);
@@ -644,6 +699,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 		dst_pte = huge_pte_alloc(dst, addr);
 		if (!dst_pte)
 			goto nomem;
+
+		/* If the pagetables are shared don't copy or take references */
+		if (dst_pte == src_pte)
+			continue;
+
 		spin_lock(&dst->page_table_lock);
 		spin_lock(&src->page_table_lock);
 		if (!pte_none(*src_pte)) {
@@ -907,7 +967,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-		if (!pte || pte_none(*pte)) {
+		if (!pte || pte_none(*pte) || (write && !pte_write(*pte))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
@@ -1156,8 +1216,10 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 	if (hugetlb_get_quota(inode->i_mapping, chg))
 		return -ENOSPC;
 	ret = hugetlb_acct_memory(chg);
-	if (ret < 0)
+	if (ret < 0) {
+		hugetlb_put_quota(inode->i_mapping, chg);
 		return ret;
+	}
 	region_add(&inode->i_mapping->private_list, from, to);
 	return 0;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 4bf0b6d0eb2a..d902d0e25edc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -392,6 +392,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 		return NULL;
 	}
 
+#ifdef CONFIG_DEBUG_VM
 	/*
 	 * Add some anal sanity checks for now. Eventually,
 	 * we should just do "return pfn_to_page(pfn)", but
@@ -402,6 +403,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 		print_bad_pte(vma, pte, addr);
 		return NULL;
 	}
+#endif
 
 	/*
 	 * NOTE! We still have PageReserved() pages in the page
@@ -511,8 +513,7 @@ again:
 		if (progress >= 32) {
 			progress = 0;
 			if (need_resched() ||
-			    need_lockbreak(src_ptl) ||
-			    need_lockbreak(dst_ptl))
+			    spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
 				break;
 		}
 		if (pte_none(*src_pte)) {
@@ -851,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 			tlb_finish_mmu(*tlbp, tlb_start, start);
 
 			if (need_resched() ||
-				(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+				(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
 				if (i_mmap_lock) {
 					*tlbp = NULL;
 					goto out;
@@ -1668,6 +1669,9 @@ gotten:
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	if (dirty_page) {
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
+
 		/*
 		 * Yes, Virginia, this is actually required to prevent a race
 		 * with clear_page_dirty_for_io() from clearing the page dirty
@@ -1763,8 +1767,7 @@ again:
 
 		restart_addr = zap_page_range(vma, start_addr,
 					end_addr - start_addr, details);
-		need_break = need_resched() ||
-				need_lockbreak(details->i_mmap_lock);
+		need_break = need_resched() || spin_needbreak(details->i_mmap_lock);
 
 		if (restart_addr >= end_addr) {
 			/* We have now completed this vma: mark it so */
@@ -2341,6 +2344,9 @@ out_unlocked:
 	if (anon)
 		page_cache_release(vmf.page);
 	else if (dirty_page) {
+		if (vma->vm_file)
+			file_update_time(vma->vm_file);
+
 		set_page_dirty_balance(dirty_page, page_mkwrite);
 		put_page(dirty_page);
 	}
@@ -2748,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 
 	return buf - old_buf;
 }
+
+/*
+ * Print the name of a VMA.
+ */
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, ip);
+	if (vma && vma->vm_file) {
+		struct file *f = vma->vm_file;
+		char *buf = (char *)__get_free_page(GFP_KERNEL);
+		if (buf) {
+			char *p, *s;
+
+			p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+			if (IS_ERR(p))
+				p = "?";
+			s = strrchr(p, '/');
+			if (s)
+				p = s+1;
+			printk("%s%s[%lx+%lx]", prefix, p,
+					vma->vm_start,
+					vma->vm_end - vma->vm_start);
+			free_page((unsigned long)buf);
+		}
+	}
+	up_read(&current->mm->mmap_sem);
+}
diff --git a/mm/mmap.c b/mm/mmap.c
index 15678aa6ec73..d2b6d44962b7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -251,7 +251,8 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 	 * not page aligned -Ram Gupta
 	 */
 	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
+			(mm->end_data - mm->start_data) > rlim)
 		goto out;
 
 	newbrk = PAGE_ALIGN(brk);
@@ -1620,7 +1621,7 @@ static inline int expand_downwards(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	address &= PAGE_MASK;
-	error = security_file_mmap(0, 0, 0, 0, address, 1);
+	error = security_file_mmap(NULL, 0, 0, 0, address, 1);
 	if (error)
 		return error;
 
@@ -1941,7 +1942,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (is_hugepage_only_range(mm, addr, len))
 		return -EINVAL;
 
-	error = security_file_mmap(0, 0, 0, 0, addr, 1);
+	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 	if (error)
 		return error;
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 91a081a82f55..96473b482099 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -286,7 +286,7 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 	 * all the memory it needs. That way it should be able to
 	 * exit() and clear out its resources quickly...
 	 */
-	p->time_slice = HZ;
+	p->rt.time_slice = HZ;
 	set_tsk_thread_flag(p, TIF_MEMDIE);
 
 	force_sig(SIGKILL, p);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d55cfcae2ef1..3d3848fa6324 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -558,7 +558,6 @@ static void background_writeout(unsigned long _min_pages)
 		    global_page_state(NR_UNSTABLE_NFS) < background_thresh
 				&& min_pages <= 0)
 			break;
-		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
@@ -566,9 +565,8 @@ static void background_writeout(unsigned long _min_pages)
 		min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 			/* Wrote less than expected */
-			if (wbc.encountered_congestion || wbc.more_io)
-				congestion_wait(WRITE, HZ/10);
-			else
+			congestion_wait(WRITE, HZ/10);
+			if (!wbc.encountered_congestion)
 				break;
 		}
 	}
@@ -633,12 +631,11 @@ static void wb_kupdate(unsigned long arg)
 			global_page_state(NR_UNSTABLE_NFS) +
 			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
 	while (nr_to_write > 0) {
-		wbc.more_io = 0;
 		wbc.encountered_congestion = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		writeback_inodes(&wbc);
 		if (wbc.nr_to_write > 0) {
-			if (wbc.encountered_congestion || wbc.more_io)
+			if (wbc.encountered_congestion)
 				congestion_wait(WRITE, HZ/10);
 			else
 				break; /* All the old data is written */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b5a58d476c1a..b2838c24e582 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -847,8 +847,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		struct page *page = __rmqueue(zone, order, migratetype);
 		if (unlikely(page == NULL))
 			break;
+
+		/*
+		 * Split buddy pages returned by expand() are received here
+		 * in physical page order. The page is added to the callers and
+		 * list and the list head then moves forward. From the callers
+		 * perspective, the linked list is ordered by page number in
+		 * some conditions. This is useful for IO devices that can
+		 * merge IO requests if the physical pages are ordered
+		 * properly.
+		 */
 		list_add(&page->lru, list);
 		set_page_private(page, migratetype);
+		list = &page->lru;
 	}
 	spin_unlock(&zone->lock);
 	return i;
@@ -2555,7 +2566,7 @@ static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
 	memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif
 
-static int __devinit zone_batchsize(struct zone *zone)
+static int zone_batchsize(struct zone *zone)
 {
 	int batch;
 
@@ -3427,7 +3438,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 		mem_map = NODE_DATA(0)->node_mem_map;
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 		if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
-			mem_map -= pgdat->node_start_pfn;
+			mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 	}
 #endif
diff --git a/mm/quicklist.c b/mm/quicklist.c
index ae8189c2799e..3f703f7cb398 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
 static unsigned long max_pages(unsigned long min_pages)
 {
 	unsigned long node_free_pages, max;
+	struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+
+	node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+		zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+		zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+		zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
 
-	node_free_pages = node_page_state(numa_node_id(),
-			NR_FREE_PAGES);
 	max = node_free_pages / FRACTION_OF_NODE_MEM;
 	return max(max, min_pages);
 }
diff --git a/mm/slab.c b/mm/slab.c
index 2e338a5f7b14..40c00dacbe4b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -304,11 +304,11 @@ struct kmem_list3 {
 /*
  * Need this for bootstrapping a per node allocator.
  */
-#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
+#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
 struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define CACHE_CACHE 0
-#define SIZE_AC 1
-#define SIZE_L3 (1 + MAX_NUMNODES)
+#define SIZE_AC MAX_NUMNODES
+#define SIZE_L3 (2 * MAX_NUMNODES)
 
 static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_list3 *l3, int tofree);
@@ -730,8 +730,7 @@ static inline void init_lock_keys(void)
 #endif
 
 /*
- * 1. Guard access to the cache-chain.
- * 2. Protect sanity of cpu_online_map against cpu hotplug events
+ * Guard access to the cache-chain.
  */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
@@ -1331,12 +1330,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 	int err = 0;
 
 	switch (action) {
-	case CPU_LOCK_ACQUIRE:
-		mutex_lock(&cache_chain_mutex);
-		break;
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
+		mutex_lock(&cache_chain_mutex);
 		err = cpuup_prepare(cpu);
+		mutex_unlock(&cache_chain_mutex);
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
@@ -1373,9 +1371,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 #endif
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
+		mutex_lock(&cache_chain_mutex);
 		cpuup_canceled(cpu);
-		break;
-	case CPU_LOCK_RELEASE:
 		mutex_unlock(&cache_chain_mutex);
 		break;
 	}
@@ -1410,6 +1407,22 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 }
 
 /*
+ * For setting up all the kmem_list3s for cache whose buffer_size is same as
+ * size of kmem_list3.
+ */
+static void __init set_up_list3s(struct kmem_cache *cachep, int index)
+{
+	int node;
+
+	for_each_online_node(node) {
+		cachep->nodelists[node] = &initkmem_list3[index + node];
+		cachep->nodelists[node]->next_reap = jiffies +
+		    REAPTIMEOUT_LIST3 +
+		    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+	}
+}
+
+/*
  * Initialisation. Called after the page allocator have been initialised and
  * before smp_init().
  */
@@ -1432,6 +1445,7 @@ void __init kmem_cache_init(void)
 		if (i < MAX_NUMNODES)
 			cache_cache.nodelists[i] = NULL;
 	}
+	set_up_list3s(&cache_cache, CACHE_CACHE);
 
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
@@ -1587,10 +1601,9 @@ void __init kmem_cache_init(void)
 	{
 		int nid;
 
-		/* Replace the static kmem_list3 structures for the boot cpu */
-		init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
+		for_each_online_node(nid) {
+			init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], nid);
 
-		for_each_node_state(nid, N_NORMAL_MEMORY) {
 			init_list(malloc_sizes[INDEX_AC].cs_cachep,
 				  &initkmem_list3[SIZE_AC + nid], nid);
 
@@ -1960,22 +1973,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 	}
 }
 
-/*
- * For setting up all the kmem_list3s for cache whose buffer_size is same as
- * size of kmem_list3.
- */
-static void __init set_up_list3s(struct kmem_cache *cachep, int index)
-{
-	int node;
-
-	for_each_node_state(node, N_NORMAL_MEMORY) {
-		cachep->nodelists[node] = &initkmem_list3[index + node];
-		cachep->nodelists[node]->next_reap = jiffies +
-		    REAPTIMEOUT_LIST3 +
-		    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-	}
-}
-
 static void __kmem_cache_destroy(struct kmem_cache *cachep)
 {
 	int i;
@@ -2099,7 +2096,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 			g_cpucache_up = PARTIAL_L3;
 		} else {
 			int node;
-			for_each_node_state(node, N_NORMAL_MEMORY) {
+			for_each_online_node(node) {
 				cachep->nodelists[node] =
 					kmalloc_node(sizeof(struct kmem_list3),
 						GFP_KERNEL, node);
@@ -2170,6 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 * We use cache_chain_mutex to ensure a consistent view of
 	 * cpu_online_map as well. Please see cpuup_callback
 	 */
+	get_online_cpus();
 	mutex_lock(&cache_chain_mutex);
 
 	list_for_each_entry(pc, &cache_chain, next) {
@@ -2396,6 +2394,7 @@ oops:
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
 	mutex_unlock(&cache_chain_mutex);
+	put_online_cpus();
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2547,9 +2546,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 	int ret;
 	BUG_ON(!cachep || in_interrupt());
 
+	get_online_cpus();
 	mutex_lock(&cache_chain_mutex);
 	ret = __cache_shrink(cachep);
 	mutex_unlock(&cache_chain_mutex);
+	put_online_cpus();
 	return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
@@ -2575,6 +2576,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 	BUG_ON(!cachep || in_interrupt());
 
 	/* Find the cache in the chain of caches. */
+	get_online_cpus();
 	mutex_lock(&cache_chain_mutex);
 	/*
 	 * the chain is never empty, cache_cache is never destroyed
@@ -2584,6 +2586,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 		slab_error(cachep, "Can't free all objects");
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
+		put_online_cpus();
 		return;
 	}
 
@@ -2592,6 +2595,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 
 	__kmem_cache_destroy(cachep);
 	mutex_unlock(&cache_chain_mutex);
+	put_online_cpus();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -3815,7 +3819,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 	struct array_cache *new_shared;
 	struct array_cache **new_alien = NULL;
 
-	for_each_node_state(node, N_NORMAL_MEMORY) {
+	for_each_online_node(node) {
 
 		if (use_alien_caches) {
 			new_alien = alloc_alien_cache(node, cachep->limit);
@@ -4105,7 +4109,7 @@ out:
 	schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
 }
 
-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_SLABINFO
 
 static void print_slabinfo_header(struct seq_file *m)
 {
diff --git a/mm/slob.c b/mm/slob.c
index ee2ef8af0d43..773a7aa80ab5 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -330,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
 
 		/* Not enough space: must allocate a new page */
 		if (!b) {
-			b = slob_new_page(gfp, 0, node);
+			b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
 			if (!b)
 				return 0;
 			sp = (struct slob_page *)virt_to_page(b);
diff --git a/mm/slub.c b/mm/slub.c
index b9f37cb0f2e6..5cc4b7dddb50 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -172,7 +172,7 @@ static inline void ClearSlabDebug(struct page *page)
  * Mininum number of partial slabs. These will be left on the partial
  * lists even if they are empty. kmem_cache_shrink may reclaim them.
  */
-#define MIN_PARTIAL 2
+#define MIN_PARTIAL 5
 
 /*
  * Maximum number of desirable partial slabs.
@@ -1613,7 +1613,7 @@ checks_ok:
 	 * then add it.
 	 */
 	if (unlikely(!prior))
-		add_partial(get_node(s, page_to_nid(page)), page);
+		add_partial_tail(get_node(s, page_to_nid(page)), page);
 
 out_unlock:
 	slab_unlock(page);
@@ -3076,6 +3076,19 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	return slab_alloc(s, gfpflags, node, caller);
 }
 
+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+	unsigned long flags;
+	unsigned long x = 0;
+	struct page *page;
+
+	spin_lock_irqsave(&n->list_lock, flags);
+	list_for_each_entry(page, &n->partial, lru)
+		x += page->inuse;
+	spin_unlock_irqrestore(&n->list_lock, flags);
+	return x;
+}
+
 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
@@ -3458,19 +3471,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
 	return n;
 }
 
-static unsigned long count_partial(struct kmem_cache_node *n)
-{
-	unsigned long flags;
-	unsigned long x = 0;
-	struct page *page;
-
-	spin_lock_irqsave(&n->list_lock, flags);
-	list_for_each_entry(page, &n->partial, lru)
-		x += page->inuse;
-	spin_unlock_irqrestore(&n->list_lock, flags);
-	return x;
-}
-
 enum slab_stat_type {
 	SL_FULL,
 	SL_PARTIAL,
@@ -3962,7 +3962,7 @@ static struct kset_uevent_ops slab_uevent_ops = {
 	.filter = uevent_filter,
 };
 
-static decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
+static struct kset *slab_kset;
 
 #define ID_STR_LENGTH 64
 
@@ -4015,7 +4015,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 		 * This is typically the case for debug situations. In that
 		 * case we can catch duplicate names easily.
 		 */
-		sysfs_remove_link(&slab_subsys.kobj, s->name);
+		sysfs_remove_link(&slab_kset->kobj, s->name);
 		name = s->name;
 	} else {
 		/*
@@ -4025,12 +4025,12 @@ static int sysfs_slab_add(struct kmem_cache *s)
 		name = create_unique_id(s);
 	}
 
-	kobj_set_kset_s(s, slab_subsys);
-	kobject_set_name(&s->kobj, name);
-	kobject_init(&s->kobj);
-	err = kobject_add(&s->kobj);
-	if (err)
+	s->kobj.kset = slab_kset;
+	err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
+	if (err) {
+		kobject_put(&s->kobj);
 		return err;
+	}
 
 	err = sysfs_create_group(&s->kobj, &slab_attr_group);
 	if (err)
@@ -4070,9 +4070,8 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 		/*
 		 * If we have a leftover link then remove it.
 		 */
-		sysfs_remove_link(&slab_subsys.kobj, name);
-		return sysfs_create_link(&slab_subsys.kobj,
-						&s->kobj, name);
+		sysfs_remove_link(&slab_kset->kobj, name);
+		return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
 	}
 
 	al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
@@ -4091,8 +4090,8 @@ static int __init slab_sysfs_init(void)
 	struct kmem_cache *s;
 	int err;
 
-	err = subsystem_register(&slab_subsys);
-	if (err) {
+	slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+	if (!slab_kset) {
 		printk(KERN_ERR "Cannot register slab subsystem.\n");
 		return -ENOSYS;
 	}
@@ -4123,3 +4122,89 @@ static int __init slab_sysfs_init(void)
 
 __initcall(slab_sysfs_init);
 #endif
+
+/*
+ * The /proc/slabinfo ABI
+ */
+#ifdef CONFIG_SLABINFO
+
+ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+		       size_t count, loff_t *ppos)
+{
+	return -EINVAL;
+}
+
+
+static void print_slabinfo_header(struct seq_file *m)
+{
+	seq_puts(m, "slabinfo - version: 2.1\n");
+	seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
+		 "<objperslab> <pagesperslab>");
+	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+	seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t n = *pos;
+
+	down_read(&slub_lock);
+	if (!n)
+		print_slabinfo_header(m);
+
+	return seq_list_start(&slab_caches, *pos);
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+	up_read(&slub_lock);
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+	unsigned long nr_partials = 0;
+	unsigned long nr_slabs = 0;
+	unsigned long nr_inuse = 0;
+	unsigned long nr_objs;
+	struct kmem_cache *s;
+	int node;
+
+	s = list_entry(p, struct kmem_cache, list);
+
+	for_each_online_node(node) {
+		struct kmem_cache_node *n = get_node(s, node);
+
+		if (!n)
+			continue;
+
+		nr_partials += n->nr_partial;
+		nr_slabs += atomic_long_read(&n->nr_slabs);
+		nr_inuse += count_partial(n);
+	}
+
+	nr_objs = nr_slabs * s->objects;
+	nr_inuse += (nr_slabs - nr_partials) * s->objects;
+
+	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
+		   nr_objs, s->size, s->objects, (1 << s->order));
+	seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
+	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
+		   0UL);
+	seq_putc(m, '\n');
+	return 0;
+}
+
+const struct seq_operations slabinfo_op = {
+	.start = s_start,
+	.next = s_next,
+	.stop = s_stop,
+	.show = s_show,
+};
+
+#endif /* CONFIG_SLABINFO */
diff --git a/mm/sparse.c b/mm/sparse.c
index e06f514fe04f..a2183cb5d524 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -83,6 +83,8 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
 		return -EEXIST;
 
 	section = sparse_index_alloc(nid);
+	if (!section)
+		return -ENOMEM;
 	/*
 	 * This lock keeps two different sections from
 	 * reallocating for the same index
@@ -389,9 +391,17 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 	 * no locking for this, because it does its own
 	 * plus, it does a kmalloc
 	 */
-	sparse_index_init(section_nr, pgdat->node_id);
+	ret = sparse_index_init(section_nr, pgdat->node_id);
+	if (ret < 0 && ret != -EEXIST)
+		return ret;
 	memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+	if (!memmap)
+		return -ENOMEM;
 	usemap = __kmalloc_section_usemap();
+	if (!usemap) {
+		__kfree_section_memmap(memmap, nr_pages);
+		return -ENOMEM;
+	}
 
 	pgdat_resize_lock(pgdat, &flags);
 
@@ -401,18 +411,16 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 		goto out;
 	}
 
-	if (!usemap) {
-		ret = -ENOMEM;
-		goto out;
-	}
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
 	ret = sparse_init_one_section(ms, section_nr, memmap, usemap);
 
 out:
 	pgdat_resize_unlock(pgdat, &flags);
-	if (ret <= 0)
+	if (ret <= 0) {
+		kfree(usemap);
 		__kfree_section_memmap(memmap, nr_pages);
+	}
 	return ret;
 }
 #endif