Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig          |  17
-rw-r--r--  mm/filemap.c        |  12
-rw-r--r--  mm/filemap_xip.c    |   9
-rw-r--r--  mm/hugetlb.c        |  88
-rw-r--r--  mm/memory.c         |  47
-rw-r--r--  mm/mmap.c           |   7
-rw-r--r--  mm/oom_kill.c       |   2
-rw-r--r--  mm/page-writeback.c |   9
-rw-r--r--  mm/page_alloc.c     |  15
-rw-r--r--  mm/quicklist.c      |  12
-rw-r--r--  mm/slab.c           |  68
-rw-r--r--  mm/slob.c           |   2
-rw-r--r--  mm/slub.c           | 139
-rw-r--r--  mm/sparse.c         |  20
14 files changed, 336 insertions, 111 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index c070ec0c15bf..0016ebd4dcba 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -112,18 +112,17 @@ config SPARSEMEM_EXTREME
 def_bool y
 depends on SPARSEMEM && !SPARSEMEM_STATIC

-#
-# SPARSEMEM_VMEMMAP uses a virtually mapped mem_map to optimise pfn_to_page
-# and page_to_pfn. The most efficient option where kernel virtual space is
-# not under pressure.
-#
 config SPARSEMEM_VMEMMAP_ENABLE
 def_bool n

 config SPARSEMEM_VMEMMAP
-bool
-depends on SPARSEMEM
-default y if (SPARSEMEM_VMEMMAP_ENABLE)
+bool "Sparse Memory virtual memmap"
+depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
+default y
+help
+SPARSEMEM_VMEMMAP uses a virtually mapped memmap to optimise
+pfn_to_page and page_to_pfn operations. This is the most
+efficient option when sufficient kernel resources are available.

 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
@@ -188,7 +187,7 @@ config BOUNCE
 config NR_QUICK
 int
 depends on QUICKLIST
-default "2" if (SUPERH && !SUPERH64)
+default "2" if SUPERH
 default "1"

 config VIRT_TO_BUS
diff --git a/mm/filemap.c b/mm/filemap.c
index 455119cc7f40..89ce6fe5f8be 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -124,6 +124,18 @@ void __remove_from_page_cache(struct page *page)
 mapping->nrpages--;
 __dec_zone_page_state(page, NR_FILE_PAGES);
 BUG_ON(page_mapped(page));
+
+/*
+* Some filesystems seem to re-dirty the page even after
+* the VM has canceled the dirty bit (eg ext3 journaling).
+*
+* Fix it up by doing a final dirty accounting check after
+* having removed the page entirely.
+*/
+if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+dec_zone_page_state(page, NR_FILE_DIRTY);
+dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+}
 }

 void remove_from_page_cache(struct page *page)
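The new block reconciles the dirty accounting for filesystems (ext3 journaling is the example given) that re-dirty a page after the VM has already cancelled the dirty bit: once the page has been taken out of the page cache, a still-set dirty bit is subtracted from the zone and BDI counters. A minimal userspace sketch of the same reconcile-on-removal idea; the structure and counters are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdio.h>

struct cache_entry {
	bool dirty;	/* may be set again behind our back */
	bool accounted;	/* counted in nr_dirty while cached */
};

static long nr_cached, nr_dirty;

/* Remove an entry and do the final dirty-accounting check, so a late
 * re-dirty cannot leave the counters permanently elevated. */
static void remove_entry(struct cache_entry *e)
{
	nr_cached--;
	if (e->dirty && e->accounted) {
		nr_dirty--;
		e->accounted = false;
	}
}

int main(void)
{
	struct cache_entry e = { .dirty = true, .accounted = true };

	nr_cached = 1;
	nr_dirty = 1;
	remove_entry(&e);
	printf("cached=%ld dirty=%ld\n", nr_cached, nr_dirty); /* 0 0 */
	return 0;
}

The point is only that the correction happens after removal, when nothing can re-dirty the entry again.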
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index e233fff61b4b..f874ae818ad3 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -25,14 +25,15 @@ static struct page *__xip_sparse_page;
 static struct page *xip_sparse_page(void)
 {
 if (!__xip_sparse_page) {
-unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
-if (zeroes) {
+struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
+
+if (page) {
 static DEFINE_SPINLOCK(xip_alloc_lock);
 spin_lock(&xip_alloc_lock);
 if (!__xip_sparse_page)
-__xip_sparse_page = virt_to_page(zeroes);
+__xip_sparse_page = page;
 else
-free_page(zeroes);
+__free_page(page);
 spin_unlock(&xip_alloc_lock);
 }
 }
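get_zeroed_page(GFP_HIGHUSER) hands back a kernel virtual address, which is the wrong interface for a highmem allocation; the fix allocates a struct page with alloc_page(GFP_HIGHUSER | __GFP_ZERO) while keeping the existing allocate-outside-the-lock, publish-under-the-lock pattern. A simplified userspace sketch of that pattern, with pthreads and calloc standing in for the spinlock and the page allocator:

#include <pthread.h>
#include <stdlib.h>

static void *shared_buf;
static pthread_mutex_t alloc_lock = PTHREAD_MUTEX_INITIALIZER;

/* Return the single shared zeroed buffer, allocating it on first use. */
static void *get_shared_buf(size_t size)
{
	if (!shared_buf) {
		void *buf = calloc(1, size);	/* allocate outside the lock */

		if (buf) {
			pthread_mutex_lock(&alloc_lock);
			if (!shared_buf)
				shared_buf = buf;	/* we won the race */
			else
				free(buf);		/* someone else did */
			pthread_mutex_unlock(&alloc_lock);
		}
	}
	return shared_buf;
}

As in the kernel function, the loser of the race frees its private copy; a strict C11 rendering would additionally make shared_buf an atomic pointer because of the unlocked first read.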
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6121b57bbe96..db861d8b6c28 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -31,7 +31,7 @@ static unsigned int free_huge_pages_node[MAX_NUMNODES];
 static unsigned int surplus_huge_pages_node[MAX_NUMNODES];
 static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
-int hugetlb_dynamic_pool;
+unsigned long nr_overcommit_huge_pages;
 static int hugetlb_next_nid;

 /*
@@ -227,22 +227,58 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 unsigned long address)
 {
 struct page *page;
+unsigned int nid;

-/* Check if the dynamic pool is enabled */
-if (!hugetlb_dynamic_pool)
+/*
+* Assume we will successfully allocate the surplus page to
+* prevent racing processes from causing the surplus to exceed
+* overcommit
+*
+* This however introduces a different race, where a process B
+* tries to grow the static hugepage pool while alloc_pages() is
+* called by process A. B will only examine the per-node
+* counters in determining if surplus huge pages can be
+* converted to normal huge pages in adjust_pool_surplus(). A
+* won't be able to increment the per-node counter, until the
+* lock is dropped by B, but B doesn't drop hugetlb_lock until
+* no more huge pages can be converted from surplus to normal
+* state (and doesn't try to convert again). Thus, we have a
+* case where a surplus huge page exists, the pool is grown, and
+* the surplus huge page still exists after, even though it
+* should just have been converted to a normal huge page. This
+* does not leak memory, though, as the hugepage will be freed
+* once it is out of use. It also does not allow the counters to
+* go out of whack in adjust_pool_surplus() as we don't modify
+* the node values until we've gotten the hugepage and only the
+* per-node value is checked there.
+*/
+spin_lock(&hugetlb_lock);
+if (surplus_huge_pages >= nr_overcommit_huge_pages) {
+spin_unlock(&hugetlb_lock);
 return NULL;
+} else {
+nr_huge_pages++;
+surplus_huge_pages++;
+}
+spin_unlock(&hugetlb_lock);

 page = alloc_pages(htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
 HUGETLB_PAGE_ORDER);
+
+spin_lock(&hugetlb_lock);
 if (page) {
+nid = page_to_nid(page);
 set_compound_page_dtor(page, free_huge_page);
-spin_lock(&hugetlb_lock);
-nr_huge_pages++;
-nr_huge_pages_node[page_to_nid(page)]++;
-surplus_huge_pages++;
-surplus_huge_pages_node[page_to_nid(page)]++;
-spin_unlock(&hugetlb_lock);
+/*
+* We incremented the global counters already
+*/
+nr_huge_pages_node[nid]++;
+surplus_huge_pages_node[nid]++;
+} else {
+nr_huge_pages--;
+surplus_huge_pages--;
 }
+spin_unlock(&hugetlb_lock);

 return page;
 }
@@ -382,9 +418,14 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
 if (free_huge_pages > resv_huge_pages)
 page = dequeue_huge_page(vma, addr);
 spin_unlock(&hugetlb_lock);
-if (!page)
+if (!page) {
 page = alloc_buddy_huge_page(vma, addr);
-return page ? page : ERR_PTR(-VM_FAULT_OOM);
+if (!page) {
+hugetlb_put_quota(vma->vm_file->f_mapping, 1);
+return ERR_PTR(-VM_FAULT_OOM);
+}
+}
+return page;
 }

 static struct page *alloc_huge_page(struct vm_area_struct *vma,
@@ -481,6 +522,12 @@ static unsigned long set_max_huge_pages(unsigned long count)
 * Increase the pool size
 * First take pages out of surplus state. Then make up the
 * remaining difference by allocating fresh huge pages.
+*
+* We might race with alloc_buddy_huge_page() here and be unable
+* to convert a surplus huge page to a normal huge page. That is
+* not critical, though, it just means the overall size of the
+* pool might be one hugepage larger than it needs to be, but
+* within all the constraints specified by the sysctls.
 */
 spin_lock(&hugetlb_lock);
 while (surplus_huge_pages && count > persistent_huge_pages) {
@@ -509,6 +556,14 @@ static unsigned long set_max_huge_pages(unsigned long count)
 * to keep enough around to satisfy reservations). Then place
 * pages into surplus state as needed so the pool will shrink
 * to the desired size as pages become free.
+*
+* By placing pages into the surplus state independent of the
+* overcommit value, we are allowing the surplus pool size to
+* exceed overcommit. There are few sane options here. Since
+* alloc_buddy_huge_page() is checking the global counter,
+* though, we'll note that we're not allowed to exceed surplus
+* and won't grow the pool anywhere else. Not until one of the
+* sysctls are changed, or the surplus pages go out of use.
 */
 min_count = resv_huge_pages + nr_huge_pages - free_huge_pages;
 min_count = max(count, min_count);
@@ -644,6 +699,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 dst_pte = huge_pte_alloc(dst, addr);
 if (!dst_pte)
 goto nomem;
+
+/* If the pagetables are shared don't copy or take references */
+if (dst_pte == src_pte)
+continue;
+
 spin_lock(&dst->page_table_lock);
 spin_lock(&src->page_table_lock);
 if (!pte_none(*src_pte)) {
@@ -907,7 +967,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 */
 pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

-if (!pte || pte_none(*pte)) {
+if (!pte || pte_none(*pte) || (write && !pte_write(*pte))) {
 int ret;

 spin_unlock(&mm->page_table_lock);
@@ -1156,8 +1216,10 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
 if (hugetlb_get_quota(inode->i_mapping, chg))
 return -ENOSPC;
 ret = hugetlb_acct_memory(chg);
-if (ret < 0)
+if (ret < 0) {
+hugetlb_put_quota(inode->i_mapping, chg);
 return ret;
+}
 region_add(&inode->i_mapping->private_list, from, to);
 return 0;
 }
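The core of the hugetlb change is the accounting order in alloc_buddy_huge_page(): the surplus counters are charged under hugetlb_lock before alloc_pages() runs, so concurrent callers can never collectively exceed nr_overcommit_huge_pages, and the counters are rolled back if the allocation fails. A simplified userspace sketch of that reserve-first, roll-back-on-failure pattern; the names and the limit value are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long nr_pages, surplus;
static unsigned long overcommit_limit = 4;

static void *alloc_surplus(size_t size)
{
	void *p;

	/* Charge the counters first, so concurrent callers already see
	 * this allocation as committed and cannot exceed the limit. */
	pthread_mutex_lock(&pool_lock);
	if (surplus >= overcommit_limit) {
		pthread_mutex_unlock(&pool_lock);
		return NULL;
	}
	nr_pages++;
	surplus++;
	pthread_mutex_unlock(&pool_lock);

	p = malloc(size);		/* the slow, unlocked part */

	if (!p) {
		/* Allocation failed: undo the optimistic accounting. */
		pthread_mutex_lock(&pool_lock);
		nr_pages--;
		surplus--;
		pthread_mutex_unlock(&pool_lock);
	}
	return p;
}

int main(void)
{
	for (int i = 0; i < 5; i++)
		printf("attempt %d: %s\n", i,
		       alloc_surplus(4096) ? "ok" : "refused");
	printf("surplus=%lu (limit %lu)\n", surplus, overcommit_limit);
	return 0;
}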
diff --git a/mm/memory.c b/mm/memory.c
index 4bf0b6d0eb2a..d902d0e25edc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -392,6 +392,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 return NULL;
 }

+#ifdef CONFIG_DEBUG_VM
 /*
 * Add some anal sanity checks for now. Eventually,
 * we should just do "return pfn_to_page(pfn)", but
@@ -402,6 +403,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
 print_bad_pte(vma, pte, addr);
 return NULL;
 }
+#endif

 /*
 * NOTE! We still have PageReserved() pages in the page
@@ -511,8 +513,7 @@ again:
 if (progress >= 32) {
 progress = 0;
 if (need_resched() ||
-need_lockbreak(src_ptl) ||
-need_lockbreak(dst_ptl))
+spin_needbreak(src_ptl) || spin_needbreak(dst_ptl))
 break;
 }
 if (pte_none(*src_pte)) {
@@ -851,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 tlb_finish_mmu(*tlbp, tlb_start, start);

 if (need_resched() ||
-(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
+(i_mmap_lock && spin_needbreak(i_mmap_lock))) {
 if (i_mmap_lock) {
 *tlbp = NULL;
 goto out;
@@ -1668,6 +1669,9 @@ gotten:
 unlock:
 pte_unmap_unlock(page_table, ptl);
 if (dirty_page) {
+if (vma->vm_file)
+file_update_time(vma->vm_file);
+
 /*
 * Yes, Virginia, this is actually required to prevent a race
 * with clear_page_dirty_for_io() from clearing the page dirty
@@ -1763,8 +1767,7 @@ again:

 restart_addr = zap_page_range(vma, start_addr,
 end_addr - start_addr, details);
-need_break = need_resched() ||
-need_lockbreak(details->i_mmap_lock);
+need_break = need_resched() || spin_needbreak(details->i_mmap_lock);

 if (restart_addr >= end_addr) {
 /* We have now completed this vma: mark it so */
@@ -2341,6 +2344,9 @@ out_unlocked:
 if (anon)
 page_cache_release(vmf.page);
 else if (dirty_page) {
+if (vma->vm_file)
+file_update_time(vma->vm_file);
+
 set_page_dirty_balance(dirty_page, page_mkwrite);
 put_page(dirty_page);
 }
@@ -2748,3 +2754,34 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in

 return buf - old_buf;
 }
+
+/*
+* Print the name of a VMA.
+*/
+void print_vma_addr(char *prefix, unsigned long ip)
+{
+struct mm_struct *mm = current->mm;
+struct vm_area_struct *vma;
+
+down_read(&mm->mmap_sem);
+vma = find_vma(mm, ip);
+if (vma && vma->vm_file) {
+struct file *f = vma->vm_file;
+char *buf = (char *)__get_free_page(GFP_KERNEL);
+if (buf) {
+char *p, *s;
+
+p = d_path(f->f_dentry, f->f_vfsmnt, buf, PAGE_SIZE);
+if (IS_ERR(p))
+p = "?";
+s = strrchr(p, '/');
+if (s)
+p = s+1;
+printk("%s%s[%lx+%lx]", prefix, p,
+vma->vm_start,
+vma->vm_end - vma->vm_start);
+free_page((unsigned long)buf);
+}
+}
+up_read(&current->mm->mmap_sem);
+}
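print_vma_addr() resolves the backing file with d_path(), trims everything up to the last '/' and prints the remaining component together with the VMA start and length. The basename-plus-range formatting is ordinary C; a runnable userspace equivalent (the path and addresses are made up):

#include <stdio.h>
#include <string.h>

/* Print "prefix name[start+len]", keeping only the last path component,
 * the same formatting print_vma_addr() uses for a mapped file. */
static void print_mapping(const char *prefix, const char *path,
			  unsigned long start, unsigned long end)
{
	const char *name = path;
	const char *slash = strrchr(path, '/');

	if (slash)
		name = slash + 1;
	printf("%s%s[%lx+%lx]\n", prefix, name, start, end - start);
}

int main(void)
{
	/* prints " in libc-2.7.so[b7e3c000+140000]" */
	print_mapping(" in ", "/lib/libc-2.7.so", 0xb7e3c000, 0xb7f7c000);
	return 0;
}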
diff --git a/mm/mmap.c b/mm/mmap.c
@@ -251,7 +251,8 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
 * not page aligned -Ram Gupta
 */
 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
-if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
+if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
+(mm->end_data - mm->start_data) > rlim)
 goto out;

 newbrk = PAGE_ALIGN(brk);
@@ -1620,7 +1621,7 @@ static inline int expand_downwards(struct vm_area_struct *vma,
 return -ENOMEM;

 address &= PAGE_MASK;
-error = security_file_mmap(0, 0, 0, 0, address, 1);
+error = security_file_mmap(NULL, 0, 0, 0, address, 1);
 if (error)
 return error;

@@ -1941,7 +1942,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 if (is_hugepage_only_range(mm, addr, len))
 return -EINVAL;

-error = security_file_mmap(0, 0, 0, 0, addr, 1);
+error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 if (error)
 return error;

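The sys_brk() fix charges RLIMIT_DATA with the heap size (brk - start_brk) plus the data segment size (end_data - start_data); the old check compared brk - start_data, mixing two unrelated ranges. A standalone sketch of the corrected arithmetic; the field names mirror mm_struct but the numbers are invented:

#include <stdbool.h>
#include <stdio.h>

struct mm_layout {
	unsigned long start_brk;		/* heap start */
	unsigned long start_data, end_data;	/* data segment */
};

/* Would growing the heap to new_brk exceed the data limit? */
static bool brk_exceeds_rlimit(const struct mm_layout *mm,
			       unsigned long new_brk, unsigned long rlim)
{
	return (new_brk - mm->start_brk) +
	       (mm->end_data - mm->start_data) > rlim;
}

int main(void)
{
	struct mm_layout mm = {
		.start_data = 0x08049000, .end_data = 0x0804b000, /* 8 KiB */
		.start_brk  = 0x0804c000,
	};

	/* 8 KiB of data + 56 KiB of heap against a 64 KiB limit: just fits. */
	printf("%d\n", brk_exceeds_rlimit(&mm, mm.start_brk + 56 * 1024,
					  64 * 1024));	/* prints 0 */
	return 0;
}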
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 91a081a82f55..96473b482099 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -286,7 +286,7 @@ static void __oom_kill_task(struct task_struct *p, int verbose)
 * all the memory it needs. That way it should be able to
 * exit() and clear out its resources quickly...
 */
-p->time_slice = HZ;
+p->rt.time_slice = HZ;
 set_tsk_thread_flag(p, TIF_MEMDIE);

 force_sig(SIGKILL, p);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d55cfcae2ef1..3d3848fa6324 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -558,7 +558,6 @@ static void background_writeout(unsigned long _min_pages)
 global_page_state(NR_UNSTABLE_NFS) < background_thresh
 && min_pages <= 0)
 break;
-wbc.more_io = 0;
 wbc.encountered_congestion = 0;
 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 wbc.pages_skipped = 0;
@@ -566,9 +565,8 @@ static void background_writeout(unsigned long _min_pages)
 min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
 /* Wrote less than expected */
-if (wbc.encountered_congestion || wbc.more_io)
-congestion_wait(WRITE, HZ/10);
-else
+congestion_wait(WRITE, HZ/10);
+if (!wbc.encountered_congestion)
 break;
 }
 }
@@ -633,12 +631,11 @@ static void wb_kupdate(unsigned long arg)
 global_page_state(NR_UNSTABLE_NFS) +
 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
 while (nr_to_write > 0) {
-wbc.more_io = 0;
 wbc.encountered_congestion = 0;
 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 writeback_inodes(&wbc);
 if (wbc.nr_to_write > 0) {
-if (wbc.encountered_congestion || wbc.more_io)
+if (wbc.encountered_congestion)
 congestion_wait(WRITE, HZ/10);
 else
 break; /* All the old data is written */
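Dropping wbc.more_io restores the earlier loop shape in both writers: when a batch writes less than expected, wait briefly and keep looping only if the queue really was congested. A rough standalone sketch of that loop shape; the stubs stand in for writeback_inodes() and the congestion API, they are not the writeback code itself:

#include <stdio.h>

#define BATCH 16

static long pending = 40;	/* pages still needing writeback (stub) */
static int congested;		/* device congestion flag (stub) */

static long write_some_pages(long want)
{
	long n = pending < want ? pending : want;

	pending -= n;
	return n;
}

/* Write in fixed-size batches; if a batch comes up short, wait a beat
 * (congestion_wait() in the kernel) and stop unless congested. */
static void background_writeout(long min_pages)
{
	while (min_pages > 0) {
		long wrote = write_some_pages(BATCH);

		min_pages -= wrote;
		if (wrote < BATCH) {
			/* congestion_wait(WRITE, HZ/10) would sleep here */
			if (!congested)
				break;
		}
	}
}

int main(void)
{
	background_writeout(64);
	printf("pages left unwritten: %ld\n", pending);
	return 0;
}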
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b5a58d476c1a..b2838c24e582 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -847,8 +847,19 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 struct page *page = __rmqueue(zone, order, migratetype);
 if (unlikely(page == NULL))
 break;
+
+/*
+* Split buddy pages returned by expand() are received here
+* in physical page order. The page is added to the callers and
+* list and the list head then moves forward. From the callers
+* perspective, the linked list is ordered by page number in
+* some conditions. This is useful for IO devices that can
+* merge IO requests if the physical pages are ordered
+* properly.
+*/
 list_add(&page->lru, list);
 set_page_private(page, migratetype);
+list = &page->lru;
 }
 spin_unlock(&zone->lock);
 return i;
@@ -2555,7 +2566,7 @@ static void __meminit zone_init_free_lists(struct pglist_data *pgdat,
 memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
 #endif

-static int __devinit zone_batchsize(struct zone *zone)
+static int zone_batchsize(struct zone *zone)
 {
 int batch;

@@ -3427,7 +3438,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
 mem_map = NODE_DATA(0)->node_mem_map;
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
-mem_map -= pgdat->node_start_pfn;
+mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
 #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
 }
 #endif
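The rmqueue_bulk() comment is really about the added "list = &page->lru;": by advancing the insertion point after each list_add(), the pages land on the caller's list in the order __rmqueue() produced them, which helps block devices merge adjacent I/O. A userspace sketch of inserting after a moving cursor; the tiny list type stands in for <linux/list.h>:

#include <stdio.h>

struct node {
	int pfn;		/* stands in for the page frame number */
	struct node *next;
};

/* Insert 'n' right after 'cursor' and return the new cursor, mimicking
 *	list_add(&page->lru, list);
 *	list = &page->lru;
 * so each new element goes after the previous one and the chain keeps
 * the order in which elements were produced. */
static struct node *add_after(struct node *cursor, struct node *n)
{
	n->next = cursor->next;
	cursor->next = n;
	return n;
}

int main(void)
{
	struct node head = { .pfn = -1, .next = NULL };
	struct node pages[4] = { { 100 }, { 101 }, { 102 }, { 103 } };
	struct node *cursor = &head;

	for (int i = 0; i < 4; i++)
		cursor = add_after(cursor, &pages[i]);

	for (struct node *n = head.next; n; n = n->next)
		printf("%d ", n->pfn);	/* 100 101 102 103 */
	printf("\n");
	return 0;
}

With a fixed head instead of a moving cursor the same loop would produce 103 102 101 100.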
diff --git a/mm/quicklist.c b/mm/quicklist.c
index ae8189c2799e..3f703f7cb398 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -26,9 +26,17 @@ DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
 static unsigned long max_pages(unsigned long min_pages)
 {
 unsigned long node_free_pages, max;
+struct zone *zones = NODE_DATA(numa_node_id())->node_zones;
+
+node_free_pages =
+#ifdef CONFIG_ZONE_DMA
+zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
+#endif
+#ifdef CONFIG_ZONE_DMA32
+zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
+#endif
+zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);

-node_free_pages = node_page_state(numa_node_id(),
-NR_FREE_PAGES);
 max = node_free_pages / FRACTION_OF_NODE_MEM;
 return max(max, min_pages);
 }
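max_pages() now sums NR_FREE_PAGES over the DMA, DMA32 and NORMAL zones of the local node instead of using node_page_state(), so highmem (which quicklists never allocate from) no longer inflates the cap; the cap itself is just the low-memory free count divided by FRACTION_OF_NODE_MEM, floored at min_pages. A small sketch of that arithmetic; the divisor used here is illustrative, not necessarily the kernel's value:

#include <stdio.h>

#define FRACTION_OF_NODE_MEM	16

static unsigned long max(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

/* Cap the quicklist at a fraction of the node's usable free memory,
 * but never below the caller's minimum. 'free_lowmem_pages' stands for
 * the ZONE_DMA + ZONE_DMA32 + ZONE_NORMAL sum in the patch. */
static unsigned long max_pages(unsigned long free_lowmem_pages,
			       unsigned long min_pages)
{
	return max(free_lowmem_pages / FRACTION_OF_NODE_MEM, min_pages);
}

int main(void)
{
	printf("%lu\n", max_pages(65536, 128));	/* 4096 */
	printf("%lu\n", max_pages(1024, 128));	/* 128: the floor wins */
	return 0;
}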
diff --git a/mm/slab.c b/mm/slab.c
@@ -304,11 +304,11 @@ struct kmem_list3 {
 /*
 * Need this for bootstrapping a per node allocator.
 */
-#define NUM_INIT_LISTS (2 * MAX_NUMNODES + 1)
+#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
 struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define CACHE_CACHE 0
-#define SIZE_AC 1
-#define SIZE_L3 (1 + MAX_NUMNODES)
+#define SIZE_AC MAX_NUMNODES
+#define SIZE_L3 (2 * MAX_NUMNODES)

 static int drain_freelist(struct kmem_cache *cache,
 struct kmem_list3 *l3, int tofree);
@@ -730,8 +730,7 @@ static inline void init_lock_keys(void)
 #endif

 /*
-* 1. Guard access to the cache-chain.
-* 2. Protect sanity of cpu_online_map against cpu hotplug events
+* Guard access to the cache-chain.
 */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
@@ -1331,12 +1330,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 int err = 0;

 switch (action) {
-case CPU_LOCK_ACQUIRE:
-mutex_lock(&cache_chain_mutex);
-break;
 case CPU_UP_PREPARE:
 case CPU_UP_PREPARE_FROZEN:
+mutex_lock(&cache_chain_mutex);
 err = cpuup_prepare(cpu);
+mutex_unlock(&cache_chain_mutex);
 break;
 case CPU_ONLINE:
 case CPU_ONLINE_FROZEN:
@@ -1373,9 +1371,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 #endif
 case CPU_UP_CANCELED:
 case CPU_UP_CANCELED_FROZEN:
+mutex_lock(&cache_chain_mutex);
 cpuup_canceled(cpu);
-break;
-case CPU_LOCK_RELEASE:
 mutex_unlock(&cache_chain_mutex);
 break;
 }
@@ -1410,6 +1407,22 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 }

 /*
+* For setting up all the kmem_list3s for cache whose buffer_size is same as
+* size of kmem_list3.
+*/
+static void __init set_up_list3s(struct kmem_cache *cachep, int index)
+{
+int node;
+
+for_each_online_node(node) {
+cachep->nodelists[node] = &initkmem_list3[index + node];
+cachep->nodelists[node]->next_reap = jiffies +
+REAPTIMEOUT_LIST3 +
+((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+}
+}
+
+/*
 * Initialisation. Called after the page allocator have been initialised and
 * before smp_init().
 */
@@ -1432,6 +1445,7 @@ void __init kmem_cache_init(void)
 if (i < MAX_NUMNODES)
 cache_cache.nodelists[i] = NULL;
 }
+set_up_list3s(&cache_cache, CACHE_CACHE);

 /*
 * Fragmentation resistance on low memory - only use bigger
@@ -1587,10 +1601,9 @@ void __init kmem_cache_init(void)
 {
 int nid;

-/* Replace the static kmem_list3 structures for the boot cpu */
-init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], node);
+for_each_online_node(nid) {
+init_list(&cache_cache, &initkmem_list3[CACHE_CACHE], nid);

-for_each_node_state(nid, N_NORMAL_MEMORY) {
 init_list(malloc_sizes[INDEX_AC].cs_cachep,
 &initkmem_list3[SIZE_AC + nid], nid);

@@ -1960,22 +1973,6 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 }
 }

-/*
-* For setting up all the kmem_list3s for cache whose buffer_size is same as
-* size of kmem_list3.
-*/
-static void __init set_up_list3s(struct kmem_cache *cachep, int index)
-{
-int node;
-
-for_each_node_state(node, N_NORMAL_MEMORY) {
-cachep->nodelists[node] = &initkmem_list3[index + node];
-cachep->nodelists[node]->next_reap = jiffies +
-REAPTIMEOUT_LIST3 +
-((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-}
-}
-
 static void __kmem_cache_destroy(struct kmem_cache *cachep)
 {
 int i;
@@ -2099,7 +2096,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 g_cpucache_up = PARTIAL_L3;
 } else {
 int node;
-for_each_node_state(node, N_NORMAL_MEMORY) {
+for_each_online_node(node) {
 cachep->nodelists[node] =
 kmalloc_node(sizeof(struct kmem_list3),
 GFP_KERNEL, node);
@@ -2170,6 +2167,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 * We use cache_chain_mutex to ensure a consistent view of
 * cpu_online_map as well. Please see cpuup_callback
 */
+get_online_cpus();
 mutex_lock(&cache_chain_mutex);

 list_for_each_entry(pc, &cache_chain, next) {
@@ -2396,6 +2394,7 @@ oops:
 panic("kmem_cache_create(): failed to create slab `%s'\n",
 name);
 mutex_unlock(&cache_chain_mutex);
+put_online_cpus();
 return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2547,9 +2546,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 int ret;
 BUG_ON(!cachep || in_interrupt());

+get_online_cpus();
 mutex_lock(&cache_chain_mutex);
 ret = __cache_shrink(cachep);
 mutex_unlock(&cache_chain_mutex);
+put_online_cpus();
 return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
@@ -2575,6 +2576,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 BUG_ON(!cachep || in_interrupt());

 /* Find the cache in the chain of caches. */
+get_online_cpus();
 mutex_lock(&cache_chain_mutex);
 /*
 * the chain is never empty, cache_cache is never destroyed
@@ -2584,6 +2586,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
 slab_error(cachep, "Can't free all objects");
 list_add(&cachep->next, &cache_chain);
 mutex_unlock(&cache_chain_mutex);
+put_online_cpus();
 return;
 }

@@ -2592,6 +2595,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)

 __kmem_cache_destroy(cachep);
 mutex_unlock(&cache_chain_mutex);
+put_online_cpus();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);

@@ -3815,7 +3819,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 struct array_cache *new_shared;
 struct array_cache **new_alien = NULL;

-for_each_node_state(node, N_NORMAL_MEMORY) {
+for_each_online_node(node) {

 if (use_alien_caches) {
 new_alien = alloc_alien_cache(node, cachep->limit);
@@ -4105,7 +4109,7 @@ out:
 schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
 }

-#ifdef CONFIG_PROC_FS
+#ifdef CONFIG_SLABINFO

 static void print_slabinfo_header(struct seq_file *m)
 {
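With NUM_INIT_LISTS raised to 3 * MAX_NUMNODES, initkmem_list3[] becomes three per-node banks: CACHE_CACHE starts at 0, SIZE_AC at MAX_NUMNODES and SIZE_L3 at 2 * MAX_NUMNODES, and set_up_list3s() hands slot index + node to each node. A standalone sketch of that bank/offset layout; the bookkeeping struct is invented for illustration:

#include <stdio.h>

#define MAX_NUMNODES	4
#define NUM_INIT_LISTS	(3 * MAX_NUMNODES)

#define CACHE_CACHE	0
#define SIZE_AC		MAX_NUMNODES
#define SIZE_L3		(2 * MAX_NUMNODES)

struct bootstrap_list { int cache_id, node; };

static struct bootstrap_list initkmem_list3[NUM_INIT_LISTS];

/* Mirror of set_up_list3s(): give each node its own slot inside the
 * bank that starts at 'bank'. */
static void set_up_list3s(int bank, int cache_id)
{
	for (int node = 0; node < MAX_NUMNODES; node++) {
		initkmem_list3[bank + node].cache_id = cache_id;
		initkmem_list3[bank + node].node = node;
	}
}

int main(void)
{
	set_up_list3s(CACHE_CACHE, 0);	/* cache_cache */
	set_up_list3s(SIZE_AC, 1);	/* the AC kmalloc cache */
	set_up_list3s(SIZE_L3, 2);	/* the kmem_list3 kmalloc cache */

	/* Every one of the 3 * MAX_NUMNODES slots is used exactly once. */
	for (int i = 0; i < NUM_INIT_LISTS; i++)
		printf("slot %2d -> cache %d, node %d\n", i,
		       initkmem_list3[i].cache_id, initkmem_list3[i].node);
	return 0;
}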
diff --git a/mm/slob.c b/mm/slob.c
@@ -330,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)

 /* Not enough space: must allocate a new page */
 if (!b) {
-b = slob_new_page(gfp, 0, node);
+b = slob_new_page(gfp & ~__GFP_ZERO, 0, node);
 if (!b)
 return 0;
 sp = (struct slob_page *)virt_to_page(b);
diff --git a/mm/slub.c b/mm/slub.c
@@ -172,7 +172,7 @@ static inline void ClearSlabDebug(struct page *page)
 * Mininum number of partial slabs. These will be left on the partial
 * lists even if they are empty. kmem_cache_shrink may reclaim them.
 */
-#define MIN_PARTIAL 2
+#define MIN_PARTIAL 5

 /*
 * Maximum number of desirable partial slabs.
@@ -1613,7 +1613,7 @@ checks_ok:
 * then add it.
 */
 if (unlikely(!prior))
-add_partial(get_node(s, page_to_nid(page)), page);
+add_partial_tail(get_node(s, page_to_nid(page)), page);

 out_unlock:
 slab_unlock(page);
@@ -3076,6 +3076,19 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 return slab_alloc(s, gfpflags, node, caller);
 }

+static unsigned long count_partial(struct kmem_cache_node *n)
+{
+unsigned long flags;
+unsigned long x = 0;
+struct page *page;
+
+spin_lock_irqsave(&n->list_lock, flags);
+list_for_each_entry(page, &n->partial, lru)
+x += page->inuse;
+spin_unlock_irqrestore(&n->list_lock, flags);
+return x;
+}
+
 #if defined(CONFIG_SYSFS) && defined(CONFIG_SLUB_DEBUG)
 static int validate_slab(struct kmem_cache *s, struct page *page,
 unsigned long *map)
@@ -3458,19 +3471,6 @@ static int list_locations(struct kmem_cache *s, char *buf,
 return n;
 }

-static unsigned long count_partial(struct kmem_cache_node *n)
-{
-unsigned long flags;
-unsigned long x = 0;
-struct page *page;
-
-spin_lock_irqsave(&n->list_lock, flags);
-list_for_each_entry(page, &n->partial, lru)
-x += page->inuse;
-spin_unlock_irqrestore(&n->list_lock, flags);
-return x;
-}
-
 enum slab_stat_type {
 SL_FULL,
 SL_PARTIAL,
@@ -3962,7 +3962,7 @@ static struct kset_uevent_ops slab_uevent_ops = {
 .filter = uevent_filter,
 };

-static decl_subsys(slab, &slab_ktype, &slab_uevent_ops);
+static struct kset *slab_kset;

 #define ID_STR_LENGTH 64

@@ -4015,7 +4015,7 @@ static int sysfs_slab_add(struct kmem_cache *s)
 * This is typically the case for debug situations. In that
 * case we can catch duplicate names easily.
 */
-sysfs_remove_link(&slab_subsys.kobj, s->name);
+sysfs_remove_link(&slab_kset->kobj, s->name);
 name = s->name;
 } else {
 /*
@@ -4025,12 +4025,12 @@ static int sysfs_slab_add(struct kmem_cache *s)
 name = create_unique_id(s);
 }

-kobj_set_kset_s(s, slab_subsys);
-kobject_set_name(&s->kobj, name);
-kobject_init(&s->kobj);
-err = kobject_add(&s->kobj);
-if (err)
+s->kobj.kset = slab_kset;
+err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, name);
+if (err) {
+kobject_put(&s->kobj);
 return err;
+}

 err = sysfs_create_group(&s->kobj, &slab_attr_group);
 if (err)
@@ -4070,9 +4070,8 @@ static int sysfs_slab_alias(struct kmem_cache *s, const char *name)
 /*
 * If we have a leftover link then remove it.
 */
-sysfs_remove_link(&slab_subsys.kobj, name);
-return sysfs_create_link(&slab_subsys.kobj,
-&s->kobj, name);
+sysfs_remove_link(&slab_kset->kobj, name);
+return sysfs_create_link(&slab_kset->kobj, &s->kobj, name);
 }

 al = kmalloc(sizeof(struct saved_alias), GFP_KERNEL);
@@ -4091,8 +4090,8 @@ static int __init slab_sysfs_init(void)
 struct kmem_cache *s;
 int err;

-err = subsystem_register(&slab_subsys);
-if (err) {
+slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj);
+if (!slab_kset) {
 printk(KERN_ERR "Cannot register slab subsystem.\n");
 return -ENOSYS;
 }
@@ -4123,3 +4122,89 @@ static int __init slab_sysfs_init(void)

 __initcall(slab_sysfs_init);
 #endif
+
+/*
+* The /proc/slabinfo ABI
+*/
+#ifdef CONFIG_SLABINFO
+
+ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+size_t count, loff_t *ppos)
+{
+return -EINVAL;
+}
+
+
+static void print_slabinfo_header(struct seq_file *m)
+{
+seq_puts(m, "slabinfo - version: 2.1\n");
+seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
+"<objperslab> <pagesperslab>");
+seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+loff_t n = *pos;
+
+down_read(&slub_lock);
+if (!n)
+print_slabinfo_header(m);
+
+return seq_list_start(&slab_caches, *pos);
+}
+
+static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+{
+return seq_list_next(p, &slab_caches, pos);
+}
+
+static void s_stop(struct seq_file *m, void *p)
+{
+up_read(&slub_lock);
+}
+
+static int s_show(struct seq_file *m, void *p)
+{
+unsigned long nr_partials = 0;
+unsigned long nr_slabs = 0;
+unsigned long nr_inuse = 0;
+unsigned long nr_objs;
+struct kmem_cache *s;
+int node;
+
+s = list_entry(p, struct kmem_cache, list);
+
+for_each_online_node(node) {
+struct kmem_cache_node *n = get_node(s, node);
+
+if (!n)
+continue;
+
+nr_partials += n->nr_partial;
+nr_slabs += atomic_long_read(&n->nr_slabs);
+nr_inuse += count_partial(n);
+}
+
+nr_objs = nr_slabs * s->objects;
+nr_inuse += (nr_slabs - nr_partials) * s->objects;
+
+seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
+nr_objs, s->size, s->objects, (1 << s->order));
+seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
+seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
+0UL);
+seq_putc(m, '\n');
+return 0;
+}
+
+const struct seq_operations slabinfo_op = {
+.start = s_start,
+.next = s_next,
+.stop = s_stop,
+.show = s_show,
+};
+
+#endif /* CONFIG_SLABINFO */
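s_show() reconstructs the /proc/slabinfo numbers from per-node SLUB state: total objects are nr_slabs times the objects per slab, and the in-use count is whatever count_partial() found on partial slabs plus every object of the (nr_slabs - nr_partials) full slabs. A small standalone check of that arithmetic with made-up per-node numbers:

#include <stdio.h>

struct node_stats {
	unsigned long nr_partial;	/* slabs on the partial list */
	unsigned long nr_slabs;		/* all slabs on this node */
	unsigned long partial_inuse;	/* objects in use on partial slabs */
};

int main(void)
{
	/* Two fake NUMA nodes; 'objects' is the per-slab object count. */
	const unsigned long objects = 32;
	const struct node_stats nodes[2] = {
		{ .nr_partial = 2, .nr_slabs = 10, .partial_inuse = 17 },
		{ .nr_partial = 1, .nr_slabs = 4,  .partial_inuse = 5 },
	};
	unsigned long nr_partials = 0, nr_slabs = 0, nr_inuse = 0, nr_objs;

	for (int i = 0; i < 2; i++) {
		nr_partials += nodes[i].nr_partial;
		nr_slabs += nodes[i].nr_slabs;
		nr_inuse += nodes[i].partial_inuse;	/* count_partial() */
	}

	nr_objs = nr_slabs * objects;
	/* Full (non-partial) slabs have every object in use. */
	nr_inuse += (nr_slabs - nr_partials) * objects;

	printf("active_objs=%lu num_objs=%lu\n", nr_inuse, nr_objs);
	return 0;
}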
diff --git a/mm/sparse.c b/mm/sparse.c
index e06f514fe04f..a2183cb5d524 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -83,6 +83,8 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
 return -EEXIST;

 section = sparse_index_alloc(nid);
+if (!section)
+return -ENOMEM;
 /*
 * This lock keeps two different sections from
 * reallocating for the same index
@@ -389,9 +391,17 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 * no locking for this, because it does its own
 * plus, it does a kmalloc
 */
-sparse_index_init(section_nr, pgdat->node_id);
+ret = sparse_index_init(section_nr, pgdat->node_id);
+if (ret < 0 && ret != -EEXIST)
+return ret;
 memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, nr_pages);
+if (!memmap)
+return -ENOMEM;
 usemap = __kmalloc_section_usemap();
+if (!usemap) {
+__kfree_section_memmap(memmap, nr_pages);
+return -ENOMEM;
+}

 pgdat_resize_lock(pgdat, &flags);

@@ -401,18 +411,16 @@ int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 goto out;
 }

-if (!usemap) {
-ret = -ENOMEM;
-goto out;
-}
 ms->section_mem_map |= SECTION_MARKED_PRESENT;

 ret = sparse_init_one_section(ms, section_nr, memmap, usemap);

 out:
 pgdat_resize_unlock(pgdat, &flags);
-if (ret <= 0)
+if (ret <= 0) {
+kfree(usemap);
 __kfree_section_memmap(memmap, nr_pages);
+}
 return ret;
 }
 #endif
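sparse_add_one_section() now fails each allocation where it happens and, on the later error paths, frees exactly what it had already allocated (the usemap and the section memmap). The same unwind pattern, sketched as a standalone function with calloc/free standing in for the section allocators:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Allocate two dependent buffers; on any failure free only what was
 * already allocated, mirroring the corrected sparse_add_one_section(). */
static int setup_section(void **memmap, void **usemap, size_t pages)
{
	*memmap = calloc(pages, 64);
	if (!*memmap)
		return -ENOMEM;

	*usemap = calloc(1, 128);
	if (!*usemap) {
		free(*memmap);		/* undo the first allocation */
		*memmap = NULL;
		return -ENOMEM;
	}
	return 0;
}

int main(void)
{
	void *memmap, *usemap;

	if (setup_section(&memmap, &usemap, 512) == 0) {
		puts("section set up");
		free(usemap);
		free(memmap);
	}
	return 0;
}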