diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/fadvise.c | 18 | ||||
-rw-r--r-- | mm/filemap.c | 9 | ||||
-rw-r--r-- | mm/fremap.c | 6 | ||||
-rw-r--r-- | mm/hugetlb.c | 28 | ||||
-rw-r--r-- | mm/madvise.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 155 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/mempolicy.c | 24 | ||||
-rw-r--r-- | mm/migrate.c | 10 | ||||
-rw-r--r-- | mm/mincore.c | 4 | ||||
-rw-r--r-- | mm/mlock.c | 63 | ||||
-rw-r--r-- | mm/mmap.c | 113 | ||||
-rw-r--r-- | mm/mprotect.c | 9 | ||||
-rw-r--r-- | mm/mremap.c | 6 | ||||
-rw-r--r-- | mm/msync.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 35 | ||||
-rw-r--r-- | mm/page-writeback.c | 35 | ||||
-rw-r--r-- | mm/page_alloc.c | 27 | ||||
-rw-r--r-- | mm/page_cgroup.c | 3 | ||||
-rw-r--r-- | mm/page_io.c | 2 | ||||
-rw-r--r-- | mm/rmap.c | 3 | ||||
-rw-r--r-- | mm/shmem.c | 43 | ||||
-rw-r--r-- | mm/slab.c | 1 | ||||
-rw-r--r-- | mm/slob.c | 44 | ||||
-rw-r--r-- | mm/slub.c | 75 | ||||
-rw-r--r-- | mm/swapfile.c | 13 | ||||
-rw-r--r-- | mm/util.c | 20 | ||||
-rw-r--r-- | mm/vmalloc.c | 40 | ||||
-rw-r--r-- | mm/vmscan.c | 32 |
29 files changed, 512 insertions, 312 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c index a1da969bd980..54a0f8040afa 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c | |||
@@ -24,7 +24,7 @@ | |||
24 | * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could | 24 | * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could |
25 | * deactivate the pages and clear PG_Referenced. | 25 | * deactivate the pages and clear PG_Referenced. |
26 | */ | 26 | */ |
27 | asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) | 27 | SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) |
28 | { | 28 | { |
29 | struct file *file = fget(fd); | 29 | struct file *file = fget(fd); |
30 | struct address_space *mapping; | 30 | struct address_space *mapping; |
@@ -126,12 +126,26 @@ out: | |||
126 | fput(file); | 126 | fput(file); |
127 | return ret; | 127 | return ret; |
128 | } | 128 | } |
129 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | ||
130 | asmlinkage long SyS_fadvise64_64(long fd, loff_t offset, loff_t len, long advice) | ||
131 | { | ||
132 | return SYSC_fadvise64_64((int) fd, offset, len, (int) advice); | ||
133 | } | ||
134 | SYSCALL_ALIAS(sys_fadvise64_64, SyS_fadvise64_64); | ||
135 | #endif | ||
129 | 136 | ||
130 | #ifdef __ARCH_WANT_SYS_FADVISE64 | 137 | #ifdef __ARCH_WANT_SYS_FADVISE64 |
131 | 138 | ||
132 | asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice) | 139 | SYSCALL_DEFINE(fadvise64)(int fd, loff_t offset, size_t len, int advice) |
133 | { | 140 | { |
134 | return sys_fadvise64_64(fd, offset, len, advice); | 141 | return sys_fadvise64_64(fd, offset, len, advice); |
135 | } | 142 | } |
143 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | ||
144 | asmlinkage long SyS_fadvise64(long fd, loff_t offset, long len, long advice) | ||
145 | { | ||
146 | return SYSC_fadvise64((int) fd, offset, (size_t)len, (int)advice); | ||
147 | } | ||
148 | SYSCALL_ALIAS(sys_fadvise64, SyS_fadvise64); | ||
149 | #endif | ||
136 | 150 | ||
137 | #endif | 151 | #endif |
diff --git a/mm/filemap.c b/mm/filemap.c index ceba0bd03662..23acefe51808 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp, | |||
1374 | return 0; | 1374 | return 0; |
1375 | } | 1375 | } |
1376 | 1376 | ||
1377 | asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) | 1377 | SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) |
1378 | { | 1378 | { |
1379 | ssize_t ret; | 1379 | ssize_t ret; |
1380 | struct file *file; | 1380 | struct file *file; |
@@ -1393,6 +1393,13 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) | |||
1393 | } | 1393 | } |
1394 | return ret; | 1394 | return ret; |
1395 | } | 1395 | } |
1396 | #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS | ||
1397 | asmlinkage long SyS_readahead(long fd, loff_t offset, long count) | ||
1398 | { | ||
1399 | return SYSC_readahead((int) fd, offset, (size_t) count); | ||
1400 | } | ||
1401 | SYSCALL_ALIAS(sys_readahead, SyS_readahead); | ||
1402 | #endif | ||
1396 | 1403 | ||
1397 | #ifdef CONFIG_MMU | 1404 | #ifdef CONFIG_MMU |
1398 | /** | 1405 | /** |
diff --git a/mm/fremap.c b/mm/fremap.c index 62d5bbda921a..b6ec85abbb39 100644 --- a/mm/fremap.c +++ b/mm/fremap.c | |||
@@ -120,8 +120,8 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, | |||
120 | * and the vma's default protection is used. Arbitrary protections | 120 | * and the vma's default protection is used. Arbitrary protections |
121 | * might be implemented in the future. | 121 | * might be implemented in the future. |
122 | */ | 122 | */ |
123 | asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, | 123 | SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, |
124 | unsigned long prot, unsigned long pgoff, unsigned long flags) | 124 | unsigned long, prot, unsigned long, pgoff, unsigned long, flags) |
125 | { | 125 | { |
126 | struct mm_struct *mm = current->mm; | 126 | struct mm_struct *mm = current->mm; |
127 | struct address_space *mapping; | 127 | struct address_space *mapping; |
@@ -198,7 +198,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, | |||
198 | flags &= MAP_NONBLOCK; | 198 | flags &= MAP_NONBLOCK; |
199 | get_file(file); | 199 | get_file(file); |
200 | addr = mmap_region(file, start, size, | 200 | addr = mmap_region(file, start, size, |
201 | flags, vma->vm_flags, pgoff, 1); | 201 | flags, vma->vm_flags, pgoff); |
202 | fput(file); | 202 | fput(file); |
203 | if (IS_ERR_VALUE(addr)) { | 203 | if (IS_ERR_VALUE(addr)) { |
204 | err = addr; | 204 | err = addr; |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 618e98304080..107da3d809a8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2269,12 +2269,18 @@ void hugetlb_change_protection(struct vm_area_struct *vma, | |||
2269 | 2269 | ||
2270 | int hugetlb_reserve_pages(struct inode *inode, | 2270 | int hugetlb_reserve_pages(struct inode *inode, |
2271 | long from, long to, | 2271 | long from, long to, |
2272 | struct vm_area_struct *vma) | 2272 | struct vm_area_struct *vma, |
2273 | int acctflag) | ||
2273 | { | 2274 | { |
2274 | long ret, chg; | 2275 | long ret, chg; |
2275 | struct hstate *h = hstate_inode(inode); | 2276 | struct hstate *h = hstate_inode(inode); |
2276 | 2277 | ||
2277 | if (vma && vma->vm_flags & VM_NORESERVE) | 2278 | /* |
2279 | * Only apply hugepage reservation if asked. At fault time, an | ||
2280 | * attempt will be made for VM_NORESERVE to allocate a page | ||
2281 | * and filesystem quota without using reserves | ||
2282 | */ | ||
2283 | if (acctflag & VM_NORESERVE) | ||
2278 | return 0; | 2284 | return 0; |
2279 | 2285 | ||
2280 | /* | 2286 | /* |
@@ -2299,13 +2305,31 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
2299 | if (chg < 0) | 2305 | if (chg < 0) |
2300 | return chg; | 2306 | return chg; |
2301 | 2307 | ||
2308 | /* There must be enough filesystem quota for the mapping */ | ||
2302 | if (hugetlb_get_quota(inode->i_mapping, chg)) | 2309 | if (hugetlb_get_quota(inode->i_mapping, chg)) |
2303 | return -ENOSPC; | 2310 | return -ENOSPC; |
2311 | |||
2312 | /* | ||
2313 | * Check enough hugepages are available for the reservation. | ||
2314 | * Hand back the quota if there are not | ||
2315 | */ | ||
2304 | ret = hugetlb_acct_memory(h, chg); | 2316 | ret = hugetlb_acct_memory(h, chg); |
2305 | if (ret < 0) { | 2317 | if (ret < 0) { |
2306 | hugetlb_put_quota(inode->i_mapping, chg); | 2318 | hugetlb_put_quota(inode->i_mapping, chg); |
2307 | return ret; | 2319 | return ret; |
2308 | } | 2320 | } |
2321 | |||
2322 | /* | ||
2323 | * Account for the reservations made. Shared mappings record regions | ||
2324 | * that have reservations as they are shared by multiple VMAs. | ||
2325 | * When the last VMA disappears, the region map says how much | ||
2326 | * the reservation was and the page cache tells how much of | ||
2327 | * the reservation was consumed. Private mappings are per-VMA and | ||
2328 | * only the consumed reservations are tracked. When the VMA | ||
2329 | * disappears, the original reservation is the VMA size and the | ||
2330 | * consumed reservations are stored in the map. Hence, nothing | ||
2331 | * else has to be done for private mappings here | ||
2332 | */ | ||
2309 | if (!vma || vma->vm_flags & VM_SHARED) | 2333 | if (!vma || vma->vm_flags & VM_SHARED) |
2310 | region_add(&inode->i_mapping->private_list, from, to); | 2334 | region_add(&inode->i_mapping->private_list, from, to); |
2311 | return 0; | 2335 | return 0; |
diff --git a/mm/madvise.c b/mm/madvise.c index f9349c18a1b5..b9ce574827c8 100644 --- a/mm/madvise.c +++ b/mm/madvise.c | |||
@@ -281,7 +281,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, | |||
281 | * -EBADF - map exists, but area maps something that isn't a file. | 281 | * -EBADF - map exists, but area maps something that isn't a file. |
282 | * -EAGAIN - a kernel resource was temporarily unavailable. | 282 | * -EAGAIN - a kernel resource was temporarily unavailable. |
283 | */ | 283 | */ |
284 | asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) | 284 | SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) |
285 | { | 285 | { |
286 | unsigned long end, tmp; | 286 | unsigned long end, tmp; |
287 | struct vm_area_struct * vma, *prev; | 287 | struct vm_area_struct * vma, *prev; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2996b80601f..8e4be9cb2a6a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = { | |||
202 | 202 | ||
203 | static void mem_cgroup_get(struct mem_cgroup *mem); | 203 | static void mem_cgroup_get(struct mem_cgroup *mem); |
204 | static void mem_cgroup_put(struct mem_cgroup *mem); | 204 | static void mem_cgroup_put(struct mem_cgroup *mem); |
205 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); | ||
205 | 206 | ||
206 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | 207 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, |
207 | struct page_cgroup *pc, | 208 | struct page_cgroup *pc, |
@@ -358,6 +359,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) | |||
358 | return; | 359 | return; |
359 | 360 | ||
360 | pc = lookup_page_cgroup(page); | 361 | pc = lookup_page_cgroup(page); |
362 | /* | ||
363 | * Used bit is set without atomic ops but after smp_wmb(). | ||
364 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
365 | */ | ||
361 | smp_rmb(); | 366 | smp_rmb(); |
362 | /* unused page is not rotated. */ | 367 | /* unused page is not rotated. */ |
363 | if (!PageCgroupUsed(pc)) | 368 | if (!PageCgroupUsed(pc)) |
@@ -374,7 +379,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
374 | if (mem_cgroup_disabled()) | 379 | if (mem_cgroup_disabled()) |
375 | return; | 380 | return; |
376 | pc = lookup_page_cgroup(page); | 381 | pc = lookup_page_cgroup(page); |
377 | /* barrier to sync with "charge" */ | 382 | /* |
383 | * Used bit is set without atomic ops but after smp_wmb(). | ||
384 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
385 | */ | ||
378 | smp_rmb(); | 386 | smp_rmb(); |
379 | if (!PageCgroupUsed(pc)) | 387 | if (!PageCgroupUsed(pc)) |
380 | return; | 388 | return; |
@@ -559,6 +567,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) | |||
559 | return NULL; | 567 | return NULL; |
560 | 568 | ||
561 | pc = lookup_page_cgroup(page); | 569 | pc = lookup_page_cgroup(page); |
570 | /* | ||
571 | * Used bit is set without atomic ops but after smp_wmb(). | ||
572 | * For making pc->mem_cgroup visible, insert smp_rmb() here. | ||
573 | */ | ||
574 | smp_rmb(); | ||
575 | if (!PageCgroupUsed(pc)) | ||
576 | return NULL; | ||
577 | |||
562 | mz = page_cgroup_zoneinfo(pc); | 578 | mz = page_cgroup_zoneinfo(pc); |
563 | if (!mz) | 579 | if (!mz) |
564 | return NULL; | 580 | return NULL; |
@@ -618,7 +634,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
618 | * called with hierarchy_mutex held | 634 | * called with hierarchy_mutex held |
619 | */ | 635 | */ |
620 | static struct mem_cgroup * | 636 | static struct mem_cgroup * |
621 | mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | 637 | __mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) |
622 | { | 638 | { |
623 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; | 639 | struct cgroup *cgroup, *curr_cgroup, *root_cgroup; |
624 | 640 | ||
@@ -629,19 +645,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) | |||
629 | /* | 645 | /* |
630 | * Walk down to children | 646 | * Walk down to children |
631 | */ | 647 | */ |
632 | mem_cgroup_put(curr); | ||
633 | cgroup = list_entry(curr_cgroup->children.next, | 648 | cgroup = list_entry(curr_cgroup->children.next, |
634 | struct cgroup, sibling); | 649 | struct cgroup, sibling); |
635 | curr = mem_cgroup_from_cont(cgroup); | 650 | curr = mem_cgroup_from_cont(cgroup); |
636 | mem_cgroup_get(curr); | ||
637 | goto done; | 651 | goto done; |
638 | } | 652 | } |
639 | 653 | ||
640 | visit_parent: | 654 | visit_parent: |
641 | if (curr_cgroup == root_cgroup) { | 655 | if (curr_cgroup == root_cgroup) { |
642 | mem_cgroup_put(curr); | 656 | /* caller handles NULL case */ |
643 | curr = root_mem; | 657 | curr = NULL; |
644 | mem_cgroup_get(curr); | ||
645 | goto done; | 658 | goto done; |
646 | } | 659 | } |
647 | 660 | ||
@@ -649,11 +662,9 @@ visit_parent: | |||
649 | * Goto next sibling | 662 | * Goto next sibling |
650 | */ | 663 | */ |
651 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { | 664 | if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { |
652 | mem_cgroup_put(curr); | ||
653 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, | 665 | cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, |
654 | sibling); | 666 | sibling); |
655 | curr = mem_cgroup_from_cont(cgroup); | 667 | curr = mem_cgroup_from_cont(cgroup); |
656 | mem_cgroup_get(curr); | ||
657 | goto done; | 668 | goto done; |
658 | } | 669 | } |
659 | 670 | ||
@@ -664,7 +675,6 @@ visit_parent: | |||
664 | goto visit_parent; | 675 | goto visit_parent; |
665 | 676 | ||
666 | done: | 677 | done: |
667 | root_mem->last_scanned_child = curr; | ||
668 | return curr; | 678 | return curr; |
669 | } | 679 | } |
670 | 680 | ||
@@ -674,40 +684,46 @@ done: | |||
674 | * that to reclaim free pages from. | 684 | * that to reclaim free pages from. |
675 | */ | 685 | */ |
676 | static struct mem_cgroup * | 686 | static struct mem_cgroup * |
677 | mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | 687 | mem_cgroup_get_next_node(struct mem_cgroup *root_mem) |
678 | { | 688 | { |
679 | struct cgroup *cgroup; | 689 | struct cgroup *cgroup; |
680 | struct mem_cgroup *ret; | 690 | struct mem_cgroup *orig, *next; |
681 | bool obsolete; | 691 | bool obsolete; |
682 | 692 | ||
683 | obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | ||
684 | |||
685 | /* | 693 | /* |
686 | * Scan all children under the mem_cgroup mem | 694 | * Scan all children under the mem_cgroup mem |
687 | */ | 695 | */ |
688 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 696 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); |
697 | |||
698 | orig = root_mem->last_scanned_child; | ||
699 | obsolete = mem_cgroup_is_obsolete(orig); | ||
700 | |||
689 | if (list_empty(&root_mem->css.cgroup->children)) { | 701 | if (list_empty(&root_mem->css.cgroup->children)) { |
690 | ret = root_mem; | 702 | /* |
703 | * root_mem might have children before and last_scanned_child | ||
704 | * may point to one of them. We put it later. | ||
705 | */ | ||
706 | if (orig) | ||
707 | VM_BUG_ON(!obsolete); | ||
708 | next = NULL; | ||
691 | goto done; | 709 | goto done; |
692 | } | 710 | } |
693 | 711 | ||
694 | if (!root_mem->last_scanned_child || obsolete) { | 712 | if (!orig || obsolete) { |
695 | |||
696 | if (obsolete && root_mem->last_scanned_child) | ||
697 | mem_cgroup_put(root_mem->last_scanned_child); | ||
698 | |||
699 | cgroup = list_first_entry(&root_mem->css.cgroup->children, | 713 | cgroup = list_first_entry(&root_mem->css.cgroup->children, |
700 | struct cgroup, sibling); | 714 | struct cgroup, sibling); |
701 | ret = mem_cgroup_from_cont(cgroup); | 715 | next = mem_cgroup_from_cont(cgroup); |
702 | mem_cgroup_get(ret); | ||
703 | } else | 716 | } else |
704 | ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, | 717 | next = __mem_cgroup_get_next_node(orig, root_mem); |
705 | root_mem); | ||
706 | 718 | ||
707 | done: | 719 | done: |
708 | root_mem->last_scanned_child = ret; | 720 | if (next) |
721 | mem_cgroup_get(next); | ||
722 | root_mem->last_scanned_child = next; | ||
723 | if (orig) | ||
724 | mem_cgroup_put(orig); | ||
709 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | 725 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); |
710 | return ret; | 726 | return (next) ? next : root_mem; |
711 | } | 727 | } |
712 | 728 | ||
713 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | 729 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) |
@@ -758,28 +774,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
758 | * but there might be left over accounting, even after children | 774 | * but there might be left over accounting, even after children |
759 | * have left. | 775 | * have left. |
760 | */ | 776 | */ |
761 | ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, | 777 | ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, |
762 | get_swappiness(root_mem)); | 778 | get_swappiness(root_mem)); |
763 | if (mem_cgroup_check_under_limit(root_mem)) | 779 | if (mem_cgroup_check_under_limit(root_mem)) |
764 | return 0; | 780 | return 1; /* indicate reclaim has succeeded */ |
765 | if (!root_mem->use_hierarchy) | 781 | if (!root_mem->use_hierarchy) |
766 | return ret; | 782 | return ret; |
767 | 783 | ||
768 | next_mem = mem_cgroup_get_first_node(root_mem); | 784 | next_mem = mem_cgroup_get_next_node(root_mem); |
769 | 785 | ||
770 | while (next_mem != root_mem) { | 786 | while (next_mem != root_mem) { |
771 | if (mem_cgroup_is_obsolete(next_mem)) { | 787 | if (mem_cgroup_is_obsolete(next_mem)) { |
772 | mem_cgroup_put(next_mem); | 788 | next_mem = mem_cgroup_get_next_node(root_mem); |
773 | next_mem = mem_cgroup_get_first_node(root_mem); | ||
774 | continue; | 789 | continue; |
775 | } | 790 | } |
776 | ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, | 791 | ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, |
777 | get_swappiness(next_mem)); | 792 | get_swappiness(next_mem)); |
778 | if (mem_cgroup_check_under_limit(root_mem)) | 793 | if (mem_cgroup_check_under_limit(root_mem)) |
779 | return 0; | 794 | return 1; /* indicate reclaim has succeeded */ |
780 | mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); | 795 | next_mem = mem_cgroup_get_next_node(root_mem); |
781 | next_mem = mem_cgroup_get_next_node(next_mem, root_mem); | ||
782 | mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); | ||
783 | } | 796 | } |
784 | return ret; | 797 | return ret; |
785 | } | 798 | } |
@@ -863,6 +876,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
863 | 876 | ||
864 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, | 877 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, |
865 | noswap); | 878 | noswap); |
879 | if (ret) | ||
880 | continue; | ||
866 | 881 | ||
867 | /* | 882 | /* |
868 | * try_to_free_mem_cgroup_pages() might not give us a full | 883 | * try_to_free_mem_cgroup_pages() might not give us a full |
@@ -979,14 +994,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
979 | if (pc->mem_cgroup != from) | 994 | if (pc->mem_cgroup != from) |
980 | goto out; | 995 | goto out; |
981 | 996 | ||
982 | css_put(&from->css); | ||
983 | res_counter_uncharge(&from->res, PAGE_SIZE); | 997 | res_counter_uncharge(&from->res, PAGE_SIZE); |
984 | mem_cgroup_charge_statistics(from, pc, false); | 998 | mem_cgroup_charge_statistics(from, pc, false); |
985 | if (do_swap_account) | 999 | if (do_swap_account) |
986 | res_counter_uncharge(&from->memsw, PAGE_SIZE); | 1000 | res_counter_uncharge(&from->memsw, PAGE_SIZE); |
1001 | css_put(&from->css); | ||
1002 | |||
1003 | css_get(&to->css); | ||
987 | pc->mem_cgroup = to; | 1004 | pc->mem_cgroup = to; |
988 | mem_cgroup_charge_statistics(to, pc, true); | 1005 | mem_cgroup_charge_statistics(to, pc, true); |
989 | css_get(&to->css); | ||
990 | ret = 0; | 1006 | ret = 0; |
991 | out: | 1007 | out: |
992 | unlock_page_cgroup(pc); | 1008 | unlock_page_cgroup(pc); |
@@ -1019,8 +1035,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
1019 | if (ret || !parent) | 1035 | if (ret || !parent) |
1020 | return ret; | 1036 | return ret; |
1021 | 1037 | ||
1022 | if (!get_page_unless_zero(page)) | 1038 | if (!get_page_unless_zero(page)) { |
1023 | return -EBUSY; | 1039 | ret = -EBUSY; |
1040 | goto uncharge; | ||
1041 | } | ||
1024 | 1042 | ||
1025 | ret = isolate_lru_page(page); | 1043 | ret = isolate_lru_page(page); |
1026 | 1044 | ||
@@ -1029,19 +1047,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
1029 | 1047 | ||
1030 | ret = mem_cgroup_move_account(pc, child, parent); | 1048 | ret = mem_cgroup_move_account(pc, child, parent); |
1031 | 1049 | ||
1032 | /* drop extra refcnt by try_charge() (move_account increment one) */ | ||
1033 | css_put(&parent->css); | ||
1034 | putback_lru_page(page); | 1050 | putback_lru_page(page); |
1035 | if (!ret) { | 1051 | if (!ret) { |
1036 | put_page(page); | 1052 | put_page(page); |
1053 | /* drop extra refcnt by try_charge() */ | ||
1054 | css_put(&parent->css); | ||
1037 | return 0; | 1055 | return 0; |
1038 | } | 1056 | } |
1039 | /* uncharge if move fails */ | 1057 | |
1040 | cancel: | 1058 | cancel: |
1059 | put_page(page); | ||
1060 | uncharge: | ||
1061 | /* drop extra refcnt by try_charge() */ | ||
1062 | css_put(&parent->css); | ||
1063 | /* uncharge if move fails */ | ||
1041 | res_counter_uncharge(&parent->res, PAGE_SIZE); | 1064 | res_counter_uncharge(&parent->res, PAGE_SIZE); |
1042 | if (do_swap_account) | 1065 | if (do_swap_account) |
1043 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); | 1066 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); |
1044 | put_page(page); | ||
1045 | return ret; | 1067 | return ret; |
1046 | } | 1068 | } |
1047 | 1069 | ||
@@ -1663,7 +1685,7 @@ move_account: | |||
1663 | /* This is for making all *used* pages to be on LRU. */ | 1685 | /* This is for making all *used* pages to be on LRU. */ |
1664 | lru_add_drain_all(); | 1686 | lru_add_drain_all(); |
1665 | ret = 0; | 1687 | ret = 0; |
1666 | for_each_node_state(node, N_POSSIBLE) { | 1688 | for_each_node_state(node, N_HIGH_MEMORY) { |
1667 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 1689 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { |
1668 | enum lru_list l; | 1690 | enum lru_list l; |
1669 | for_each_lru(l) { | 1691 | for_each_lru(l) { |
@@ -1971,6 +1993,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
1971 | { | 1993 | { |
1972 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 1994 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
1973 | struct mem_cgroup *parent; | 1995 | struct mem_cgroup *parent; |
1996 | |||
1974 | if (val > 100) | 1997 | if (val > 100) |
1975 | return -EINVAL; | 1998 | return -EINVAL; |
1976 | 1999 | ||
@@ -1978,15 +2001,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
1978 | return -EINVAL; | 2001 | return -EINVAL; |
1979 | 2002 | ||
1980 | parent = mem_cgroup_from_cont(cgrp->parent); | 2003 | parent = mem_cgroup_from_cont(cgrp->parent); |
2004 | |||
2005 | cgroup_lock(); | ||
2006 | |||
1981 | /* If under hierarchy, only empty-root can set this value */ | 2007 | /* If under hierarchy, only empty-root can set this value */ |
1982 | if ((parent->use_hierarchy) || | 2008 | if ((parent->use_hierarchy) || |
1983 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) | 2009 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) { |
2010 | cgroup_unlock(); | ||
1984 | return -EINVAL; | 2011 | return -EINVAL; |
2012 | } | ||
1985 | 2013 | ||
1986 | spin_lock(&memcg->reclaim_param_lock); | 2014 | spin_lock(&memcg->reclaim_param_lock); |
1987 | memcg->swappiness = val; | 2015 | memcg->swappiness = val; |
1988 | spin_unlock(&memcg->reclaim_param_lock); | 2016 | spin_unlock(&memcg->reclaim_param_lock); |
1989 | 2017 | ||
2018 | cgroup_unlock(); | ||
2019 | |||
1990 | return 0; | 2020 | return 0; |
1991 | } | 2021 | } |
1992 | 2022 | ||
@@ -2164,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem) | |||
2164 | 2194 | ||
2165 | static void mem_cgroup_put(struct mem_cgroup *mem) | 2195 | static void mem_cgroup_put(struct mem_cgroup *mem) |
2166 | { | 2196 | { |
2167 | if (atomic_dec_and_test(&mem->refcnt)) | 2197 | if (atomic_dec_and_test(&mem->refcnt)) { |
2198 | struct mem_cgroup *parent = parent_mem_cgroup(mem); | ||
2168 | __mem_cgroup_free(mem); | 2199 | __mem_cgroup_free(mem); |
2200 | if (parent) | ||
2201 | mem_cgroup_put(parent); | ||
2202 | } | ||
2169 | } | 2203 | } |
2170 | 2204 | ||
2205 | /* | ||
2206 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. | ||
2207 | */ | ||
2208 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) | ||
2209 | { | ||
2210 | if (!mem->res.parent) | ||
2211 | return NULL; | ||
2212 | return mem_cgroup_from_res_counter(mem->res.parent, res); | ||
2213 | } | ||
2171 | 2214 | ||
2172 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 2215 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
2173 | static void __init enable_swap_cgroup(void) | 2216 | static void __init enable_swap_cgroup(void) |
@@ -2181,7 +2224,7 @@ static void __init enable_swap_cgroup(void) | |||
2181 | } | 2224 | } |
2182 | #endif | 2225 | #endif |
2183 | 2226 | ||
2184 | static struct cgroup_subsys_state * | 2227 | static struct cgroup_subsys_state * __ref |
2185 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 2228 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
2186 | { | 2229 | { |
2187 | struct mem_cgroup *mem, *parent; | 2230 | struct mem_cgroup *mem, *parent; |
@@ -2206,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
2206 | if (parent && parent->use_hierarchy) { | 2249 | if (parent && parent->use_hierarchy) { |
2207 | res_counter_init(&mem->res, &parent->res); | 2250 | res_counter_init(&mem->res, &parent->res); |
2208 | res_counter_init(&mem->memsw, &parent->memsw); | 2251 | res_counter_init(&mem->memsw, &parent->memsw); |
2252 | /* | ||
2253 | * We increment refcnt of the parent to ensure that we can | ||
2254 | * safely access it on res_counter_charge/uncharge. | ||
2255 | * This refcnt will be decremented when freeing this | ||
2256 | * mem_cgroup(see mem_cgroup_put). | ||
2257 | */ | ||
2258 | mem_cgroup_get(parent); | ||
2209 | } else { | 2259 | } else { |
2210 | res_counter_init(&mem->res, NULL); | 2260 | res_counter_init(&mem->res, NULL); |
2211 | res_counter_init(&mem->memsw, NULL); | 2261 | res_counter_init(&mem->memsw, NULL); |
@@ -2232,7 +2282,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | |||
2232 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 2282 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
2233 | struct cgroup *cont) | 2283 | struct cgroup *cont) |
2234 | { | 2284 | { |
2235 | mem_cgroup_put(mem_cgroup_from_cont(cont)); | 2285 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
2286 | struct mem_cgroup *last_scanned_child = mem->last_scanned_child; | ||
2287 | |||
2288 | if (last_scanned_child) { | ||
2289 | VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); | ||
2290 | mem_cgroup_put(last_scanned_child); | ||
2291 | } | ||
2292 | mem_cgroup_put(mem); | ||
2236 | } | 2293 | } |
2237 | 2294 | ||
2238 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 2295 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
diff --git a/mm/memory.c b/mm/memory.c index 22bfa7a47a0b..baa999e87cd2 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1999,7 +1999,7 @@ gotten: | |||
1999 | * Don't let another task, with possibly unlocked vma, | 1999 | * Don't let another task, with possibly unlocked vma, |
2000 | * keep the mlocked page. | 2000 | * keep the mlocked page. |
2001 | */ | 2001 | */ |
2002 | if (vma->vm_flags & VM_LOCKED) { | 2002 | if ((vma->vm_flags & VM_LOCKED) && old_page) { |
2003 | lock_page(old_page); /* for LRU manipulation */ | 2003 | lock_page(old_page); /* for LRU manipulation */ |
2004 | clear_page_mlock(old_page); | 2004 | clear_page_mlock(old_page); |
2005 | unlock_page(old_page); | 2005 | unlock_page(old_page); |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e412ffa8e52e..3eb4a6fdc043 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1068,10 +1068,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, | |||
1068 | return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; | 1068 | return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0; |
1069 | } | 1069 | } |
1070 | 1070 | ||
1071 | asmlinkage long sys_mbind(unsigned long start, unsigned long len, | 1071 | SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, |
1072 | unsigned long mode, | 1072 | unsigned long, mode, unsigned long __user *, nmask, |
1073 | unsigned long __user *nmask, unsigned long maxnode, | 1073 | unsigned long, maxnode, unsigned, flags) |
1074 | unsigned flags) | ||
1075 | { | 1074 | { |
1076 | nodemask_t nodes; | 1075 | nodemask_t nodes; |
1077 | int err; | 1076 | int err; |
@@ -1091,8 +1090,8 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len, | |||
1091 | } | 1090 | } |
1092 | 1091 | ||
1093 | /* Set the process memory policy */ | 1092 | /* Set the process memory policy */ |
1094 | asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | 1093 | SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask, |
1095 | unsigned long maxnode) | 1094 | unsigned long, maxnode) |
1096 | { | 1095 | { |
1097 | int err; | 1096 | int err; |
1098 | nodemask_t nodes; | 1097 | nodemask_t nodes; |
@@ -1110,9 +1109,9 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, | |||
1110 | return do_set_mempolicy(mode, flags, &nodes); | 1109 | return do_set_mempolicy(mode, flags, &nodes); |
1111 | } | 1110 | } |
1112 | 1111 | ||
1113 | asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, | 1112 | SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, |
1114 | const unsigned long __user *old_nodes, | 1113 | const unsigned long __user *, old_nodes, |
1115 | const unsigned long __user *new_nodes) | 1114 | const unsigned long __user *, new_nodes) |
1116 | { | 1115 | { |
1117 | const struct cred *cred = current_cred(), *tcred; | 1116 | const struct cred *cred = current_cred(), *tcred; |
1118 | struct mm_struct *mm; | 1117 | struct mm_struct *mm; |
@@ -1185,10 +1184,9 @@ out: | |||
1185 | 1184 | ||
1186 | 1185 | ||
1187 | /* Retrieve NUMA policy */ | 1186 | /* Retrieve NUMA policy */ |
1188 | asmlinkage long sys_get_mempolicy(int __user *policy, | 1187 | SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, |
1189 | unsigned long __user *nmask, | 1188 | unsigned long __user *, nmask, unsigned long, maxnode, |
1190 | unsigned long maxnode, | 1189 | unsigned long, addr, unsigned long, flags) |
1191 | unsigned long addr, unsigned long flags) | ||
1192 | { | 1190 | { |
1193 | int err; | 1191 | int err; |
1194 | int uninitialized_var(pval); | 1192 | int uninitialized_var(pval); |
diff --git a/mm/migrate.c b/mm/migrate.c index a30ea5fcf9f1..a9eff3f092f6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -1055,10 +1055,10 @@ out: | |||
1055 | * Move a list of pages in the address space of the currently executing | 1055 | * Move a list of pages in the address space of the currently executing |
1056 | * process. | 1056 | * process. |
1057 | */ | 1057 | */ |
1058 | asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, | 1058 | SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, |
1059 | const void __user * __user *pages, | 1059 | const void __user * __user *, pages, |
1060 | const int __user *nodes, | 1060 | const int __user *, nodes, |
1061 | int __user *status, int flags) | 1061 | int __user *, status, int, flags) |
1062 | { | 1062 | { |
1063 | const struct cred *cred = current_cred(), *tcred; | 1063 | const struct cred *cred = current_cred(), *tcred; |
1064 | struct task_struct *task; | 1064 | struct task_struct *task; |
@@ -1129,7 +1129,7 @@ int migrate_vmas(struct mm_struct *mm, const nodemask_t *to, | |||
1129 | struct vm_area_struct *vma; | 1129 | struct vm_area_struct *vma; |
1130 | int err = 0; | 1130 | int err = 0; |
1131 | 1131 | ||
1132 | for(vma = mm->mmap; vma->vm_next && !err; vma = vma->vm_next) { | 1132 | for (vma = mm->mmap; vma && !err; vma = vma->vm_next) { |
1133 | if (vma->vm_ops && vma->vm_ops->migrate) { | 1133 | if (vma->vm_ops && vma->vm_ops->migrate) { |
1134 | err = vma->vm_ops->migrate(vma, to, from, flags); | 1134 | err = vma->vm_ops->migrate(vma, to, from, flags); |
1135 | if (err) | 1135 | if (err) |
diff --git a/mm/mincore.c b/mm/mincore.c index 5178800bc129..8cb508f84ea4 100644 --- a/mm/mincore.c +++ b/mm/mincore.c | |||
@@ -177,8 +177,8 @@ none_mapped: | |||
177 | * mapped | 177 | * mapped |
178 | * -EAGAIN - A kernel resource was temporarily unavailable. | 178 | * -EAGAIN - A kernel resource was temporarily unavailable. |
179 | */ | 179 | */ |
180 | asmlinkage long sys_mincore(unsigned long start, size_t len, | 180 | SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, |
181 | unsigned char __user * vec) | 181 | unsigned char __user *, vec) |
182 | { | 182 | { |
183 | long retval; | 183 | long retval; |
184 | unsigned long pages; | 184 | unsigned long pages; |
diff --git a/mm/mlock.c b/mm/mlock.c index e125156c664e..cbe9e0581b75 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval) | |||
294 | * | 294 | * |
295 | * return number of pages [> 0] to be removed from locked_vm on success | 295 | * return number of pages [> 0] to be removed from locked_vm on success |
296 | * of "special" vmas. | 296 | * of "special" vmas. |
297 | * | ||
298 | * return negative error if vma spanning @start-@range disappears while | ||
299 | * mmap semaphore is dropped. Unlikely? | ||
300 | */ | 297 | */ |
301 | long mlock_vma_pages_range(struct vm_area_struct *vma, | 298 | long mlock_vma_pages_range(struct vm_area_struct *vma, |
302 | unsigned long start, unsigned long end) | 299 | unsigned long start, unsigned long end) |
303 | { | 300 | { |
304 | struct mm_struct *mm = vma->vm_mm; | ||
305 | int nr_pages = (end - start) / PAGE_SIZE; | 301 | int nr_pages = (end - start) / PAGE_SIZE; |
306 | BUG_ON(!(vma->vm_flags & VM_LOCKED)); | 302 | BUG_ON(!(vma->vm_flags & VM_LOCKED)); |
307 | 303 | ||
@@ -314,20 +310,11 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, | |||
314 | if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || | 310 | if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || |
315 | is_vm_hugetlb_page(vma) || | 311 | is_vm_hugetlb_page(vma) || |
316 | vma == get_gate_vma(current))) { | 312 | vma == get_gate_vma(current))) { |
317 | long error; | ||
318 | downgrade_write(&mm->mmap_sem); | ||
319 | 313 | ||
320 | error = __mlock_vma_pages_range(vma, start, end, 1); | 314 | __mlock_vma_pages_range(vma, start, end, 1); |
321 | 315 | ||
322 | up_read(&mm->mmap_sem); | 316 | /* Hide errors from mmap() and other callers */ |
323 | /* vma can change or disappear */ | 317 | return 0; |
324 | down_write(&mm->mmap_sem); | ||
325 | vma = find_vma(mm, start); | ||
326 | /* non-NULL vma must contain @start, but need to check @end */ | ||
327 | if (!vma || end > vma->vm_end) | ||
328 | return -ENOMEM; | ||
329 | |||
330 | return 0; /* hide other errors from mmap(), et al */ | ||
331 | } | 318 | } |
332 | 319 | ||
333 | /* | 320 | /* |
@@ -438,41 +425,14 @@ success: | |||
438 | vma->vm_flags = newflags; | 425 | vma->vm_flags = newflags; |
439 | 426 | ||
440 | if (lock) { | 427 | if (lock) { |
441 | /* | ||
442 | * mmap_sem is currently held for write. Downgrade the write | ||
443 | * lock to a read lock so that other faults, mmap scans, ... | ||
444 | * while we fault in all pages. | ||
445 | */ | ||
446 | downgrade_write(&mm->mmap_sem); | ||
447 | |||
448 | ret = __mlock_vma_pages_range(vma, start, end, 1); | 428 | ret = __mlock_vma_pages_range(vma, start, end, 1); |
449 | 429 | ||
450 | /* | 430 | if (ret > 0) { |
451 | * Need to reacquire mmap sem in write mode, as our callers | ||
452 | * expect this. We have no support for atomically upgrading | ||
453 | * a sem to write, so we need to check for ranges while sem | ||
454 | * is unlocked. | ||
455 | */ | ||
456 | up_read(&mm->mmap_sem); | ||
457 | /* vma can change or disappear */ | ||
458 | down_write(&mm->mmap_sem); | ||
459 | *prev = find_vma(mm, start); | ||
460 | /* non-NULL *prev must contain @start, but need to check @end */ | ||
461 | if (!(*prev) || end > (*prev)->vm_end) | ||
462 | ret = -ENOMEM; | ||
463 | else if (ret > 0) { | ||
464 | mm->locked_vm -= ret; | 431 | mm->locked_vm -= ret; |
465 | ret = 0; | 432 | ret = 0; |
466 | } else | 433 | } else |
467 | ret = __mlock_posix_error_return(ret); /* translate if needed */ | 434 | ret = __mlock_posix_error_return(ret); /* translate if needed */ |
468 | } else { | 435 | } else { |
469 | /* | ||
470 | * TODO: for unlocking, pages will already be resident, so | ||
471 | * we don't need to wait for allocations/reclaim/pagein, ... | ||
472 | * However, unlocking a very large region can still take a | ||
473 | * while. Should we downgrade the semaphore for both lock | ||
474 | * AND unlock ? | ||
475 | */ | ||
476 | __mlock_vma_pages_range(vma, start, end, 0); | 436 | __mlock_vma_pages_range(vma, start, end, 0); |
477 | } | 437 | } |
478 | 438 | ||
@@ -530,7 +490,7 @@ static int do_mlock(unsigned long start, size_t len, int on) | |||
530 | return error; | 490 | return error; |
531 | } | 491 | } |
532 | 492 | ||
533 | asmlinkage long sys_mlock(unsigned long start, size_t len) | 493 | SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) |
534 | { | 494 | { |
535 | unsigned long locked; | 495 | unsigned long locked; |
536 | unsigned long lock_limit; | 496 | unsigned long lock_limit; |
@@ -558,7 +518,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) | |||
558 | return error; | 518 | return error; |
559 | } | 519 | } |
560 | 520 | ||
561 | asmlinkage long sys_munlock(unsigned long start, size_t len) | 521 | SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) |
562 | { | 522 | { |
563 | int ret; | 523 | int ret; |
564 | 524 | ||
@@ -595,7 +555,7 @@ out: | |||
595 | return 0; | 555 | return 0; |
596 | } | 556 | } |
597 | 557 | ||
598 | asmlinkage long sys_mlockall(int flags) | 558 | SYSCALL_DEFINE1(mlockall, int, flags) |
599 | { | 559 | { |
600 | unsigned long lock_limit; | 560 | unsigned long lock_limit; |
601 | int ret = -EINVAL; | 561 | int ret = -EINVAL; |
@@ -623,7 +583,7 @@ out: | |||
623 | return ret; | 583 | return ret; |
624 | } | 584 | } |
625 | 585 | ||
626 | asmlinkage long sys_munlockall(void) | 586 | SYSCALL_DEFINE0(munlockall) |
627 | { | 587 | { |
628 | int ret; | 588 | int ret; |
629 | 589 | ||
@@ -700,7 +660,7 @@ void *alloc_locked_buffer(size_t size) | |||
700 | return buffer; | 660 | return buffer; |
701 | } | 661 | } |
702 | 662 | ||
703 | void free_locked_buffer(void *buffer, size_t size) | 663 | void release_locked_buffer(void *buffer, size_t size) |
704 | { | 664 | { |
705 | unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 665 | unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; |
706 | 666 | ||
@@ -710,6 +670,11 @@ void free_locked_buffer(void *buffer, size_t size) | |||
710 | current->mm->locked_vm -= pgsz; | 670 | current->mm->locked_vm -= pgsz; |
711 | 671 | ||
712 | up_write(&current->mm->mmap_sem); | 672 | up_write(&current->mm->mmap_sem); |
673 | } | ||
674 | |||
675 | void free_locked_buffer(void *buffer, size_t size) | ||
676 | { | ||
677 | release_locked_buffer(buffer, size); | ||
713 | 678 | ||
714 | kfree(buffer); | 679 | kfree(buffer); |
715 | } | 680 | } |
@@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) | |||
245 | return next; | 245 | return next; |
246 | } | 246 | } |
247 | 247 | ||
248 | asmlinkage unsigned long sys_brk(unsigned long brk) | 248 | SYSCALL_DEFINE1(brk, unsigned long, brk) |
249 | { | 249 | { |
250 | unsigned long rlim, retval; | 250 | unsigned long rlim, retval; |
251 | unsigned long newbrk, oldbrk; | 251 | unsigned long newbrk, oldbrk; |
@@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end); | |||
658 | validate_mm(mm); | 658 | validate_mm(mm); |
659 | } | 659 | } |
660 | 660 | ||
661 | /* Flags that can be inherited from an existing mapping when merging */ | ||
662 | #define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR) | ||
663 | |||
661 | /* | 664 | /* |
662 | * If the vma has a ->close operation then the driver probably needs to release | 665 | * If the vma has a ->close operation then the driver probably needs to release |
663 | * per-vma resources, so we don't attempt to merge those. | 666 | * per-vma resources, so we don't attempt to merge those. |
@@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end); | |||
665 | static inline int is_mergeable_vma(struct vm_area_struct *vma, | 668 | static inline int is_mergeable_vma(struct vm_area_struct *vma, |
666 | struct file *file, unsigned long vm_flags) | 669 | struct file *file, unsigned long vm_flags) |
667 | { | 670 | { |
668 | if (vma->vm_flags != vm_flags) | 671 | if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS) |
669 | return 0; | 672 | return 0; |
670 | if (vma->vm_file != file) | 673 | if (vma->vm_file != file) |
671 | return 0; | 674 | return 0; |
@@ -915,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
915 | struct inode *inode; | 918 | struct inode *inode; |
916 | unsigned int vm_flags; | 919 | unsigned int vm_flags; |
917 | int error; | 920 | int error; |
918 | int accountable = 1; | ||
919 | unsigned long reqprot = prot; | 921 | unsigned long reqprot = prot; |
920 | 922 | ||
921 | /* | 923 | /* |
@@ -1016,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1016 | return -EPERM; | 1018 | return -EPERM; |
1017 | vm_flags &= ~VM_MAYEXEC; | 1019 | vm_flags &= ~VM_MAYEXEC; |
1018 | } | 1020 | } |
1019 | if (is_file_hugepages(file)) | ||
1020 | accountable = 0; | ||
1021 | 1021 | ||
1022 | if (!file->f_op || !file->f_op->mmap) | 1022 | if (!file->f_op || !file->f_op->mmap) |
1023 | return -ENODEV; | 1023 | return -ENODEV; |
@@ -1050,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1050 | if (error) | 1050 | if (error) |
1051 | return error; | 1051 | return error; |
1052 | 1052 | ||
1053 | return mmap_region(file, addr, len, flags, vm_flags, pgoff, | 1053 | return mmap_region(file, addr, len, flags, vm_flags, pgoff); |
1054 | accountable); | ||
1055 | } | 1054 | } |
1056 | EXPORT_SYMBOL(do_mmap_pgoff); | 1055 | EXPORT_SYMBOL(do_mmap_pgoff); |
1057 | 1056 | ||
@@ -1087,10 +1086,25 @@ int vma_wants_writenotify(struct vm_area_struct *vma) | |||
1087 | mapping_cap_account_dirty(vma->vm_file->f_mapping); | 1086 | mapping_cap_account_dirty(vma->vm_file->f_mapping); |
1088 | } | 1087 | } |
1089 | 1088 | ||
1089 | /* | ||
1090 | * We account for memory if it's a private writeable mapping, | ||
1091 | * not hugepages and VM_NORESERVE wasn't set. | ||
1092 | */ | ||
1093 | static inline int accountable_mapping(struct file *file, unsigned int vm_flags) | ||
1094 | { | ||
1095 | /* | ||
1096 | * hugetlb has its own accounting separate from the core VM | ||
1097 | * VM_HUGETLB may not be set yet so we cannot check for that flag. | ||
1098 | */ | ||
1099 | if (file && is_file_hugepages(file)) | ||
1100 | return 0; | ||
1101 | |||
1102 | return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; | ||
1103 | } | ||
1104 | |||
1090 | unsigned long mmap_region(struct file *file, unsigned long addr, | 1105 | unsigned long mmap_region(struct file *file, unsigned long addr, |
1091 | unsigned long len, unsigned long flags, | 1106 | unsigned long len, unsigned long flags, |
1092 | unsigned int vm_flags, unsigned long pgoff, | 1107 | unsigned int vm_flags, unsigned long pgoff) |
1093 | int accountable) | ||
1094 | { | 1108 | { |
1095 | struct mm_struct *mm = current->mm; | 1109 | struct mm_struct *mm = current->mm; |
1096 | struct vm_area_struct *vma, *prev; | 1110 | struct vm_area_struct *vma, *prev; |
@@ -1114,38 +1128,38 @@ munmap_back: | |||
1114 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) | 1128 | if (!may_expand_vm(mm, len >> PAGE_SHIFT)) |
1115 | return -ENOMEM; | 1129 | return -ENOMEM; |
1116 | 1130 | ||
1117 | if (flags & MAP_NORESERVE) | 1131 | /* |
1118 | vm_flags |= VM_NORESERVE; | 1132 | * Set 'VM_NORESERVE' if we should not account for the |
1133 | * memory use of this mapping. | ||
1134 | */ | ||
1135 | if ((flags & MAP_NORESERVE)) { | ||
1136 | /* We honor MAP_NORESERVE if allowed to overcommit */ | ||
1137 | if (sysctl_overcommit_memory != OVERCOMMIT_NEVER) | ||
1138 | vm_flags |= VM_NORESERVE; | ||
1119 | 1139 | ||
1120 | if (accountable && (!(flags & MAP_NORESERVE) || | 1140 | /* hugetlb applies strict overcommit unless MAP_NORESERVE */ |
1121 | sysctl_overcommit_memory == OVERCOMMIT_NEVER)) { | 1141 | if (file && is_file_hugepages(file)) |
1122 | if (vm_flags & VM_SHARED) { | 1142 | vm_flags |= VM_NORESERVE; |
1123 | /* Check memory availability in shmem_file_setup? */ | ||
1124 | vm_flags |= VM_ACCOUNT; | ||
1125 | } else if (vm_flags & VM_WRITE) { | ||
1126 | /* | ||
1127 | * Private writable mapping: check memory availability | ||
1128 | */ | ||
1129 | charged = len >> PAGE_SHIFT; | ||
1130 | if (security_vm_enough_memory(charged)) | ||
1131 | return -ENOMEM; | ||
1132 | vm_flags |= VM_ACCOUNT; | ||
1133 | } | ||
1134 | } | 1143 | } |
1135 | 1144 | ||
1136 | /* | 1145 | /* |
1137 | * Can we just expand an old private anonymous mapping? | 1146 | * Private writable mapping: check memory availability |
1138 | * The VM_SHARED test is necessary because shmem_zero_setup | ||
1139 | * will create the file object for a shared anonymous map below. | ||
1140 | */ | 1147 | */ |
1141 | if (!file && !(vm_flags & VM_SHARED)) { | 1148 | if (accountable_mapping(file, vm_flags)) { |
1142 | vma = vma_merge(mm, prev, addr, addr + len, vm_flags, | 1149 | charged = len >> PAGE_SHIFT; |
1143 | NULL, NULL, pgoff, NULL); | 1150 | if (security_vm_enough_memory(charged)) |
1144 | if (vma) | 1151 | return -ENOMEM; |
1145 | goto out; | 1152 | vm_flags |= VM_ACCOUNT; |
1146 | } | 1153 | } |
1147 | 1154 | ||
1148 | /* | 1155 | /* |
1156 | * Can we just expand an old mapping? | ||
1157 | */ | ||
1158 | vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL); | ||
1159 | if (vma) | ||
1160 | goto out; | ||
1161 | |||
1162 | /* | ||
1149 | * Determine the object being mapped and call the appropriate | 1163 | * Determine the object being mapped and call the appropriate |
1150 | * specific mapper. the address has already been validated, but | 1164 | * specific mapper. the address has already been validated, but |
1151 | * not unmapped, but the maps are removed from the list. | 1165 | * not unmapped, but the maps are removed from the list. |
@@ -1186,14 +1200,6 @@ munmap_back: | |||
1186 | goto free_vma; | 1200 | goto free_vma; |
1187 | } | 1201 | } |
1188 | 1202 | ||
1189 | /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform | ||
1190 | * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) | ||
1191 | * that memory reservation must be checked; but that reservation | ||
1192 | * belongs to shared memory object, not to vma: so now clear it. | ||
1193 | */ | ||
1194 | if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT)) | ||
1195 | vma->vm_flags &= ~VM_ACCOUNT; | ||
1196 | |||
1197 | /* Can addr have changed?? | 1203 | /* Can addr have changed?? |
1198 | * | 1204 | * |
1199 | * Answer: Yes, several device drivers can do it in their | 1205 | * Answer: Yes, several device drivers can do it in their |
@@ -1206,17 +1212,8 @@ munmap_back: | |||
1206 | if (vma_wants_writenotify(vma)) | 1212 | if (vma_wants_writenotify(vma)) |
1207 | vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); | 1213 | vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); |
1208 | 1214 | ||
1209 | if (file && vma_merge(mm, prev, addr, vma->vm_end, | 1215 | vma_link(mm, vma, prev, rb_link, rb_parent); |
1210 | vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { | 1216 | file = vma->vm_file; |
1211 | mpol_put(vma_policy(vma)); | ||
1212 | kmem_cache_free(vm_area_cachep, vma); | ||
1213 | fput(file); | ||
1214 | if (vm_flags & VM_EXECUTABLE) | ||
1215 | removed_exe_file_vma(mm); | ||
1216 | } else { | ||
1217 | vma_link(mm, vma, prev, rb_link, rb_parent); | ||
1218 | file = vma->vm_file; | ||
1219 | } | ||
1220 | 1217 | ||
1221 | /* Once vma denies write, undo our temporary denial count */ | 1218 | /* Once vma denies write, undo our temporary denial count */ |
1222 | if (correct_wcount) | 1219 | if (correct_wcount) |
@@ -1948,7 +1945,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) | |||
1948 | 1945 | ||
1949 | EXPORT_SYMBOL(do_munmap); | 1946 | EXPORT_SYMBOL(do_munmap); |
1950 | 1947 | ||
1951 | asmlinkage long sys_munmap(unsigned long addr, size_t len) | 1948 | SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) |
1952 | { | 1949 | { |
1953 | int ret; | 1950 | int ret; |
1954 | struct mm_struct *mm = current->mm; | 1951 | struct mm_struct *mm = current->mm; |
@@ -2087,12 +2084,8 @@ void exit_mmap(struct mm_struct *mm) | |||
2087 | unsigned long end; | 2084 | unsigned long end; |
2088 | 2085 | ||
2089 | /* mm's last user has gone, and its about to be pulled down */ | 2086 | /* mm's last user has gone, and its about to be pulled down */ |
2090 | arch_exit_mmap(mm); | ||
2091 | mmu_notifier_release(mm); | 2087 | mmu_notifier_release(mm); |
2092 | 2088 | ||
2093 | if (!mm->mmap) /* Can happen if dup_mmap() received an OOM */ | ||
2094 | return; | ||
2095 | |||
2096 | if (mm->locked_vm) { | 2089 | if (mm->locked_vm) { |
2097 | vma = mm->mmap; | 2090 | vma = mm->mmap; |
2098 | while (vma) { | 2091 | while (vma) { |
@@ -2101,7 +2094,13 @@ void exit_mmap(struct mm_struct *mm) | |||
2101 | vma = vma->vm_next; | 2094 | vma = vma->vm_next; |
2102 | } | 2095 | } |
2103 | } | 2096 | } |
2097 | |||
2098 | arch_exit_mmap(mm); | ||
2099 | |||
2104 | vma = mm->mmap; | 2100 | vma = mm->mmap; |
2101 | if (!vma) /* Can happen if dup_mmap() received an OOM */ | ||
2102 | return; | ||
2103 | |||
2105 | lru_add_drain(); | 2104 | lru_add_drain(); |
2106 | flush_cache_mm(mm); | 2105 | flush_cache_mm(mm); |
2107 | tlb = tlb_gather_mmu(mm, 1); | 2106 | tlb = tlb_gather_mmu(mm, 1); |
diff --git a/mm/mprotect.c b/mm/mprotect.c index d0f6e7ce09f1..258197b76fb4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -151,10 +151,11 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, | |||
151 | /* | 151 | /* |
152 | * If we make a private mapping writable we increase our commit; | 152 | * If we make a private mapping writable we increase our commit; |
153 | * but (without finer accounting) cannot reduce our commit if we | 153 | * but (without finer accounting) cannot reduce our commit if we |
154 | * make it unwritable again. | 154 | * make it unwritable again. hugetlb mapping were accounted for |
155 | * even if read-only so there is no need to account for them here | ||
155 | */ | 156 | */ |
156 | if (newflags & VM_WRITE) { | 157 | if (newflags & VM_WRITE) { |
157 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE| | 158 | if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB| |
158 | VM_SHARED|VM_NORESERVE))) { | 159 | VM_SHARED|VM_NORESERVE))) { |
159 | charged = nrpages; | 160 | charged = nrpages; |
160 | if (security_vm_enough_memory(charged)) | 161 | if (security_vm_enough_memory(charged)) |
@@ -217,8 +218,8 @@ fail: | |||
217 | return error; | 218 | return error; |
218 | } | 219 | } |
219 | 220 | ||
220 | asmlinkage long | 221 | SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, |
221 | sys_mprotect(unsigned long start, size_t len, unsigned long prot) | 222 | unsigned long, prot) |
222 | { | 223 | { |
223 | unsigned long vm_flags, nstart, end, tmp, reqprot; | 224 | unsigned long vm_flags, nstart, end, tmp, reqprot; |
224 | struct vm_area_struct *vma, *prev; | 225 | struct vm_area_struct *vma, *prev; |
diff --git a/mm/mremap.c b/mm/mremap.c index 646de959aa58..a39b7b91be46 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
@@ -420,9 +420,9 @@ out_nc: | |||
420 | return ret; | 420 | return ret; |
421 | } | 421 | } |
422 | 422 | ||
423 | asmlinkage unsigned long sys_mremap(unsigned long addr, | 423 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, |
424 | unsigned long old_len, unsigned long new_len, | 424 | unsigned long, new_len, unsigned long, flags, |
425 | unsigned long flags, unsigned long new_addr) | 425 | unsigned long, new_addr) |
426 | { | 426 | { |
427 | unsigned long ret; | 427 | unsigned long ret; |
428 | 428 | ||
diff --git a/mm/msync.c b/mm/msync.c index 07dae08cf31c..4083209b7f02 100644 --- a/mm/msync.c +++ b/mm/msync.c | |||
@@ -28,7 +28,7 @@ | |||
28 | * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to | 28 | * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to |
29 | * applications. | 29 | * applications. |
30 | */ | 30 | */ |
31 | asmlinkage long sys_msync(unsigned long start, size_t len, int flags) | 31 | SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) |
32 | { | 32 | { |
33 | unsigned long end; | 33 | unsigned long end; |
34 | struct mm_struct *mm = current->mm; | 34 | struct mm_struct *mm = current->mm; |
diff --git a/mm/nommu.c b/mm/nommu.c index 60ed8375c986..2fcf47d449b4 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> | 10 | * Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com> |
11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> | 11 | * Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org> |
12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> | 12 | * Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com> |
13 | * Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org> | 13 | * Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org> |
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
@@ -394,6 +394,24 @@ void vunmap(const void *addr) | |||
394 | } | 394 | } |
395 | EXPORT_SYMBOL(vunmap); | 395 | EXPORT_SYMBOL(vunmap); |
396 | 396 | ||
397 | void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot) | ||
398 | { | ||
399 | BUG(); | ||
400 | return NULL; | ||
401 | } | ||
402 | EXPORT_SYMBOL(vm_map_ram); | ||
403 | |||
404 | void vm_unmap_ram(const void *mem, unsigned int count) | ||
405 | { | ||
406 | BUG(); | ||
407 | } | ||
408 | EXPORT_SYMBOL(vm_unmap_ram); | ||
409 | |||
410 | void vm_unmap_aliases(void) | ||
411 | { | ||
412 | } | ||
413 | EXPORT_SYMBOL_GPL(vm_unmap_aliases); | ||
414 | |||
397 | /* | 415 | /* |
398 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to | 416 | * Implement a stub for vmalloc_sync_all() if the architecture chose not to |
399 | * have one. | 417 | * have one. |
@@ -416,7 +434,7 @@ EXPORT_SYMBOL(vm_insert_page); | |||
416 | * to a regular file. in this case, the unmapping will need | 434 | * to a regular file. in this case, the unmapping will need |
417 | * to invoke file system routines that need the global lock. | 435 | * to invoke file system routines that need the global lock. |
418 | */ | 436 | */ |
419 | asmlinkage unsigned long sys_brk(unsigned long brk) | 437 | SYSCALL_DEFINE1(brk, unsigned long, brk) |
420 | { | 438 | { |
421 | struct mm_struct *mm = current->mm; | 439 | struct mm_struct *mm = current->mm; |
422 | 440 | ||
@@ -1143,8 +1161,8 @@ error_free: | |||
1143 | return ret; | 1161 | return ret; |
1144 | 1162 | ||
1145 | enomem: | 1163 | enomem: |
1146 | printk("Allocation of length %lu from process %d failed\n", | 1164 | printk("Allocation of length %lu from process %d (%s) failed\n", |
1147 | len, current->pid); | 1165 | len, current->pid, current->comm); |
1148 | show_free_areas(); | 1166 | show_free_areas(); |
1149 | return -ENOMEM; | 1167 | return -ENOMEM; |
1150 | } | 1168 | } |
@@ -1573,7 +1591,7 @@ erase_whole_vma: | |||
1573 | } | 1591 | } |
1574 | EXPORT_SYMBOL(do_munmap); | 1592 | EXPORT_SYMBOL(do_munmap); |
1575 | 1593 | ||
1576 | asmlinkage long sys_munmap(unsigned long addr, size_t len) | 1594 | SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) |
1577 | { | 1595 | { |
1578 | int ret; | 1596 | int ret; |
1579 | struct mm_struct *mm = current->mm; | 1597 | struct mm_struct *mm = current->mm; |
@@ -1657,10 +1675,9 @@ unsigned long do_mremap(unsigned long addr, | |||
1657 | } | 1675 | } |
1658 | EXPORT_SYMBOL(do_mremap); | 1676 | EXPORT_SYMBOL(do_mremap); |
1659 | 1677 | ||
1660 | asmlinkage | 1678 | SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, |
1661 | unsigned long sys_mremap(unsigned long addr, | 1679 | unsigned long, new_len, unsigned long, flags, |
1662 | unsigned long old_len, unsigned long new_len, | 1680 | unsigned long, new_addr) |
1663 | unsigned long flags, unsigned long new_addr) | ||
1664 | { | 1681 | { |
1665 | unsigned long ret; | 1682 | unsigned long ret; |
1666 | 1683 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index b493db7841dc..74dc57c74349 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -209,7 +209,7 @@ int dirty_bytes_handler(struct ctl_table *table, int write, | |||
209 | struct file *filp, void __user *buffer, size_t *lenp, | 209 | struct file *filp, void __user *buffer, size_t *lenp, |
210 | loff_t *ppos) | 210 | loff_t *ppos) |
211 | { | 211 | { |
212 | int old_bytes = vm_dirty_bytes; | 212 | unsigned long old_bytes = vm_dirty_bytes; |
213 | int ret; | 213 | int ret; |
214 | 214 | ||
215 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | 215 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); |
@@ -240,7 +240,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi) | |||
240 | } | 240 | } |
241 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); | 241 | EXPORT_SYMBOL_GPL(bdi_writeout_inc); |
242 | 242 | ||
243 | static inline void task_dirty_inc(struct task_struct *tsk) | 243 | void task_dirty_inc(struct task_struct *tsk) |
244 | { | 244 | { |
245 | prop_inc_single(&vm_dirties, &tsk->dirties); | 245 | prop_inc_single(&vm_dirties, &tsk->dirties); |
246 | } | 246 | } |
@@ -1051,13 +1051,25 @@ continue_unlock: | |||
1051 | } | 1051 | } |
1052 | } | 1052 | } |
1053 | 1053 | ||
1054 | if (wbc->sync_mode == WB_SYNC_NONE) { | 1054 | if (nr_to_write > 0) { |
1055 | wbc->nr_to_write--; | 1055 | nr_to_write--; |
1056 | if (wbc->nr_to_write <= 0) { | 1056 | if (nr_to_write == 0 && |
1057 | wbc->sync_mode == WB_SYNC_NONE) { | ||
1058 | /* | ||
1059 | * We stop writing back only if we are | ||
1060 | * not doing integrity sync. In case of | ||
1061 | * integrity sync we have to keep going | ||
1062 | * because someone may be concurrently | ||
1063 | * dirtying pages, and we might have | ||
1064 | * synced a lot of newly appeared dirty | ||
1065 | * pages, but have not synced all of the | ||
1066 | * old dirty pages. | ||
1067 | */ | ||
1057 | done = 1; | 1068 | done = 1; |
1058 | break; | 1069 | break; |
1059 | } | 1070 | } |
1060 | } | 1071 | } |
1072 | |||
1061 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 1073 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
1062 | wbc->encountered_congestion = 1; | 1074 | wbc->encountered_congestion = 1; |
1063 | done = 1; | 1075 | done = 1; |
@@ -1067,7 +1079,7 @@ continue_unlock: | |||
1067 | pagevec_release(&pvec); | 1079 | pagevec_release(&pvec); |
1068 | cond_resched(); | 1080 | cond_resched(); |
1069 | } | 1081 | } |
1070 | if (!cycled) { | 1082 | if (!cycled && !done) { |
1071 | /* | 1083 | /* |
1072 | * range_cyclic: | 1084 | * range_cyclic: |
1073 | * We hit the last page and there is more work to be done: wrap | 1085 | * We hit the last page and there is more work to be done: wrap |
@@ -1218,6 +1230,7 @@ int __set_page_dirty_nobuffers(struct page *page) | |||
1218 | __inc_zone_page_state(page, NR_FILE_DIRTY); | 1230 | __inc_zone_page_state(page, NR_FILE_DIRTY); |
1219 | __inc_bdi_stat(mapping->backing_dev_info, | 1231 | __inc_bdi_stat(mapping->backing_dev_info, |
1220 | BDI_RECLAIMABLE); | 1232 | BDI_RECLAIMABLE); |
1233 | task_dirty_inc(current); | ||
1221 | task_io_account_write(PAGE_CACHE_SIZE); | 1234 | task_io_account_write(PAGE_CACHE_SIZE); |
1222 | } | 1235 | } |
1223 | radix_tree_tag_set(&mapping->page_tree, | 1236 | radix_tree_tag_set(&mapping->page_tree, |
@@ -1250,7 +1263,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage); | |||
1250 | * If the mapping doesn't provide a set_page_dirty a_op, then | 1263 | * If the mapping doesn't provide a set_page_dirty a_op, then |
1251 | * just fall through and assume that it wants buffer_heads. | 1264 | * just fall through and assume that it wants buffer_heads. |
1252 | */ | 1265 | */ |
1253 | static int __set_page_dirty(struct page *page) | 1266 | int set_page_dirty(struct page *page) |
1254 | { | 1267 | { |
1255 | struct address_space *mapping = page_mapping(page); | 1268 | struct address_space *mapping = page_mapping(page); |
1256 | 1269 | ||
@@ -1268,14 +1281,6 @@ static int __set_page_dirty(struct page *page) | |||
1268 | } | 1281 | } |
1269 | return 0; | 1282 | return 0; |
1270 | } | 1283 | } |
1271 | |||
1272 | int set_page_dirty(struct page *page) | ||
1273 | { | ||
1274 | int ret = __set_page_dirty(page); | ||
1275 | if (ret) | ||
1276 | task_dirty_inc(current); | ||
1277 | return ret; | ||
1278 | } | ||
1279 | EXPORT_SYMBOL(set_page_dirty); | 1284 | EXPORT_SYMBOL(set_page_dirty); |
1280 | 1285 | ||
1281 | /* | 1286 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5675b3073854..5c44ed49ca93 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2989,7 +2989,7 @@ static int __meminit next_active_region_index_in_nid(int index, int nid) | |||
2989 | * was used and there are no special requirements, this is a convenient | 2989 | * was used and there are no special requirements, this is a convenient |
2990 | * alternative | 2990 | * alternative |
2991 | */ | 2991 | */ |
2992 | int __meminit early_pfn_to_nid(unsigned long pfn) | 2992 | int __meminit __early_pfn_to_nid(unsigned long pfn) |
2993 | { | 2993 | { |
2994 | int i; | 2994 | int i; |
2995 | 2995 | ||
@@ -3000,10 +3000,33 @@ int __meminit early_pfn_to_nid(unsigned long pfn) | |||
3000 | if (start_pfn <= pfn && pfn < end_pfn) | 3000 | if (start_pfn <= pfn && pfn < end_pfn) |
3001 | return early_node_map[i].nid; | 3001 | return early_node_map[i].nid; |
3002 | } | 3002 | } |
3003 | /* This is a memory hole */ | ||
3004 | return -1; | ||
3005 | } | ||
3006 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | ||
3007 | |||
3008 | int __meminit early_pfn_to_nid(unsigned long pfn) | ||
3009 | { | ||
3010 | int nid; | ||
3003 | 3011 | ||
3012 | nid = __early_pfn_to_nid(pfn); | ||
3013 | if (nid >= 0) | ||
3014 | return nid; | ||
3015 | /* just returns 0 */ | ||
3004 | return 0; | 3016 | return 0; |
3005 | } | 3017 | } |
3006 | #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ | 3018 | |
3019 | #ifdef CONFIG_NODES_SPAN_OTHER_NODES | ||
3020 | bool __meminit early_pfn_in_nid(unsigned long pfn, int node) | ||
3021 | { | ||
3022 | int nid; | ||
3023 | |||
3024 | nid = __early_pfn_to_nid(pfn); | ||
3025 | if (nid >= 0 && nid != node) | ||
3026 | return false; | ||
3027 | return true; | ||
3028 | } | ||
3029 | #endif | ||
3007 | 3030 | ||
3008 | /* Basic iterator support to walk early_node_map[] */ | 3031 | /* Basic iterator support to walk early_node_map[] */ |
3009 | #define for_each_active_range_index_in_nid(i, nid) \ | 3032 | #define for_each_active_range_index_in_nid(i, nid) \ |
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 7006a11350c8..ceecfbb143fa 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -114,7 +114,8 @@ static int __init_refok init_section_page_cgroup(unsigned long pfn) | |||
114 | nid = page_to_nid(pfn_to_page(pfn)); | 114 | nid = page_to_nid(pfn_to_page(pfn)); |
115 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; | 115 | table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION; |
116 | if (slab_is_available()) { | 116 | if (slab_is_available()) { |
117 | base = kmalloc_node(table_size, GFP_KERNEL, nid); | 117 | base = kmalloc_node(table_size, |
118 | GFP_KERNEL | __GFP_NOWARN, nid); | ||
118 | if (!base) | 119 | if (!base) |
119 | base = vmalloc_node(table_size, nid); | 120 | base = vmalloc_node(table_size, nid); |
120 | } else { | 121 | } else { |
diff --git a/mm/page_io.c b/mm/page_io.c index dc6ce0afbded..3023c475e041 100644 --- a/mm/page_io.c +++ b/mm/page_io.c | |||
@@ -111,7 +111,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) | |||
111 | goto out; | 111 | goto out; |
112 | } | 112 | } |
113 | if (wbc->sync_mode == WB_SYNC_ALL) | 113 | if (wbc->sync_mode == WB_SYNC_ALL) |
114 | rw |= (1 << BIO_RW_SYNC); | 114 | rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); |
115 | count_vm_event(PSWPOUT); | 115 | count_vm_event(PSWPOUT); |
116 | set_page_writeback(page); | 116 | set_page_writeback(page); |
117 | unlock_page(page); | 117 | unlock_page(page); |
@@ -1072,7 +1072,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) | |||
1072 | spin_lock(&mapping->i_mmap_lock); | 1072 | spin_lock(&mapping->i_mmap_lock); |
1073 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 1073 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
1074 | if (MLOCK_PAGES && unlikely(unlock)) { | 1074 | if (MLOCK_PAGES && unlikely(unlock)) { |
1075 | if (!(vma->vm_flags & VM_LOCKED)) | 1075 | if (!((vma->vm_flags & VM_LOCKED) && |
1076 | page_mapped_in_vma(page, vma))) | ||
1076 | continue; /* must visit all vmas */ | 1077 | continue; /* must visit all vmas */ |
1077 | ret = SWAP_MLOCK; | 1078 | ret = SWAP_MLOCK; |
1078 | } else { | 1079 | } else { |
diff --git a/mm/shmem.c b/mm/shmem.c index 5d0de96c9789..4103a239ce84 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -169,13 +169,13 @@ static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) | |||
169 | */ | 169 | */ |
170 | static inline int shmem_acct_size(unsigned long flags, loff_t size) | 170 | static inline int shmem_acct_size(unsigned long flags, loff_t size) |
171 | { | 171 | { |
172 | return (flags & VM_ACCOUNT) ? | 172 | return (flags & VM_NORESERVE) ? |
173 | security_vm_enough_memory_kern(VM_ACCT(size)) : 0; | 173 | 0 : security_vm_enough_memory_kern(VM_ACCT(size)); |
174 | } | 174 | } |
175 | 175 | ||
176 | static inline void shmem_unacct_size(unsigned long flags, loff_t size) | 176 | static inline void shmem_unacct_size(unsigned long flags, loff_t size) |
177 | { | 177 | { |
178 | if (flags & VM_ACCOUNT) | 178 | if (!(flags & VM_NORESERVE)) |
179 | vm_unacct_memory(VM_ACCT(size)); | 179 | vm_unacct_memory(VM_ACCT(size)); |
180 | } | 180 | } |
181 | 181 | ||
@@ -187,13 +187,13 @@ static inline void shmem_unacct_size(unsigned long flags, loff_t size) | |||
187 | */ | 187 | */ |
188 | static inline int shmem_acct_block(unsigned long flags) | 188 | static inline int shmem_acct_block(unsigned long flags) |
189 | { | 189 | { |
190 | return (flags & VM_ACCOUNT) ? | 190 | return (flags & VM_NORESERVE) ? |
191 | 0 : security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)); | 191 | security_vm_enough_memory_kern(VM_ACCT(PAGE_CACHE_SIZE)) : 0; |
192 | } | 192 | } |
193 | 193 | ||
194 | static inline void shmem_unacct_blocks(unsigned long flags, long pages) | 194 | static inline void shmem_unacct_blocks(unsigned long flags, long pages) |
195 | { | 195 | { |
196 | if (!(flags & VM_ACCOUNT)) | 196 | if (flags & VM_NORESERVE) |
197 | vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); | 197 | vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); |
198 | } | 198 | } |
199 | 199 | ||
@@ -1515,8 +1515,8 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) | |||
1515 | return 0; | 1515 | return 0; |
1516 | } | 1516 | } |
1517 | 1517 | ||
1518 | static struct inode * | 1518 | static struct inode *shmem_get_inode(struct super_block *sb, int mode, |
1519 | shmem_get_inode(struct super_block *sb, int mode, dev_t dev) | 1519 | dev_t dev, unsigned long flags) |
1520 | { | 1520 | { |
1521 | struct inode *inode; | 1521 | struct inode *inode; |
1522 | struct shmem_inode_info *info; | 1522 | struct shmem_inode_info *info; |
@@ -1537,6 +1537,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
1537 | info = SHMEM_I(inode); | 1537 | info = SHMEM_I(inode); |
1538 | memset(info, 0, (char *)inode - (char *)info); | 1538 | memset(info, 0, (char *)inode - (char *)info); |
1539 | spin_lock_init(&info->lock); | 1539 | spin_lock_init(&info->lock); |
1540 | info->flags = flags & VM_NORESERVE; | ||
1540 | INIT_LIST_HEAD(&info->swaplist); | 1541 | INIT_LIST_HEAD(&info->swaplist); |
1541 | 1542 | ||
1542 | switch (mode & S_IFMT) { | 1543 | switch (mode & S_IFMT) { |
@@ -1779,9 +1780,10 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1779 | static int | 1780 | static int |
1780 | shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | 1781 | shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) |
1781 | { | 1782 | { |
1782 | struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); | 1783 | struct inode *inode; |
1783 | int error = -ENOSPC; | 1784 | int error = -ENOSPC; |
1784 | 1785 | ||
1786 | inode = shmem_get_inode(dir->i_sb, mode, dev, VM_NORESERVE); | ||
1785 | if (inode) { | 1787 | if (inode) { |
1786 | error = security_inode_init_security(inode, dir, NULL, NULL, | 1788 | error = security_inode_init_security(inode, dir, NULL, NULL, |
1787 | NULL); | 1789 | NULL); |
@@ -1920,7 +1922,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s | |||
1920 | if (len > PAGE_CACHE_SIZE) | 1922 | if (len > PAGE_CACHE_SIZE) |
1921 | return -ENAMETOOLONG; | 1923 | return -ENAMETOOLONG; |
1922 | 1924 | ||
1923 | inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); | 1925 | inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0, VM_NORESERVE); |
1924 | if (!inode) | 1926 | if (!inode) |
1925 | return -ENOSPC; | 1927 | return -ENOSPC; |
1926 | 1928 | ||
@@ -2332,7 +2334,7 @@ static int shmem_fill_super(struct super_block *sb, | |||
2332 | sb->s_flags |= MS_POSIXACL; | 2334 | sb->s_flags |= MS_POSIXACL; |
2333 | #endif | 2335 | #endif |
2334 | 2336 | ||
2335 | inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0); | 2337 | inode = shmem_get_inode(sb, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); |
2336 | if (!inode) | 2338 | if (!inode) |
2337 | goto failed; | 2339 | goto failed; |
2338 | inode->i_uid = sbinfo->uid; | 2340 | inode->i_uid = sbinfo->uid; |
@@ -2574,12 +2576,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
2574 | return 0; | 2576 | return 0; |
2575 | } | 2577 | } |
2576 | 2578 | ||
2577 | #define shmem_file_operations ramfs_file_operations | 2579 | #define shmem_vm_ops generic_file_vm_ops |
2578 | #define shmem_vm_ops generic_file_vm_ops | 2580 | #define shmem_file_operations ramfs_file_operations |
2579 | #define shmem_get_inode ramfs_get_inode | 2581 | #define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) |
2580 | #define shmem_acct_size(a, b) 0 | 2582 | #define shmem_acct_size(flags, size) 0 |
2581 | #define shmem_unacct_size(a, b) do {} while (0) | 2583 | #define shmem_unacct_size(flags, size) do {} while (0) |
2582 | #define SHMEM_MAX_BYTES LLONG_MAX | 2584 | #define SHMEM_MAX_BYTES LLONG_MAX |
2583 | 2585 | ||
2584 | #endif /* CONFIG_SHMEM */ | 2586 | #endif /* CONFIG_SHMEM */ |
2585 | 2587 | ||
@@ -2589,7 +2591,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) | |||
2589 | * shmem_file_setup - get an unlinked file living in tmpfs | 2591 | * shmem_file_setup - get an unlinked file living in tmpfs |
2590 | * @name: name for dentry (to be seen in /proc/<pid>/maps | 2592 | * @name: name for dentry (to be seen in /proc/<pid>/maps |
2591 | * @size: size to be set for the file | 2593 | * @size: size to be set for the file |
2592 | * @flags: vm_flags | 2594 | * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size |
2593 | */ | 2595 | */ |
2594 | struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) | 2596 | struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) |
2595 | { | 2597 | { |
@@ -2623,13 +2625,10 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) | |||
2623 | goto put_dentry; | 2625 | goto put_dentry; |
2624 | 2626 | ||
2625 | error = -ENOSPC; | 2627 | error = -ENOSPC; |
2626 | inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); | 2628 | inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0, flags); |
2627 | if (!inode) | 2629 | if (!inode) |
2628 | goto close_file; | 2630 | goto close_file; |
2629 | 2631 | ||
2630 | #ifdef CONFIG_SHMEM | ||
2631 | SHMEM_I(inode)->flags = flags & VM_ACCOUNT; | ||
2632 | #endif | ||
2633 | d_instantiate(dentry, inode); | 2632 | d_instantiate(dentry, inode); |
2634 | inode->i_size = size; | 2633 | inode->i_size = size; |
2635 | inode->i_nlink = 0; /* It is unlinked */ | 2634 | inode->i_nlink = 0; /* It is unlinked */ |
@@ -4457,3 +4457,4 @@ size_t ksize(const void *objp) | |||
4457 | 4457 | ||
4458 | return obj_size(virt_to_cache(objp)); | 4458 | return obj_size(virt_to_cache(objp)); |
4459 | } | 4459 | } |
4460 | EXPORT_SYMBOL(ksize); | ||
@@ -126,9 +126,9 @@ static LIST_HEAD(free_slob_medium); | |||
126 | static LIST_HEAD(free_slob_large); | 126 | static LIST_HEAD(free_slob_large); |
127 | 127 | ||
128 | /* | 128 | /* |
129 | * slob_page: True for all slob pages (false for bigblock pages) | 129 | * is_slob_page: True for all slob pages (false for bigblock pages) |
130 | */ | 130 | */ |
131 | static inline int slob_page(struct slob_page *sp) | 131 | static inline int is_slob_page(struct slob_page *sp) |
132 | { | 132 | { |
133 | return PageSlobPage((struct page *)sp); | 133 | return PageSlobPage((struct page *)sp); |
134 | } | 134 | } |
@@ -143,6 +143,11 @@ static inline void clear_slob_page(struct slob_page *sp) | |||
143 | __ClearPageSlobPage((struct page *)sp); | 143 | __ClearPageSlobPage((struct page *)sp); |
144 | } | 144 | } |
145 | 145 | ||
146 | static inline struct slob_page *slob_page(const void *addr) | ||
147 | { | ||
148 | return (struct slob_page *)virt_to_page(addr); | ||
149 | } | ||
150 | |||
146 | /* | 151 | /* |
147 | * slob_page_free: true for pages on free_slob_pages list. | 152 | * slob_page_free: true for pages on free_slob_pages list. |
148 | */ | 153 | */ |
@@ -230,7 +235,7 @@ static int slob_last(slob_t *s) | |||
230 | return !((unsigned long)slob_next(s) & ~PAGE_MASK); | 235 | return !((unsigned long)slob_next(s) & ~PAGE_MASK); |
231 | } | 236 | } |
232 | 237 | ||
233 | static void *slob_new_page(gfp_t gfp, int order, int node) | 238 | static void *slob_new_pages(gfp_t gfp, int order, int node) |
234 | { | 239 | { |
235 | void *page; | 240 | void *page; |
236 | 241 | ||
@@ -247,12 +252,17 @@ static void *slob_new_page(gfp_t gfp, int order, int node) | |||
247 | return page_address(page); | 252 | return page_address(page); |
248 | } | 253 | } |
249 | 254 | ||
255 | static void slob_free_pages(void *b, int order) | ||
256 | { | ||
257 | free_pages((unsigned long)b, order); | ||
258 | } | ||
259 | |||
250 | /* | 260 | /* |
251 | * Allocate a slob block within a given slob_page sp. | 261 | * Allocate a slob block within a given slob_page sp. |
252 | */ | 262 | */ |
253 | static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) | 263 | static void *slob_page_alloc(struct slob_page *sp, size_t size, int align) |
254 | { | 264 | { |
255 | slob_t *prev, *cur, *aligned = 0; | 265 | slob_t *prev, *cur, *aligned = NULL; |
256 | int delta = 0, units = SLOB_UNITS(size); | 266 | int delta = 0, units = SLOB_UNITS(size); |
257 | 267 | ||
258 | for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { | 268 | for (prev = NULL, cur = sp->free; ; prev = cur, cur = slob_next(cur)) { |
@@ -349,10 +359,10 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) | |||
349 | 359 | ||
350 | /* Not enough space: must allocate a new page */ | 360 | /* Not enough space: must allocate a new page */ |
351 | if (!b) { | 361 | if (!b) { |
352 | b = slob_new_page(gfp & ~__GFP_ZERO, 0, node); | 362 | b = slob_new_pages(gfp & ~__GFP_ZERO, 0, node); |
353 | if (!b) | 363 | if (!b) |
354 | return 0; | 364 | return NULL; |
355 | sp = (struct slob_page *)virt_to_page(b); | 365 | sp = slob_page(b); |
356 | set_slob_page(sp); | 366 | set_slob_page(sp); |
357 | 367 | ||
358 | spin_lock_irqsave(&slob_lock, flags); | 368 | spin_lock_irqsave(&slob_lock, flags); |
@@ -384,7 +394,7 @@ static void slob_free(void *block, int size) | |||
384 | return; | 394 | return; |
385 | BUG_ON(!size); | 395 | BUG_ON(!size); |
386 | 396 | ||
387 | sp = (struct slob_page *)virt_to_page(block); | 397 | sp = slob_page(block); |
388 | units = SLOB_UNITS(size); | 398 | units = SLOB_UNITS(size); |
389 | 399 | ||
390 | spin_lock_irqsave(&slob_lock, flags); | 400 | spin_lock_irqsave(&slob_lock, flags); |
@@ -393,10 +403,11 @@ static void slob_free(void *block, int size) | |||
393 | /* Go directly to page allocator. Do not pass slob allocator */ | 403 | /* Go directly to page allocator. Do not pass slob allocator */ |
394 | if (slob_page_free(sp)) | 404 | if (slob_page_free(sp)) |
395 | clear_slob_page_free(sp); | 405 | clear_slob_page_free(sp); |
406 | spin_unlock_irqrestore(&slob_lock, flags); | ||
396 | clear_slob_page(sp); | 407 | clear_slob_page(sp); |
397 | free_slob_page(sp); | 408 | free_slob_page(sp); |
398 | free_page((unsigned long)b); | 409 | free_page((unsigned long)b); |
399 | goto out; | 410 | return; |
400 | } | 411 | } |
401 | 412 | ||
402 | if (!slob_page_free(sp)) { | 413 | if (!slob_page_free(sp)) { |
@@ -476,7 +487,7 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) | |||
476 | } else { | 487 | } else { |
477 | void *ret; | 488 | void *ret; |
478 | 489 | ||
479 | ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); | 490 | ret = slob_new_pages(gfp | __GFP_COMP, get_order(size), node); |
480 | if (ret) { | 491 | if (ret) { |
481 | struct page *page; | 492 | struct page *page; |
482 | page = virt_to_page(ret); | 493 | page = virt_to_page(ret); |
@@ -494,8 +505,8 @@ void kfree(const void *block) | |||
494 | if (unlikely(ZERO_OR_NULL_PTR(block))) | 505 | if (unlikely(ZERO_OR_NULL_PTR(block))) |
495 | return; | 506 | return; |
496 | 507 | ||
497 | sp = (struct slob_page *)virt_to_page(block); | 508 | sp = slob_page(block); |
498 | if (slob_page(sp)) { | 509 | if (is_slob_page(sp)) { |
499 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); | 510 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); |
500 | unsigned int *m = (unsigned int *)(block - align); | 511 | unsigned int *m = (unsigned int *)(block - align); |
501 | slob_free(m, *m + align); | 512 | slob_free(m, *m + align); |
@@ -513,14 +524,15 @@ size_t ksize(const void *block) | |||
513 | if (unlikely(block == ZERO_SIZE_PTR)) | 524 | if (unlikely(block == ZERO_SIZE_PTR)) |
514 | return 0; | 525 | return 0; |
515 | 526 | ||
516 | sp = (struct slob_page *)virt_to_page(block); | 527 | sp = slob_page(block); |
517 | if (slob_page(sp)) { | 528 | if (is_slob_page(sp)) { |
518 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); | 529 | int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); |
519 | unsigned int *m = (unsigned int *)(block - align); | 530 | unsigned int *m = (unsigned int *)(block - align); |
520 | return SLOB_UNITS(*m) * SLOB_UNIT; | 531 | return SLOB_UNITS(*m) * SLOB_UNIT; |
521 | } else | 532 | } else |
522 | return sp->page.private; | 533 | return sp->page.private; |
523 | } | 534 | } |
535 | EXPORT_SYMBOL(ksize); | ||
524 | 536 | ||
525 | struct kmem_cache { | 537 | struct kmem_cache { |
526 | unsigned int size, align; | 538 | unsigned int size, align; |
@@ -572,7 +584,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
572 | if (c->size < PAGE_SIZE) | 584 | if (c->size < PAGE_SIZE) |
573 | b = slob_alloc(c->size, flags, c->align, node); | 585 | b = slob_alloc(c->size, flags, c->align, node); |
574 | else | 586 | else |
575 | b = slob_new_page(flags, get_order(c->size), node); | 587 | b = slob_new_pages(flags, get_order(c->size), node); |
576 | 588 | ||
577 | if (c->ctor) | 589 | if (c->ctor) |
578 | c->ctor(b); | 590 | c->ctor(b); |
@@ -586,7 +598,7 @@ static void __kmem_cache_free(void *b, int size) | |||
586 | if (size < PAGE_SIZE) | 598 | if (size < PAGE_SIZE) |
587 | slob_free(b, size); | 599 | slob_free(b, size); |
588 | else | 600 | else |
589 | free_pages((unsigned long)b, get_order(size)); | 601 | slob_free_pages(b, get_order(size)); |
590 | } | 602 | } |
591 | 603 | ||
592 | static void kmem_rcu_free(struct rcu_head *head) | 604 | static void kmem_rcu_free(struct rcu_head *head) |
@@ -1329,7 +1329,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1329 | n = get_node(s, zone_to_nid(zone)); | 1329 | n = get_node(s, zone_to_nid(zone)); |
1330 | 1330 | ||
1331 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && | 1331 | if (n && cpuset_zone_allowed_hardwall(zone, flags) && |
1332 | n->nr_partial > n->min_partial) { | 1332 | n->nr_partial > s->min_partial) { |
1333 | page = get_partial_node(n); | 1333 | page = get_partial_node(n); |
1334 | if (page) | 1334 | if (page) |
1335 | return page; | 1335 | return page; |
@@ -1381,7 +1381,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1381 | slab_unlock(page); | 1381 | slab_unlock(page); |
1382 | } else { | 1382 | } else { |
1383 | stat(c, DEACTIVATE_EMPTY); | 1383 | stat(c, DEACTIVATE_EMPTY); |
1384 | if (n->nr_partial < n->min_partial) { | 1384 | if (n->nr_partial < s->min_partial) { |
1385 | /* | 1385 | /* |
1386 | * Adding an empty slab to the partial slabs in order | 1386 | * Adding an empty slab to the partial slabs in order |
1387 | * to avoid page allocator overhead. This slab needs | 1387 | * to avoid page allocator overhead. This slab needs |
@@ -1838,6 +1838,7 @@ static inline int calculate_order(int size) | |||
1838 | int order; | 1838 | int order; |
1839 | int min_objects; | 1839 | int min_objects; |
1840 | int fraction; | 1840 | int fraction; |
1841 | int max_objects; | ||
1841 | 1842 | ||
1842 | /* | 1843 | /* |
1843 | * Attempt to find best configuration for a slab. This | 1844 | * Attempt to find best configuration for a slab. This |
@@ -1850,6 +1851,9 @@ static inline int calculate_order(int size) | |||
1850 | min_objects = slub_min_objects; | 1851 | min_objects = slub_min_objects; |
1851 | if (!min_objects) | 1852 | if (!min_objects) |
1852 | min_objects = 4 * (fls(nr_cpu_ids) + 1); | 1853 | min_objects = 4 * (fls(nr_cpu_ids) + 1); |
1854 | max_objects = (PAGE_SIZE << slub_max_order)/size; | ||
1855 | min_objects = min(min_objects, max_objects); | ||
1856 | |||
1853 | while (min_objects > 1) { | 1857 | while (min_objects > 1) { |
1854 | fraction = 16; | 1858 | fraction = 16; |
1855 | while (fraction >= 4) { | 1859 | while (fraction >= 4) { |
@@ -1859,7 +1863,7 @@ static inline int calculate_order(int size) | |||
1859 | return order; | 1863 | return order; |
1860 | fraction /= 2; | 1864 | fraction /= 2; |
1861 | } | 1865 | } |
1862 | min_objects /= 2; | 1866 | min_objects --; |
1863 | } | 1867 | } |
1864 | 1868 | ||
1865 | /* | 1869 | /* |
@@ -1922,17 +1926,6 @@ static void | |||
1922 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | 1926 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) |
1923 | { | 1927 | { |
1924 | n->nr_partial = 0; | 1928 | n->nr_partial = 0; |
1925 | |||
1926 | /* | ||
1927 | * The larger the object size is, the more pages we want on the partial | ||
1928 | * list to avoid pounding the page allocator excessively. | ||
1929 | */ | ||
1930 | n->min_partial = ilog2(s->size); | ||
1931 | if (n->min_partial < MIN_PARTIAL) | ||
1932 | n->min_partial = MIN_PARTIAL; | ||
1933 | else if (n->min_partial > MAX_PARTIAL) | ||
1934 | n->min_partial = MAX_PARTIAL; | ||
1935 | |||
1936 | spin_lock_init(&n->list_lock); | 1929 | spin_lock_init(&n->list_lock); |
1937 | INIT_LIST_HEAD(&n->partial); | 1930 | INIT_LIST_HEAD(&n->partial); |
1938 | #ifdef CONFIG_SLUB_DEBUG | 1931 | #ifdef CONFIG_SLUB_DEBUG |
@@ -1990,7 +1983,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, | |||
1990 | static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) | 1983 | static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) |
1991 | { | 1984 | { |
1992 | if (c < per_cpu(kmem_cache_cpu, cpu) || | 1985 | if (c < per_cpu(kmem_cache_cpu, cpu) || |
1993 | c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { | 1986 | c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { |
1994 | kfree(c); | 1987 | kfree(c); |
1995 | return; | 1988 | return; |
1996 | } | 1989 | } |
@@ -2175,6 +2168,15 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
2175 | } | 2168 | } |
2176 | #endif | 2169 | #endif |
2177 | 2170 | ||
2171 | static void set_min_partial(struct kmem_cache *s, unsigned long min) | ||
2172 | { | ||
2173 | if (min < MIN_PARTIAL) | ||
2174 | min = MIN_PARTIAL; | ||
2175 | else if (min > MAX_PARTIAL) | ||
2176 | min = MAX_PARTIAL; | ||
2177 | s->min_partial = min; | ||
2178 | } | ||
2179 | |||
2178 | /* | 2180 | /* |
2179 | * calculate_sizes() determines the order and the distribution of data within | 2181 | * calculate_sizes() determines the order and the distribution of data within |
2180 | * a slab object. | 2182 | * a slab object. |
@@ -2313,6 +2315,11 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2313 | if (!calculate_sizes(s, -1)) | 2315 | if (!calculate_sizes(s, -1)) |
2314 | goto error; | 2316 | goto error; |
2315 | 2317 | ||
2318 | /* | ||
2319 | * The larger the object size is, the more pages we want on the partial | ||
2320 | * list to avoid pounding the page allocator excessively. | ||
2321 | */ | ||
2322 | set_min_partial(s, ilog2(s->size)); | ||
2316 | s->refcount = 1; | 2323 | s->refcount = 1; |
2317 | #ifdef CONFIG_NUMA | 2324 | #ifdef CONFIG_NUMA |
2318 | s->remote_node_defrag_ratio = 1000; | 2325 | s->remote_node_defrag_ratio = 1000; |
@@ -2469,7 +2476,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2469 | * Kmalloc subsystem | 2476 | * Kmalloc subsystem |
2470 | *******************************************************************/ | 2477 | *******************************************************************/ |
2471 | 2478 | ||
2472 | struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; | 2479 | struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; |
2473 | EXPORT_SYMBOL(kmalloc_caches); | 2480 | EXPORT_SYMBOL(kmalloc_caches); |
2474 | 2481 | ||
2475 | static int __init setup_slub_min_order(char *str) | 2482 | static int __init setup_slub_min_order(char *str) |
@@ -2531,7 +2538,7 @@ panic: | |||
2531 | } | 2538 | } |
2532 | 2539 | ||
2533 | #ifdef CONFIG_ZONE_DMA | 2540 | #ifdef CONFIG_ZONE_DMA |
2534 | static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; | 2541 | static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; |
2535 | 2542 | ||
2536 | static void sysfs_add_func(struct work_struct *w) | 2543 | static void sysfs_add_func(struct work_struct *w) |
2537 | { | 2544 | { |
@@ -2652,7 +2659,7 @@ void *__kmalloc(size_t size, gfp_t flags) | |||
2652 | { | 2659 | { |
2653 | struct kmem_cache *s; | 2660 | struct kmem_cache *s; |
2654 | 2661 | ||
2655 | if (unlikely(size > PAGE_SIZE)) | 2662 | if (unlikely(size > SLUB_MAX_SIZE)) |
2656 | return kmalloc_large(size, flags); | 2663 | return kmalloc_large(size, flags); |
2657 | 2664 | ||
2658 | s = get_slab(size, flags); | 2665 | s = get_slab(size, flags); |
@@ -2680,7 +2687,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) | |||
2680 | { | 2687 | { |
2681 | struct kmem_cache *s; | 2688 | struct kmem_cache *s; |
2682 | 2689 | ||
2683 | if (unlikely(size > PAGE_SIZE)) | 2690 | if (unlikely(size > SLUB_MAX_SIZE)) |
2684 | return kmalloc_large_node(size, flags, node); | 2691 | return kmalloc_large_node(size, flags, node); |
2685 | 2692 | ||
2686 | s = get_slab(size, flags); | 2693 | s = get_slab(size, flags); |
@@ -2730,6 +2737,7 @@ size_t ksize(const void *object) | |||
2730 | */ | 2737 | */ |
2731 | return s->size; | 2738 | return s->size; |
2732 | } | 2739 | } |
2740 | EXPORT_SYMBOL(ksize); | ||
2733 | 2741 | ||
2734 | void kfree(const void *x) | 2742 | void kfree(const void *x) |
2735 | { | 2743 | { |
@@ -2979,7 +2987,7 @@ void __init kmem_cache_init(void) | |||
2979 | caches++; | 2987 | caches++; |
2980 | } | 2988 | } |
2981 | 2989 | ||
2982 | for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { | 2990 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { |
2983 | create_kmalloc_cache(&kmalloc_caches[i], | 2991 | create_kmalloc_cache(&kmalloc_caches[i], |
2984 | "kmalloc", 1 << i, GFP_KERNEL); | 2992 | "kmalloc", 1 << i, GFP_KERNEL); |
2985 | caches++; | 2993 | caches++; |
@@ -3016,7 +3024,7 @@ void __init kmem_cache_init(void) | |||
3016 | slab_state = UP; | 3024 | slab_state = UP; |
3017 | 3025 | ||
3018 | /* Provide the correct kmalloc names now that the caches are up */ | 3026 | /* Provide the correct kmalloc names now that the caches are up */ |
3019 | for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) | 3027 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) |
3020 | kmalloc_caches[i]. name = | 3028 | kmalloc_caches[i]. name = |
3021 | kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); | 3029 | kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); |
3022 | 3030 | ||
@@ -3216,7 +3224,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) | |||
3216 | { | 3224 | { |
3217 | struct kmem_cache *s; | 3225 | struct kmem_cache *s; |
3218 | 3226 | ||
3219 | if (unlikely(size > PAGE_SIZE)) | 3227 | if (unlikely(size > SLUB_MAX_SIZE)) |
3220 | return kmalloc_large(size, gfpflags); | 3228 | return kmalloc_large(size, gfpflags); |
3221 | 3229 | ||
3222 | s = get_slab(size, gfpflags); | 3230 | s = get_slab(size, gfpflags); |
@@ -3232,7 +3240,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, | |||
3232 | { | 3240 | { |
3233 | struct kmem_cache *s; | 3241 | struct kmem_cache *s; |
3234 | 3242 | ||
3235 | if (unlikely(size > PAGE_SIZE)) | 3243 | if (unlikely(size > SLUB_MAX_SIZE)) |
3236 | return kmalloc_large_node(size, gfpflags, node); | 3244 | return kmalloc_large_node(size, gfpflags, node); |
3237 | 3245 | ||
3238 | s = get_slab(size, gfpflags); | 3246 | s = get_slab(size, gfpflags); |
@@ -3829,6 +3837,26 @@ static ssize_t order_show(struct kmem_cache *s, char *buf) | |||
3829 | } | 3837 | } |
3830 | SLAB_ATTR(order); | 3838 | SLAB_ATTR(order); |
3831 | 3839 | ||
3840 | static ssize_t min_partial_show(struct kmem_cache *s, char *buf) | ||
3841 | { | ||
3842 | return sprintf(buf, "%lu\n", s->min_partial); | ||
3843 | } | ||
3844 | |||
3845 | static ssize_t min_partial_store(struct kmem_cache *s, const char *buf, | ||
3846 | size_t length) | ||
3847 | { | ||
3848 | unsigned long min; | ||
3849 | int err; | ||
3850 | |||
3851 | err = strict_strtoul(buf, 10, &min); | ||
3852 | if (err) | ||
3853 | return err; | ||
3854 | |||
3855 | set_min_partial(s, min); | ||
3856 | return length; | ||
3857 | } | ||
3858 | SLAB_ATTR(min_partial); | ||
3859 | |||
3832 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) | 3860 | static ssize_t ctor_show(struct kmem_cache *s, char *buf) |
3833 | { | 3861 | { |
3834 | if (s->ctor) { | 3862 | if (s->ctor) { |
@@ -4144,6 +4172,7 @@ static struct attribute *slab_attrs[] = { | |||
4144 | &object_size_attr.attr, | 4172 | &object_size_attr.attr, |
4145 | &objs_per_slab_attr.attr, | 4173 | &objs_per_slab_attr.attr, |
4146 | &order_attr.attr, | 4174 | &order_attr.attr, |
4175 | &min_partial_attr.attr, | ||
4147 | &objects_attr.attr, | 4176 | &objects_attr.attr, |
4148 | &objects_partial_attr.attr, | 4177 | &objects_partial_attr.attr, |
4149 | &total_objects_attr.attr, | 4178 | &total_objects_attr.attr, |
diff --git a/mm/swapfile.c b/mm/swapfile.c index da422c47e2ee..312fafe0ab6e 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -635,7 +635,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
635 | 635 | ||
636 | if (!bdev) { | 636 | if (!bdev) { |
637 | if (bdev_p) | 637 | if (bdev_p) |
638 | *bdev_p = sis->bdev; | 638 | *bdev_p = bdget(sis->bdev->bd_dev); |
639 | 639 | ||
640 | spin_unlock(&swap_lock); | 640 | spin_unlock(&swap_lock); |
641 | return i; | 641 | return i; |
@@ -647,7 +647,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) | |||
647 | struct swap_extent, list); | 647 | struct swap_extent, list); |
648 | if (se->start_block == offset) { | 648 | if (se->start_block == offset) { |
649 | if (bdev_p) | 649 | if (bdev_p) |
650 | *bdev_p = sis->bdev; | 650 | *bdev_p = bdget(sis->bdev->bd_dev); |
651 | 651 | ||
652 | spin_unlock(&swap_lock); | 652 | spin_unlock(&swap_lock); |
653 | bdput(bdev); | 653 | bdput(bdev); |
@@ -698,8 +698,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
698 | pte_t *pte; | 698 | pte_t *pte; |
699 | int ret = 1; | 699 | int ret = 1; |
700 | 700 | ||
701 | if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) | 701 | if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) { |
702 | ret = -ENOMEM; | 702 | ret = -ENOMEM; |
703 | goto out_nolock; | ||
704 | } | ||
703 | 705 | ||
704 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 706 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
705 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { | 707 | if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) { |
@@ -723,6 +725,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
723 | activate_page(page); | 725 | activate_page(page); |
724 | out: | 726 | out: |
725 | pte_unmap_unlock(pte, ptl); | 727 | pte_unmap_unlock(pte, ptl); |
728 | out_nolock: | ||
726 | return ret; | 729 | return ret; |
727 | } | 730 | } |
728 | 731 | ||
@@ -1377,7 +1380,7 @@ out: | |||
1377 | return ret; | 1380 | return ret; |
1378 | } | 1381 | } |
1379 | 1382 | ||
1380 | asmlinkage long sys_swapoff(const char __user * specialfile) | 1383 | SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) |
1381 | { | 1384 | { |
1382 | struct swap_info_struct * p = NULL; | 1385 | struct swap_info_struct * p = NULL; |
1383 | unsigned short *swap_map; | 1386 | unsigned short *swap_map; |
@@ -1633,7 +1636,7 @@ late_initcall(max_swapfiles_check); | |||
1633 | * | 1636 | * |
1634 | * The swapon system call | 1637 | * The swapon system call |
1635 | */ | 1638 | */ |
1636 | asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) | 1639 | SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) |
1637 | { | 1640 | { |
1638 | struct swap_info_struct * p; | 1641 | struct swap_info_struct * p; |
1639 | char *name = NULL; | 1642 | char *name = NULL; |
@@ -129,6 +129,26 @@ void *krealloc(const void *p, size_t new_size, gfp_t flags) | |||
129 | } | 129 | } |
130 | EXPORT_SYMBOL(krealloc); | 130 | EXPORT_SYMBOL(krealloc); |
131 | 131 | ||
132 | /** | ||
133 | * kzfree - like kfree but zero memory | ||
134 | * @p: object to free memory of | ||
135 | * | ||
136 | * The memory of the object @p points to is zeroed before freed. | ||
137 | * If @p is %NULL, kzfree() does nothing. | ||
138 | */ | ||
139 | void kzfree(const void *p) | ||
140 | { | ||
141 | size_t ks; | ||
142 | void *mem = (void *)p; | ||
143 | |||
144 | if (unlikely(ZERO_OR_NULL_PTR(mem))) | ||
145 | return; | ||
146 | ks = ksize(mem); | ||
147 | memset(mem, 0, ks); | ||
148 | kfree(mem); | ||
149 | } | ||
150 | EXPORT_SYMBOL(kzfree); | ||
151 | |||
132 | /* | 152 | /* |
133 | * strndup_user - duplicate an existing string from user space | 153 | * strndup_user - duplicate an existing string from user space |
134 | * @s: The string to duplicate | 154 | * @s: The string to duplicate |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c5db9a7264d9..520a75980269 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/highmem.h> | 14 | #include <linux/highmem.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/spinlock.h> | 16 | #include <linux/spinlock.h> |
17 | #include <linux/mutex.h> | ||
18 | #include <linux/interrupt.h> | 17 | #include <linux/interrupt.h> |
19 | #include <linux/proc_fs.h> | 18 | #include <linux/proc_fs.h> |
20 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
@@ -24,6 +23,7 @@ | |||
24 | #include <linux/rbtree.h> | 23 | #include <linux/rbtree.h> |
25 | #include <linux/radix-tree.h> | 24 | #include <linux/radix-tree.h> |
26 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
26 | #include <linux/bootmem.h> | ||
27 | 27 | ||
28 | #include <asm/atomic.h> | 28 | #include <asm/atomic.h> |
29 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
@@ -323,6 +323,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, | |||
323 | unsigned long addr; | 323 | unsigned long addr; |
324 | int purged = 0; | 324 | int purged = 0; |
325 | 325 | ||
326 | BUG_ON(!size); | ||
326 | BUG_ON(size & ~PAGE_MASK); | 327 | BUG_ON(size & ~PAGE_MASK); |
327 | 328 | ||
328 | va = kmalloc_node(sizeof(struct vmap_area), | 329 | va = kmalloc_node(sizeof(struct vmap_area), |
@@ -334,6 +335,9 @@ retry: | |||
334 | addr = ALIGN(vstart, align); | 335 | addr = ALIGN(vstart, align); |
335 | 336 | ||
336 | spin_lock(&vmap_area_lock); | 337 | spin_lock(&vmap_area_lock); |
338 | if (addr + size - 1 < addr) | ||
339 | goto overflow; | ||
340 | |||
337 | /* XXX: could have a last_hole cache */ | 341 | /* XXX: could have a last_hole cache */ |
338 | n = vmap_area_root.rb_node; | 342 | n = vmap_area_root.rb_node; |
339 | if (n) { | 343 | if (n) { |
@@ -365,6 +369,8 @@ retry: | |||
365 | 369 | ||
366 | while (addr + size > first->va_start && addr + size <= vend) { | 370 | while (addr + size > first->va_start && addr + size <= vend) { |
367 | addr = ALIGN(first->va_end + PAGE_SIZE, align); | 371 | addr = ALIGN(first->va_end + PAGE_SIZE, align); |
372 | if (addr + size - 1 < addr) | ||
373 | goto overflow; | ||
368 | 374 | ||
369 | n = rb_next(&first->rb_node); | 375 | n = rb_next(&first->rb_node); |
370 | if (n) | 376 | if (n) |
@@ -375,6 +381,7 @@ retry: | |||
375 | } | 381 | } |
376 | found: | 382 | found: |
377 | if (addr + size > vend) { | 383 | if (addr + size > vend) { |
384 | overflow: | ||
378 | spin_unlock(&vmap_area_lock); | 385 | spin_unlock(&vmap_area_lock); |
379 | if (!purged) { | 386 | if (!purged) { |
380 | purge_vmap_area_lazy(); | 387 | purge_vmap_area_lazy(); |
@@ -495,9 +502,10 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | |||
495 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | 502 | static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, |
496 | int sync, int force_flush) | 503 | int sync, int force_flush) |
497 | { | 504 | { |
498 | static DEFINE_MUTEX(purge_lock); | 505 | static DEFINE_SPINLOCK(purge_lock); |
499 | LIST_HEAD(valist); | 506 | LIST_HEAD(valist); |
500 | struct vmap_area *va; | 507 | struct vmap_area *va; |
508 | struct vmap_area *n_va; | ||
501 | int nr = 0; | 509 | int nr = 0; |
502 | 510 | ||
503 | /* | 511 | /* |
@@ -506,10 +514,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
506 | * the case that isn't actually used at the moment anyway. | 514 | * the case that isn't actually used at the moment anyway. |
507 | */ | 515 | */ |
508 | if (!sync && !force_flush) { | 516 | if (!sync && !force_flush) { |
509 | if (!mutex_trylock(&purge_lock)) | 517 | if (!spin_trylock(&purge_lock)) |
510 | return; | 518 | return; |
511 | } else | 519 | } else |
512 | mutex_lock(&purge_lock); | 520 | spin_lock(&purge_lock); |
513 | 521 | ||
514 | rcu_read_lock(); | 522 | rcu_read_lock(); |
515 | list_for_each_entry_rcu(va, &vmap_area_list, list) { | 523 | list_for_each_entry_rcu(va, &vmap_area_list, list) { |
@@ -537,11 +545,11 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
537 | 545 | ||
538 | if (nr) { | 546 | if (nr) { |
539 | spin_lock(&vmap_area_lock); | 547 | spin_lock(&vmap_area_lock); |
540 | list_for_each_entry(va, &valist, purge_list) | 548 | list_for_each_entry_safe(va, n_va, &valist, purge_list) |
541 | __free_vmap_area(va); | 549 | __free_vmap_area(va); |
542 | spin_unlock(&vmap_area_lock); | 550 | spin_unlock(&vmap_area_lock); |
543 | } | 551 | } |
544 | mutex_unlock(&purge_lock); | 552 | spin_unlock(&purge_lock); |
545 | } | 553 | } |
546 | 554 | ||
547 | /* | 555 | /* |
@@ -984,6 +992,8 @@ EXPORT_SYMBOL(vm_map_ram); | |||
984 | 992 | ||
985 | void __init vmalloc_init(void) | 993 | void __init vmalloc_init(void) |
986 | { | 994 | { |
995 | struct vmap_area *va; | ||
996 | struct vm_struct *tmp; | ||
987 | int i; | 997 | int i; |
988 | 998 | ||
989 | for_each_possible_cpu(i) { | 999 | for_each_possible_cpu(i) { |
@@ -996,12 +1006,22 @@ void __init vmalloc_init(void) | |||
996 | vbq->nr_dirty = 0; | 1006 | vbq->nr_dirty = 0; |
997 | } | 1007 | } |
998 | 1008 | ||
1009 | /* Import existing vmlist entries. */ | ||
1010 | for (tmp = vmlist; tmp; tmp = tmp->next) { | ||
1011 | va = alloc_bootmem(sizeof(struct vmap_area)); | ||
1012 | va->flags = tmp->flags | VM_VM_AREA; | ||
1013 | va->va_start = (unsigned long)tmp->addr; | ||
1014 | va->va_end = va->va_start + tmp->size; | ||
1015 | __insert_vmap_area(va); | ||
1016 | } | ||
999 | vmap_initialized = true; | 1017 | vmap_initialized = true; |
1000 | } | 1018 | } |
1001 | 1019 | ||
1002 | void unmap_kernel_range(unsigned long addr, unsigned long size) | 1020 | void unmap_kernel_range(unsigned long addr, unsigned long size) |
1003 | { | 1021 | { |
1004 | unsigned long end = addr + size; | 1022 | unsigned long end = addr + size; |
1023 | |||
1024 | flush_cache_vunmap(addr, end); | ||
1005 | vunmap_page_range(addr, end); | 1025 | vunmap_page_range(addr, end); |
1006 | flush_tlb_kernel_range(addr, end); | 1026 | flush_tlb_kernel_range(addr, end); |
1007 | } | 1027 | } |
@@ -1096,6 +1116,14 @@ struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, | |||
1096 | } | 1116 | } |
1097 | EXPORT_SYMBOL_GPL(__get_vm_area); | 1117 | EXPORT_SYMBOL_GPL(__get_vm_area); |
1098 | 1118 | ||
1119 | struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, | ||
1120 | unsigned long start, unsigned long end, | ||
1121 | void *caller) | ||
1122 | { | ||
1123 | return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL, | ||
1124 | caller); | ||
1125 | } | ||
1126 | |||
1099 | /** | 1127 | /** |
1100 | * get_vm_area - reserve a contiguous kernel virtual area | 1128 | * get_vm_area - reserve a contiguous kernel virtual area |
1101 | * @size: size of the area | 1129 | * @size: size of the area |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 9a27c44aa327..56ddf41149eb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1262,7 +1262,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1262 | * Move the pages to the [file or anon] inactive list. | 1262 | * Move the pages to the [file or anon] inactive list. |
1263 | */ | 1263 | */ |
1264 | pagevec_init(&pvec, 1); | 1264 | pagevec_init(&pvec, 1); |
1265 | pgmoved = 0; | ||
1266 | lru = LRU_BASE + file * LRU_FILE; | 1265 | lru = LRU_BASE + file * LRU_FILE; |
1267 | 1266 | ||
1268 | spin_lock_irq(&zone->lru_lock); | 1267 | spin_lock_irq(&zone->lru_lock); |
@@ -1274,6 +1273,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1274 | */ | 1273 | */ |
1275 | reclaim_stat->recent_rotated[!!file] += pgmoved; | 1274 | reclaim_stat->recent_rotated[!!file] += pgmoved; |
1276 | 1275 | ||
1276 | pgmoved = 0; | ||
1277 | while (!list_empty(&l_inactive)) { | 1277 | while (!list_empty(&l_inactive)) { |
1278 | page = lru_to_page(&l_inactive); | 1278 | page = lru_to_page(&l_inactive); |
1279 | prefetchw_prev_lru_page(page, &l_inactive, flags); | 1279 | prefetchw_prev_lru_page(page, &l_inactive, flags); |
@@ -1469,7 +1469,7 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1469 | int file = is_file_lru(l); | 1469 | int file = is_file_lru(l); |
1470 | int scan; | 1470 | int scan; |
1471 | 1471 | ||
1472 | scan = zone_page_state(zone, NR_LRU_BASE + l); | 1472 | scan = zone_nr_pages(zone, sc, l); |
1473 | if (priority) { | 1473 | if (priority) { |
1474 | scan >>= priority; | 1474 | scan >>= priority; |
1475 | scan = (scan * percent[file]) / 100; | 1475 | scan = (scan * percent[file]) / 100; |
@@ -2057,31 +2057,31 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, | |||
2057 | int pass, struct scan_control *sc) | 2057 | int pass, struct scan_control *sc) |
2058 | { | 2058 | { |
2059 | struct zone *zone; | 2059 | struct zone *zone; |
2060 | unsigned long nr_to_scan, ret = 0; | 2060 | unsigned long ret = 0; |
2061 | enum lru_list l; | ||
2062 | 2061 | ||
2063 | for_each_zone(zone) { | 2062 | for_each_zone(zone) { |
2063 | enum lru_list l; | ||
2064 | 2064 | ||
2065 | if (!populated_zone(zone)) | 2065 | if (!populated_zone(zone)) |
2066 | continue; | 2066 | continue; |
2067 | |||
2068 | if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) | 2067 | if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY) |
2069 | continue; | 2068 | continue; |
2070 | 2069 | ||
2071 | for_each_evictable_lru(l) { | 2070 | for_each_evictable_lru(l) { |
2071 | enum zone_stat_item ls = NR_LRU_BASE + l; | ||
2072 | unsigned long lru_pages = zone_page_state(zone, ls); | ||
2073 | |||
2072 | /* For pass = 0, we don't shrink the active list */ | 2074 | /* For pass = 0, we don't shrink the active list */ |
2073 | if (pass == 0 && | 2075 | if (pass == 0 && (l == LRU_ACTIVE_ANON || |
2074 | (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) | 2076 | l == LRU_ACTIVE_FILE)) |
2075 | continue; | 2077 | continue; |
2076 | 2078 | ||
2077 | zone->lru[l].nr_scan += | 2079 | zone->lru[l].nr_scan += (lru_pages >> prio) + 1; |
2078 | (zone_page_state(zone, NR_LRU_BASE + l) | ||
2079 | >> prio) + 1; | ||
2080 | if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { | 2080 | if (zone->lru[l].nr_scan >= nr_pages || pass > 3) { |
2081 | unsigned long nr_to_scan; | ||
2082 | |||
2081 | zone->lru[l].nr_scan = 0; | 2083 | zone->lru[l].nr_scan = 0; |
2082 | nr_to_scan = min(nr_pages, | 2084 | nr_to_scan = min(nr_pages, lru_pages); |
2083 | zone_page_state(zone, | ||
2084 | NR_LRU_BASE + l)); | ||
2085 | ret += shrink_list(l, nr_to_scan, zone, | 2085 | ret += shrink_list(l, nr_to_scan, zone, |
2086 | sc, prio); | 2086 | sc, prio); |
2087 | if (ret >= nr_pages) | 2087 | if (ret >= nr_pages) |
@@ -2089,7 +2089,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, | |||
2089 | } | 2089 | } |
2090 | } | 2090 | } |
2091 | } | 2091 | } |
2092 | |||
2093 | return ret; | 2092 | return ret; |
2094 | } | 2093 | } |
2095 | 2094 | ||
@@ -2112,7 +2111,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
2112 | .may_swap = 0, | 2111 | .may_swap = 0, |
2113 | .swap_cluster_max = nr_pages, | 2112 | .swap_cluster_max = nr_pages, |
2114 | .may_writepage = 1, | 2113 | .may_writepage = 1, |
2115 | .swappiness = vm_swappiness, | ||
2116 | .isolate_pages = isolate_pages_global, | 2114 | .isolate_pages = isolate_pages_global, |
2117 | }; | 2115 | }; |
2118 | 2116 | ||
@@ -2146,10 +2144,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
2146 | int prio; | 2144 | int prio; |
2147 | 2145 | ||
2148 | /* Force reclaiming mapped pages in the passes #3 and #4 */ | 2146 | /* Force reclaiming mapped pages in the passes #3 and #4 */ |
2149 | if (pass > 2) { | 2147 | if (pass > 2) |
2150 | sc.may_swap = 1; | 2148 | sc.may_swap = 1; |
2151 | sc.swappiness = 100; | ||
2152 | } | ||
2153 | 2149 | ||
2154 | for (prio = DEF_PRIORITY; prio >= 0; prio--) { | 2150 | for (prio = DEF_PRIORITY; prio >= 0; prio--) { |
2155 | unsigned long nr_to_scan = nr_pages - ret; | 2151 | unsigned long nr_to_scan = nr_pages - ret; |