path: root/mm
author     Ingo Molnar <mingo@elte.hu>  2009-02-07 12:31:54 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-02-07 12:31:54 -0500
commit     673f8205914a12e928c65afbcd78ae748f78df53 (patch)
tree       38c60215646d079fab3bff812e094e914960c7ec /mm
parent     cf47b8f3d96b0b8b10b557444a28b3ca4024ff82 (diff)
parent     ae1a25da8448271a99745da03100d5299575a269 (diff)
Merge branch 'linus' into core/locking
Conflicts:
	fs/btrfs/locking.c
Diffstat (limited to 'mm')
-rw-r--r--  mm/fadvise.c         |  18
-rw-r--r--  mm/filemap.c         |   9
-rw-r--r--  mm/fremap.c          |   4
-rw-r--r--  mm/madvise.c         |   2
-rw-r--r--  mm/memcontrol.c      | 155
-rw-r--r--  mm/memory.c          |   2
-rw-r--r--  mm/mempolicy.c       |  24
-rw-r--r--  mm/migrate.c         |   8
-rw-r--r--  mm/mincore.c         |   4
-rw-r--r--  mm/mlock.c           |  55
-rw-r--r--  mm/mmap.c            |  83
-rw-r--r--  mm/mprotect.c        |   4
-rw-r--r--  mm/mremap.c          |   6
-rw-r--r--  mm/msync.c           |   2
-rw-r--r--  mm/nommu.c           |  35
-rw-r--r--  mm/page-writeback.c  |  21
-rw-r--r--  mm/shmem.c           |   2
-rw-r--r--  mm/slub.c            |   2
-rw-r--r--  mm/swapfile.c        |   9
-rw-r--r--  mm/vmalloc.c         |  20
20 files changed, 264 insertions(+), 201 deletions(-)
diff --git a/mm/fadvise.c b/mm/fadvise.c
index a1da969bd980..54a0f8040afa 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -24,7 +24,7 @@
  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  * deactivate the pages and clear PG_Referenced.
  */
-asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
+SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
 {
 	struct file *file = fget(fd);
 	struct address_space *mapping;
@@ -126,12 +126,26 @@ out:
 	fput(file);
 	return ret;
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fadvise64_64(long fd, loff_t offset, loff_t len, long advice)
+{
+	return SYSC_fadvise64_64((int) fd, offset, len, (int) advice);
+}
+SYSCALL_ALIAS(sys_fadvise64_64, SyS_fadvise64_64);
+#endif
 
 #ifdef __ARCH_WANT_SYS_FADVISE64
 
-asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice)
+SYSCALL_DEFINE(fadvise64)(int fd, loff_t offset, size_t len, int advice)
 {
 	return sys_fadvise64_64(fd, offset, len, advice);
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fadvise64(long fd, loff_t offset, long len, long advice)
+{
+	return SYSC_fadvise64((int) fd, offset, (size_t) len, (int) advice);
+}
+SYSCALL_ALIAS(sys_fadvise64, SyS_fadvise64);
+#endif
 
 #endif
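
All of the syscall changes in this merge follow one pattern: open-coded `asmlinkage long sys_foo(...)` definitions become SYSCALL_DEFINE macros, and on architectures selecting CONFIG_HAVE_SYSCALL_WRAPPERS an all-`long` SyS_foo wrapper casts down to the typed SYSC_foo body, so narrow arguments arriving in 64-bit registers get truncated or sign-extended correctly. A toy userspace illustration of why the wrapper casts (an assumed simplification; the real macros live in include/linux/syscalls.h):

#include <stdio.h>

#ifndef asmlinkage
#define asmlinkage
#endif

/* typed body, what SYSCALL_DEFINE would name SYSC_example() */
static long SYSC_example(int fd, long len)
{
	return (long)fd + len;
}

/* all-long wrapper, the shape CONFIG_HAVE_SYSCALL_WRAPPERS emits as SyS_example() */
asmlinkage long SyS_example(long fd, long len)
{
	return SYSC_example((int)fd, len);	/* the cast re-narrows the register */
}

int main(void)
{
	/* 0x100000007 truncates to fd == 7 once the wrapper casts it */
	printf("%ld\n", SyS_example(0x100000007L, 10));
	return 0;
}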
diff --git a/mm/filemap.c b/mm/filemap.c
index ceba0bd03662..23acefe51808 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp,
 	return 0;
 }
 
-asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
+SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
 {
 	ssize_t ret;
 	struct file *file;
@@ -1393,6 +1393,13 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
 	}
 	return ret;
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
+{
+	return SYSC_readahead((int) fd, offset, (size_t) count);
+}
+SYSCALL_ALIAS(sys_readahead, SyS_readahead);
+#endif
 
 #ifdef CONFIG_MMU
 /**
diff --git a/mm/fremap.c b/mm/fremap.c
index 62d5bbda921a..736ba7f3306a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -120,8 +120,8 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
  * and the vma's default protection is used. Arbitrary protections
  * might be implemented in the future.
  */
-asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
-	unsigned long prot, unsigned long pgoff, unsigned long flags)
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+	unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
 {
 	struct mm_struct *mm = current->mm;
 	struct address_space *mapping;
diff --git a/mm/madvise.c b/mm/madvise.c
index f9349c18a1b5..b9ce574827c8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -281,7 +281,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
281 * -EBADF - map exists, but area maps something that isn't a file. 281 * -EBADF - map exists, but area maps something that isn't a file.
282 * -EAGAIN - a kernel resource was temporarily unavailable. 282 * -EAGAIN - a kernel resource was temporarily unavailable.
283 */ 283 */
284asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) 284SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
285{ 285{
286 unsigned long end, tmp; 286 unsigned long end, tmp;
287 struct vm_area_struct * vma, *prev; 287 struct vm_area_struct * vma, *prev;
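
For syscalls with a fixed argument count, the SYSCALL_DEFINEn(name, type1, arg1, ...) form used above interleaves types and names so one macro can emit both the typed function and the all-long wrapper from a single list. A toy two-argument version showing just the token-pasting shape (a hypothetical macro, not the kernel's):

#include <stdio.h>

/* Toy re-implementation of the DEFINE2 shape: name, t1, a1, t2, a2.
 * The kernel's version also emits metadata and the SyS_ wrapper. */
#define MY_SYSCALL_DEFINE2(name, t1, a1, t2, a2) \
	long my_sys_##name(t1 a1, t2 a2)

MY_SYSCALL_DEFINE2(add, long, x, int, y)
{
	return x + y;
}

int main(void)
{
	printf("%ld\n", my_sys_add(40, 2));
	return 0;
}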
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..8e4be9cb2a6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -202,6 +202,7 @@ pcg_default_flags[NR_CHARGE_TYPE] = {
 
 static void mem_cgroup_get(struct mem_cgroup *mem);
 static void mem_cgroup_put(struct mem_cgroup *mem);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 					struct page_cgroup *pc,
@@ -358,6 +359,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
+	/*
+	 * Used bit is set without atomic ops but after smp_wmb().
+	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
+	 */
 	smp_rmb();
 	/* unused page is not rotated. */
 	if (!PageCgroupUsed(pc))
@@ -374,7 +379,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 	if (mem_cgroup_disabled())
 		return;
 	pc = lookup_page_cgroup(page);
-	/* barrier to sync with "charge" */
+	/*
+	 * Used bit is set without atomic ops but after smp_wmb().
+	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
+	 */
 	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
@@ -559,6 +567,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
+	/*
+	 * Used bit is set without atomic ops but after smp_wmb().
+	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
+	 */
+	smp_rmb();
+	if (!PageCgroupUsed(pc))
+		return NULL;
+
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -618,7 +634,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * called with hierarchy_mutex held
  */
 static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
 {
 	struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
 
@@ -629,19 +645,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
 		/*
 		 * Walk down to children
 		 */
-		mem_cgroup_put(curr);
 		cgroup = list_entry(curr_cgroup->children.next,
 				struct cgroup, sibling);
 		curr = mem_cgroup_from_cont(cgroup);
-		mem_cgroup_get(curr);
 		goto done;
 	}
 
 visit_parent:
 	if (curr_cgroup == root_cgroup) {
-		mem_cgroup_put(curr);
-		curr = root_mem;
-		mem_cgroup_get(curr);
+		/* caller handles NULL case */
+		curr = NULL;
 		goto done;
 	}
 
@@ -649,11 +662,9 @@ visit_parent:
 	 * Goto next sibling
 	 */
 	if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
-		mem_cgroup_put(curr);
 		cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
 				sibling);
 		curr = mem_cgroup_from_cont(cgroup);
-		mem_cgroup_get(curr);
 		goto done;
 	}
 
@@ -664,7 +675,6 @@ visit_parent:
 	goto visit_parent;
 
 done:
-	root_mem->last_scanned_child = curr;
 	return curr;
 }
 
@@ -674,40 +684,46 @@ done:
  * that to reclaim free pages from.
  */
 static struct mem_cgroup *
-mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
 {
 	struct cgroup *cgroup;
-	struct mem_cgroup *ret;
+	struct mem_cgroup *orig, *next;
 	bool obsolete;
 
-	obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
-
 	/*
 	 * Scan all children under the mem_cgroup mem
 	 */
 	mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+
+	orig = root_mem->last_scanned_child;
+	obsolete = mem_cgroup_is_obsolete(orig);
+
 	if (list_empty(&root_mem->css.cgroup->children)) {
-		ret = root_mem;
+		/*
+		 * root_mem might have children before and last_scanned_child
+		 * may point to one of them. We put it later.
+		 */
+		if (orig)
+			VM_BUG_ON(!obsolete);
+		next = NULL;
 		goto done;
 	}
 
-	if (!root_mem->last_scanned_child || obsolete) {
-
-		if (obsolete && root_mem->last_scanned_child)
-			mem_cgroup_put(root_mem->last_scanned_child);
-
+	if (!orig || obsolete) {
 		cgroup = list_first_entry(&root_mem->css.cgroup->children,
 				struct cgroup, sibling);
-		ret = mem_cgroup_from_cont(cgroup);
-		mem_cgroup_get(ret);
+		next = mem_cgroup_from_cont(cgroup);
 	} else
-		ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
-				root_mem);
+		next = __mem_cgroup_get_next_node(orig, root_mem);
 
 done:
-	root_mem->last_scanned_child = ret;
+	if (next)
+		mem_cgroup_get(next);
+	root_mem->last_scanned_child = next;
+	if (orig)
+		mem_cgroup_put(orig);
 	mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
-	return ret;
+	return (next) ? next : root_mem;
 }
 
 static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +774,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 	 * but there might be left over accounting, even after children
 	 * have left.
 	 */
-	ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
-					   get_swappiness(root_mem));
+	ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+					    get_swappiness(root_mem));
 	if (mem_cgroup_check_under_limit(root_mem))
-		return 0;
+		return 1;	/* indicate reclaim has succeeded */
 	if (!root_mem->use_hierarchy)
 		return ret;
 
-	next_mem = mem_cgroup_get_first_node(root_mem);
+	next_mem = mem_cgroup_get_next_node(root_mem);
 
 	while (next_mem != root_mem) {
 		if (mem_cgroup_is_obsolete(next_mem)) {
-			mem_cgroup_put(next_mem);
-			next_mem = mem_cgroup_get_first_node(root_mem);
+			next_mem = mem_cgroup_get_next_node(root_mem);
 			continue;
 		}
-		ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
-						   get_swappiness(next_mem));
+		ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+						    get_swappiness(next_mem));
 		if (mem_cgroup_check_under_limit(root_mem))
-			return 0;
-		mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
-		next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
-		mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+			return 1;	/* indicate reclaim has succeeded */
+		next_mem = mem_cgroup_get_next_node(root_mem);
 	}
 	return ret;
 }
@@ -863,6 +876,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
 		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
 							noswap);
+		if (ret)
+			continue;
 
 		/*
 		 * try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +994,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
 	if (pc->mem_cgroup != from)
 		goto out;
 
-	css_put(&from->css);
 	res_counter_uncharge(&from->res, PAGE_SIZE);
 	mem_cgroup_charge_statistics(from, pc, false);
 	if (do_swap_account)
 		res_counter_uncharge(&from->memsw, PAGE_SIZE);
+	css_put(&from->css);
+
+	css_get(&to->css);
 	pc->mem_cgroup = to;
 	mem_cgroup_charge_statistics(to, pc, true);
-	css_get(&to->css);
 	ret = 0;
 out:
 	unlock_page_cgroup(pc);
@@ -1019,8 +1035,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	if (ret || !parent)
 		return ret;
 
-	if (!get_page_unless_zero(page))
-		return -EBUSY;
+	if (!get_page_unless_zero(page)) {
+		ret = -EBUSY;
+		goto uncharge;
+	}
 
 	ret = isolate_lru_page(page);
 
@@ -1029,19 +1047,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 
 	ret = mem_cgroup_move_account(pc, child, parent);
 
-	/* drop extra refcnt by try_charge() (move_account increment one) */
-	css_put(&parent->css);
 	putback_lru_page(page);
 	if (!ret) {
 		put_page(page);
+		/* drop extra refcnt by try_charge() */
+		css_put(&parent->css);
 		return 0;
 	}
-	/* uncharge if move fails */
+
 cancel:
+	put_page(page);
+uncharge:
+	/* drop extra refcnt by try_charge() */
+	css_put(&parent->css);
+	/* uncharge if move fails */
 	res_counter_uncharge(&parent->res, PAGE_SIZE);
 	if (do_swap_account)
 		res_counter_uncharge(&parent->memsw, PAGE_SIZE);
-	put_page(page);
 	return ret;
 }
 
@@ -1663,7 +1685,7 @@ move_account:
 	/* This is for making all *used* pages to be on LRU. */
 	lru_add_drain_all();
 	ret = 0;
-	for_each_node_state(node, N_POSSIBLE) {
+	for_each_node_state(node, N_HIGH_MEMORY) {
 		for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
 			enum lru_list l;
 			for_each_lru(l) {
@@ -1971,6 +1993,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
 	struct mem_cgroup *parent;
+
 	if (val > 100)
 		return -EINVAL;
 
@@ -1978,15 +2001,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
 		return -EINVAL;
 
 	parent = mem_cgroup_from_cont(cgrp->parent);
+
+	cgroup_lock();
+
 	/* If under hierarchy, only empty-root can set this value */
 	if ((parent->use_hierarchy) ||
-	    (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+	    (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
+		cgroup_unlock();
 		return -EINVAL;
+	}
 
 	spin_lock(&memcg->reclaim_param_lock);
 	memcg->swappiness = val;
 	spin_unlock(&memcg->reclaim_param_lock);
 
+	cgroup_unlock();
+
 	return 0;
 }
 
@@ -2164,10 +2194,23 @@ static void mem_cgroup_get(struct mem_cgroup *mem)
 
 static void mem_cgroup_put(struct mem_cgroup *mem)
 {
-	if (atomic_dec_and_test(&mem->refcnt))
+	if (atomic_dec_and_test(&mem->refcnt)) {
+		struct mem_cgroup *parent = parent_mem_cgroup(mem);
 		__mem_cgroup_free(mem);
+		if (parent)
+			mem_cgroup_put(parent);
+	}
 }
 
+/*
+ * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
+ */
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem)
+{
+	if (!mem->res.parent)
+		return NULL;
+	return mem_cgroup_from_res_counter(mem->res.parent, res);
+}
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 static void __init enable_swap_cgroup(void)
@@ -2181,7 +2224,7 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
-static struct cgroup_subsys_state *
+static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	struct mem_cgroup *mem, *parent;
@@ -2206,6 +2249,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	if (parent && parent->use_hierarchy) {
 		res_counter_init(&mem->res, &parent->res);
 		res_counter_init(&mem->memsw, &parent->memsw);
+		/*
+		 * We increment refcnt of the parent to ensure that we can
+		 * safely access it on res_counter_charge/uncharge.
+		 * This refcnt will be decremented when freeing this
+		 * mem_cgroup(see mem_cgroup_put).
+		 */
+		mem_cgroup_get(parent);
 	} else {
 		res_counter_init(&mem->res, NULL);
 		res_counter_init(&mem->memsw, NULL);
@@ -2232,7 +2282,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 				struct cgroup *cont)
 {
-	mem_cgroup_put(mem_cgroup_from_cont(cont));
+	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+	struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
+
+	if (last_scanned_child) {
+		VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
+		mem_cgroup_put(last_scanned_child);
+	}
+	mem_cgroup_put(mem);
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
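
The largest block of memcontrol.c changes reworks reference counting: the reclaim walk no longer takes and drops references on every step (the get/put calls removed from __mem_cgroup_get_next_node()); instead mem_cgroup_get_next_node() pins only last_scanned_child, and each child created with use_hierarchy pins its parent via mem_cgroup_get(parent), released recursively in mem_cgroup_put(). A toy model of that child-pins-parent scheme (simplified and single-threaded, not the kernel's atomics):

#include <stdlib.h>
#include <stdio.h>

/* Each child holds one reference on its parent; dropping the last
 * reference on a node frees it and then drops the reference it held
 * on the parent, unwinding up the hierarchy. */
struct node {
	int refcnt;
	struct node *parent;
};

static struct node *node_create(struct node *parent)
{
	struct node *n = calloc(1, sizeof(*n));
	n->refcnt = 1;
	n->parent = parent;
	if (parent)
		parent->refcnt++;	/* analogous to mem_cgroup_get(parent) */
	return n;
}

static void node_put(struct node *n)
{
	if (--n->refcnt == 0) {
		struct node *parent = n->parent;
		printf("freeing node %p\n", (void *)n);
		free(n);		/* analogous to __mem_cgroup_free() */
		if (parent)
			node_put(parent); /* recursive put, as in the patch */
	}
}

int main(void)
{
	struct node *root = node_create(NULL);
	struct node *child = node_create(root);

	node_put(root);		/* root stays alive: child still pins it */
	node_put(child);	/* frees child, then root */
	return 0;
}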
diff --git a/mm/memory.c b/mm/memory.c
index 22bfa7a47a0b..baa999e87cd2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1999,7 +1999,7 @@ gotten:
 		 * Don't let another task, with possibly unlocked vma,
 		 * keep the mlocked page.
 		 */
-		if (vma->vm_flags & VM_LOCKED) {
+		if ((vma->vm_flags & VM_LOCKED) && old_page) {
 			lock_page(old_page);	/* for LRU manipulation */
 			clear_page_mlock(old_page);
 			unlock_page(old_page);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e412ffa8e52e..3eb4a6fdc043 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1068,10 +1068,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 	return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
 }
 
-asmlinkage long sys_mbind(unsigned long start, unsigned long len,
-			unsigned long mode,
-			unsigned long __user *nmask, unsigned long maxnode,
-			unsigned flags)
+SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
+		unsigned long, mode, unsigned long __user *, nmask,
+		unsigned long, maxnode, unsigned, flags)
 {
 	nodemask_t nodes;
 	int err;
@@ -1091,8 +1090,8 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 }
 
 /* Set the process memory policy */
-asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
-		unsigned long maxnode)
+SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
+		unsigned long, maxnode)
 {
 	int err;
 	nodemask_t nodes;
@@ -1110,9 +1109,9 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
 	return do_set_mempolicy(mode, flags, &nodes);
 }
 
-asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
-		const unsigned long __user *old_nodes,
-		const unsigned long __user *new_nodes)
+SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
+		const unsigned long __user *, old_nodes,
+		const unsigned long __user *, new_nodes)
 {
 	const struct cred *cred = current_cred(), *tcred;
 	struct mm_struct *mm;
@@ -1185,10 +1184,9 @@ out:
 
 
 /* Retrieve NUMA policy */
-asmlinkage long sys_get_mempolicy(int __user *policy,
-				unsigned long __user *nmask,
-				unsigned long maxnode,
-				unsigned long addr, unsigned long flags)
+SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+		unsigned long __user *, nmask, unsigned long, maxnode,
+		unsigned long, addr, unsigned long, flags)
 {
 	int err;
 	int uninitialized_var(pval);
diff --git a/mm/migrate.c b/mm/migrate.c
index a30ea5fcf9f1..2bb4e1d63520 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1055,10 +1055,10 @@ out:
 * Move a list of pages in the address space of the currently executing
 * process.
 */
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
-			const void __user * __user *pages,
-			const int __user *nodes,
-			int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+		const void __user * __user *, pages,
+		const int __user *, nodes,
+		int __user *, status, int, flags)
 {
 	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
diff --git a/mm/mincore.c b/mm/mincore.c
index 5178800bc129..8cb508f84ea4 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -177,8 +177,8 @@ none_mapped:
  *		mapped
  *  -EAGAIN - A kernel resource was temporarily unavailable.
  */
-asmlinkage long sys_mincore(unsigned long start, size_t len,
-	unsigned char __user * vec)
+SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
+		unsigned char __user *, vec)
 {
 	long retval;
 	unsigned long pages;
diff --git a/mm/mlock.c b/mm/mlock.c
index e125156c664e..028ec482fdd4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -294,14 +294,10 @@ static inline int __mlock_posix_error_return(long retval)
  *
  * return number of pages [> 0] to be removed from locked_vm on success
  * of "special" vmas.
- *
- * return negative error if vma spanning @start-@range disappears while
- * mmap semaphore is dropped.  Unlikely?
  */
 long mlock_vma_pages_range(struct vm_area_struct *vma,
 			unsigned long start, unsigned long end)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	int nr_pages = (end - start) / PAGE_SIZE;
 	BUG_ON(!(vma->vm_flags & VM_LOCKED));
 
@@ -314,20 +310,8 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current))) {
-		long error;
-		downgrade_write(&mm->mmap_sem);
-
-		error = __mlock_vma_pages_range(vma, start, end, 1);
 
-		up_read(&mm->mmap_sem);
-		/* vma can change or disappear */
-		down_write(&mm->mmap_sem);
-		vma = find_vma(mm, start);
-		/* non-NULL vma must contain @start, but need to check @end */
-		if (!vma || end > vma->vm_end)
-			return -ENOMEM;
-
-		return 0;	/* hide other errors from mmap(), et al */
+		return __mlock_vma_pages_range(vma, start, end, 1);
 	}
 
 	/*
@@ -438,41 +422,14 @@ success:
 	vma->vm_flags = newflags;
 
 	if (lock) {
-		/*
-		 * mmap_sem is currently held for write.  Downgrade the write
-		 * lock to a read lock so that other faults, mmap scans, ...
-		 * while we fault in all pages.
-		 */
-		downgrade_write(&mm->mmap_sem);
-
 		ret = __mlock_vma_pages_range(vma, start, end, 1);
 
-		/*
-		 * Need to reacquire mmap sem in write mode, as our callers
-		 * expect this.  We have no support for atomically upgrading
-		 * a sem to write, so we need to check for ranges while sem
-		 * is unlocked.
-		 */
-		up_read(&mm->mmap_sem);
-		/* vma can change or disappear */
-		down_write(&mm->mmap_sem);
-		*prev = find_vma(mm, start);
-		/* non-NULL *prev must contain @start, but need to check @end */
-		if (!(*prev) || end > (*prev)->vm_end)
-			ret = -ENOMEM;
-		else if (ret > 0) {
+		if (ret > 0) {
 			mm->locked_vm -= ret;
 			ret = 0;
 		} else
 			ret = __mlock_posix_error_return(ret); /* translate if needed */
 	} else {
-		/*
-		 * TODO: for unlocking, pages will already be resident, so
-		 * we don't need to wait for allocations/reclaim/pagein, ...
-		 * However, unlocking a very large region can still take a
-		 * while.  Should we downgrade the semaphore for both lock
-		 * AND unlock ?
-		 */
 		__mlock_vma_pages_range(vma, start, end, 0);
 	}
 
@@ -530,7 +487,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-asmlinkage long sys_mlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
 	unsigned long locked;
 	unsigned long lock_limit;
@@ -558,7 +515,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
 	return error;
 }
 
-asmlinkage long sys_munlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
 	int ret;
 
@@ -595,7 +552,7 @@ out:
 	return 0;
 }
 
-asmlinkage long sys_mlockall(int flags)
+SYSCALL_DEFINE1(mlockall, int, flags)
 {
 	unsigned long lock_limit;
 	int ret = -EINVAL;
@@ -623,7 +580,7 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_munlockall(void)
+SYSCALL_DEFINE0(munlockall)
 {
 	int ret;
 
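
The mlock.c diff deletes the downgrade_write()/up_read()/down_write() dance and the find_vma() revalidation that went with it: pages are now faulted in with mmap_sem held for write throughout, so the vma can no longer change or disappear mid-operation. The hazard the old code defended against is the general one below (a hedged userspace model using pthreads, not kernel code):

#include <pthread.h>
#include <stdio.h>

/* Once a writer drops the lock (even briefly, to do slow work unlocked,
 * as the old code did to fault pages in), anything it looked up
 * beforehand may have been changed by another writer and must be
 * revalidated on reacquire. */
static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;
static int generation;			/* bumped by every writer */

static void careful_update(void)
{
	pthread_rwlock_wrlock(&lock);
	int seen = generation;
	pthread_rwlock_unlock(&lock);	/* lock dropped: world may change */

	/* ... long operation without the lock ... */

	pthread_rwlock_wrlock(&lock);
	if (seen != generation) {
		/* stale: redo the lookup, as the removed find_vma() did */
	}
	generation++;
	pthread_rwlock_unlock(&lock);
}

int main(void)
{
	careful_update();
	printf("generation=%d\n", generation);
	return 0;
}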
diff --git a/mm/mmap.c b/mm/mmap.c
index 749623196cb9..214b6a258eeb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	return next;
 }
 
-asmlinkage unsigned long sys_brk(unsigned long brk)
+SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	unsigned long rlim, retval;
 	unsigned long newbrk, oldbrk;
@@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end);
 	validate_mm(mm);
 }
 
+/* Flags that can be inherited from an existing mapping when merging */
+#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR)
+
 /*
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
@@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end);
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
 {
-	if (vma->vm_flags != vm_flags)
+	if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS)
 		return 0;
 	if (vma->vm_file != file)
 		return 0;
@@ -1087,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
 		mapping_cap_account_dirty(vma->vm_file->f_mapping);
 }
 
+/*
+ * We account for memory if it's a private writeable mapping,
+ * and VM_NORESERVE wasn't set.
+ */
+static inline int accountable_mapping(unsigned int vm_flags)
+{
+	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
+}
+
 unsigned long mmap_region(struct file *file, unsigned long addr,
 			  unsigned long len, unsigned long flags,
 			  unsigned int vm_flags, unsigned long pgoff,
@@ -1114,36 +1126,32 @@ munmap_back:
 	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
 		return -ENOMEM;
 
-	if (flags & MAP_NORESERVE)
+	/*
+	 * Set 'VM_NORESERVE' if we should not account for the
+	 * memory use of this mapping. We only honor MAP_NORESERVE
+	 * if we're allowed to overcommit memory.
+	 */
+	if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+		vm_flags |= VM_NORESERVE;
+	if (!accountable)
 		vm_flags |= VM_NORESERVE;
 
-	if (accountable && (!(flags & MAP_NORESERVE) ||
-			    sysctl_overcommit_memory == OVERCOMMIT_NEVER)) {
-		if (vm_flags & VM_SHARED) {
-			/* Check memory availability in shmem_file_setup? */
-			vm_flags |= VM_ACCOUNT;
-		} else if (vm_flags & VM_WRITE) {
-			/*
-			 * Private writable mapping: check memory availability
-			 */
-			charged = len >> PAGE_SHIFT;
-			if (security_vm_enough_memory(charged))
-				return -ENOMEM;
-			vm_flags |= VM_ACCOUNT;
-		}
+	/*
+	 * Private writable mapping: check memory availability
+	 */
+	if (accountable_mapping(vm_flags)) {
+		charged = len >> PAGE_SHIFT;
+		if (security_vm_enough_memory(charged))
+			return -ENOMEM;
+		vm_flags |= VM_ACCOUNT;
 	}
 
 	/*
-	 * Can we just expand an old private anonymous mapping?
-	 * The VM_SHARED test is necessary because shmem_zero_setup
-	 * will create the file object for a shared anonymous map below.
+	 * Can we just expand an old mapping?
 	 */
-	if (!file && !(vm_flags & VM_SHARED)) {
-		vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-					NULL, NULL, pgoff, NULL);
-		if (vma)
-			goto out;
-	}
+	vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL);
+	if (vma)
+		goto out;
 
 	/*
 	 * Determine the object being mapped and call the appropriate
@@ -1186,14 +1194,6 @@ munmap_back:
 		goto free_vma;
 	}
 
-	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
-	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
-	 * that memory reservation must be checked; but that reservation
-	 * belongs to shared memory object, not to vma: so now clear it.
-	 */
-	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
-		vma->vm_flags &= ~VM_ACCOUNT;
-
 	/* Can addr have changed??
 	 *
 	 * Answer: Yes, several device drivers can do it in their
@@ -1206,17 +1206,8 @@ munmap_back:
 	if (vma_wants_writenotify(vma))
 		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
 
-	if (file && vma_merge(mm, prev, addr, vma->vm_end,
-			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
-		mpol_put(vma_policy(vma));
-		kmem_cache_free(vm_area_cachep, vma);
-		fput(file);
-		if (vm_flags & VM_EXECUTABLE)
-			removed_exe_file_vma(mm);
-	} else {
-		vma_link(mm, vma, prev, rb_link, rb_parent);
-		file = vma->vm_file;
-	}
+	vma_link(mm, vma, prev, rb_link, rb_parent);
+	file = vma->vm_file;
 
 	/* Once vma denies write, undo our temporary denial count */
 	if (correct_wcount)
@@ -1948,7 +1939,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 
 EXPORT_SYMBOL(do_munmap);
 
-asmlinkage long sys_munmap(unsigned long addr, size_t len)
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
 	int ret;
 	struct mm_struct *mm = current->mm;
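
Two of the mmap.c changes above are compact bit tests worth unpacking: is_mergeable_vma() now compares flag words with an XOR that masks out VM_MERGEABLE_FLAGS, so two mappings may merge even if they differ only in VM_CAN_NONLINEAR, and the new accountable_mapping() accepts exactly the private writable mappings that did not opt out with VM_NORESERVE. A standalone sketch with illustrative flag values (not the kernel's definitions):

#include <assert.h>

/* Illustrative flag values; the kernel's actual constants differ. */
#define VM_WRITE		0x00000002u
#define VM_SHARED		0x00000008u
#define VM_NORESERVE		0x00200000u
#define VM_CAN_NONLINEAR	0x08000000u
#define VM_MERGEABLE_FLAGS	(VM_CAN_NONLINEAR)

/* XOR yields the bits on which two flag words differ; masking out
 * VM_MERGEABLE_FLAGS makes those bits "don't care" for merging. */
static int flags_mergeable(unsigned long a, unsigned long b)
{
	return ((a ^ b) & ~VM_MERGEABLE_FLAGS) == 0;
}

/* Accounting applies only when, of the three flags tested, exactly
 * VM_WRITE is set: private, writable, and not opted out. */
static int accountable_mapping(unsigned int vm_flags)
{
	return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}

int main(void)
{
	assert(flags_mergeable(VM_WRITE, VM_WRITE | VM_CAN_NONLINEAR));
	assert(!flags_mergeable(VM_WRITE, VM_WRITE | VM_SHARED));

	assert(accountable_mapping(VM_WRITE));			/* private rw */
	assert(!accountable_mapping(VM_WRITE | VM_SHARED));	/* shared */
	assert(!accountable_mapping(VM_WRITE | VM_NORESERVE));	/* opted out */
	assert(!accountable_mapping(0));			/* read-only */
	return 0;
}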
diff --git a/mm/mprotect.c b/mm/mprotect.c
index d0f6e7ce09f1..abe2694e13f4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -217,8 +217,8 @@ fail:
 	return error;
 }
 
-asmlinkage long
-sys_mprotect(unsigned long start, size_t len, unsigned long prot)
+SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
+		unsigned long, prot)
 {
 	unsigned long vm_flags, nstart, end, tmp, reqprot;
 	struct vm_area_struct *vma, *prev;
diff --git a/mm/mremap.c b/mm/mremap.c
index 646de959aa58..a39b7b91be46 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -420,9 +420,9 @@ out_nc:
 	return ret;
 }
 
-asmlinkage unsigned long sys_mremap(unsigned long addr,
-	unsigned long old_len, unsigned long new_len,
-	unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+		unsigned long, new_len, unsigned long, flags,
+		unsigned long, new_addr)
 {
 	unsigned long ret;
 
diff --git a/mm/msync.c b/mm/msync.c
index 07dae08cf31c..4083209b7f02 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -28,7 +28,7 @@
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
-asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
+SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
 	unsigned long end;
 	struct mm_struct *mm = current->mm;
diff --git a/mm/nommu.c b/mm/nommu.c
index 60ed8375c986..2fcf47d449b4 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -10,7 +10,7 @@
  *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
  *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
  *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
- *  Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
+ *  Copyright (c) 2007-2009 Paul Mundt <lethal@linux-sh.org>
  */
 
 #include <linux/module.h>
@@ -394,6 +394,24 @@ void vunmap(const void *addr)
 }
 EXPORT_SYMBOL(vunmap);
 
+void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
+{
+	BUG();
+	return NULL;
+}
+EXPORT_SYMBOL(vm_map_ram);
+
+void vm_unmap_ram(const void *mem, unsigned int count)
+{
+	BUG();
+}
+EXPORT_SYMBOL(vm_unmap_ram);
+
+void vm_unmap_aliases(void)
+{
+}
+EXPORT_SYMBOL_GPL(vm_unmap_aliases);
+
 /*
  * Implement a stub for vmalloc_sync_all() if the architecture chose not to
  * have one.
@@ -416,7 +434,7 @@ EXPORT_SYMBOL(vm_insert_page);
  *  to a regular file.  in this case, the unmapping will need
  *  to invoke file system routines that need the global lock.
  */
-asmlinkage unsigned long sys_brk(unsigned long brk)
+SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
 	struct mm_struct *mm = current->mm;
 
@@ -1143,8 +1161,8 @@ error_free:
 	return ret;
 
 enomem:
-	printk("Allocation of length %lu from process %d failed\n",
-	       len, current->pid);
+	printk("Allocation of length %lu from process %d (%s) failed\n",
+	       len, current->pid, current->comm);
 	show_free_areas();
 	return -ENOMEM;
 }
@@ -1573,7 +1591,7 @@ erase_whole_vma:
 }
 EXPORT_SYMBOL(do_munmap);
 
-asmlinkage long sys_munmap(unsigned long addr, size_t len)
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
 	int ret;
 	struct mm_struct *mm = current->mm;
@@ -1657,10 +1675,9 @@ unsigned long do_mremap(unsigned long addr,
 }
 EXPORT_SYMBOL(do_mremap);
 
-asmlinkage
-unsigned long sys_mremap(unsigned long addr,
-	unsigned long old_len, unsigned long new_len,
-	unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+		unsigned long, new_len, unsigned long, flags,
+		unsigned long, new_addr)
 {
 	unsigned long ret;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index b493db7841dc..dc32dae01e5f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1051,13 +1051,22 @@ continue_unlock:
 			}
 		}
 
-		if (wbc->sync_mode == WB_SYNC_NONE) {
-			wbc->nr_to_write--;
-			if (wbc->nr_to_write <= 0) {
-				done = 1;
-				break;
-			}
+		if (nr_to_write > 0)
+			nr_to_write--;
+		else if (wbc->sync_mode == WB_SYNC_NONE) {
+			/*
+			 * We stop writing back only if we are not
+			 * doing integrity sync. In case of integrity
+			 * sync we have to keep going because someone
+			 * may be concurrently dirtying pages, and we
+			 * might have synced a lot of newly appeared
+			 * dirty pages, but have not synced all of the
+			 * old dirty pages.
+			 */
+			done = 1;
+			break;
 		}
+
 		if (wbc->nonblocking && bdi_write_congested(bdi)) {
 			wbc->encountered_congestion = 1;
 			done = 1;
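
The rewritten loop decouples the page budget from the sync mode: every written page consumes the budget while it lasts, but only WB_SYNC_NONE writeback stops when it runs out; integrity sync keeps walking, for the reason given in the new comment. A standalone model of that control flow (an assumed simplification whose names mirror the kernel's, not the kernel function itself):

#include <stdio.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };

/* Consume the budget while it lasts; once exhausted, stop early only
 * for non-integrity (best-effort) writeback. */
static int writeback_pages(int dirty_pages, long nr_to_write, enum sync_mode mode)
{
	int written = 0;

	for (int i = 0; i < dirty_pages; i++) {
		written++;			/* "write" one page */
		if (nr_to_write > 0)
			nr_to_write--;
		else if (mode == WB_SYNC_NONE)
			break;			/* budget gone: best-effort stops */
		/* WB_SYNC_ALL keeps going: integrity sync must catch all pages */
	}
	return written;
}

int main(void)
{
	printf("SYNC_NONE: %d\n", writeback_pages(10, 4, WB_SYNC_NONE)); /* 5 */
	printf("SYNC_ALL:  %d\n", writeback_pages(10, 4, WB_SYNC_ALL));  /* 10 */
	return 0;
}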
diff --git a/mm/shmem.c b/mm/shmem.c
index 5d0de96c9789..19d566ccdeea 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2628,7 +2628,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 		goto close_file;
 
 #ifdef CONFIG_SHMEM
-	SHMEM_I(inode)->flags = flags & VM_ACCOUNT;
+	SHMEM_I(inode)->flags = (flags & VM_NORESERVE) ? 0 : VM_ACCOUNT;
 #endif
 	d_instantiate(dentry, inode);
 	inode->i_size = size;
diff --git a/mm/slub.c b/mm/slub.c
index 6392ae5cc6b1..bdc9abb08a23 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1996,7 +1996,7 @@ static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
 static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
 {
 	if (c < per_cpu(kmem_cache_cpu, cpu) ||
-			c > per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
+			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
 		kfree(c);
 		return;
 	}
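
The slub.c change is a classic half-open-interval fix: the static per-cpu array occupies [base, base + NR_KMEM_CACHE_CPU), so the old `>` test wrongly classified a pointer exactly one past the end as part of the array and never freed it. A minimal demonstration:

#include <assert.h>

#define N 8
static int pool[N];

/* An element is inside [base, base + n) iff p >= base && p < base + n. */
static int in_pool(const int *p)
{
	return p >= pool && p < pool + N;	/* fixed, half-open range */
}

/* The old test used "p > base + n" as the outside check, which
 * misclassifies p == base + n as inside. */
static int in_pool_buggy(const int *p)
{
	return !(p < pool || p > pool + N);
}

int main(void)
{
	assert(in_pool(&pool[0]) && in_pool(&pool[N - 1]));
	assert(!in_pool(pool + N));		/* one past the end: outside */
	assert(in_pool_buggy(pool + N));	/* buggy test says inside */
	return 0;
}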
diff --git a/mm/swapfile.c b/mm/swapfile.c
index da422c47e2ee..7e6304dfafab 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -698,8 +698,10 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t *pte;
 	int ret = 1;
 
-	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr))
+	if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, GFP_KERNEL, &ptr)) {
 		ret = -ENOMEM;
+		goto out_nolock;
+	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
@@ -723,6 +725,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		activate_page(page);
 out:
 	pte_unmap_unlock(pte, ptl);
+out_nolock:
 	return ret;
 }
 
@@ -1377,7 +1380,7 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_swapoff(const char __user * specialfile)
+SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
 	struct swap_info_struct * p = NULL;
 	unsigned short *swap_map;
@@ -1633,7 +1636,7 @@ late_initcall(max_swapfiles_check);
 *
 * The swapon system call
 */
-asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
+SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
 	struct swap_info_struct * p;
 	char *name = NULL;
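
The unuse_pte() fix above is the standard goto-unwind idiom: the charge failure happens before the pte lock is taken, so the error path must exit through a label placed after the unlock. A minimal userspace model (hedged; a pthread mutex stands in for the page-table lock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ptl = PTHREAD_MUTEX_INITIALIZER;

static int do_work(int charge_fails, int pte_changed)
{
	int ret = 1;

	if (charge_fails) {
		ret = -12;		/* -ENOMEM */
		goto out_nolock;	/* lock never taken: skip the unlock */
	}

	pthread_mutex_lock(&ptl);
	if (pte_changed) {
		ret = 0;
		goto out;		/* locked: unlock on the way out */
	}
	/* ... work under the lock ... */
out:
	pthread_mutex_unlock(&ptl);
out_nolock:
	return ret;
}

int main(void)
{
	printf("%d %d\n", do_work(0, 0), do_work(1, 0));	/* 1 -12 */
	return 0;
}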
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c5db9a7264d9..75f49d312e8c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -14,7 +14,6 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -24,6 +23,7 @@
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
+#include <linux/bootmem.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
@@ -495,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 					int sync, int force_flush)
 {
-	static DEFINE_MUTEX(purge_lock);
+	static DEFINE_SPINLOCK(purge_lock);
 	LIST_HEAD(valist);
 	struct vmap_area *va;
 	int nr = 0;
@@ -506,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 	 * the case that isn't actually used at the moment anyway.
 	 */
 	if (!sync && !force_flush) {
-		if (!mutex_trylock(&purge_lock))
+		if (!spin_trylock(&purge_lock))
 			return;
 	} else
-		mutex_lock(&purge_lock);
+		spin_lock(&purge_lock);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(va, &vmap_area_list, list) {
@@ -541,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
 		__free_vmap_area(va);
 		spin_unlock(&vmap_area_lock);
 	}
-	mutex_unlock(&purge_lock);
+	spin_unlock(&purge_lock);
 }
 
 /*
@@ -984,6 +984,8 @@ EXPORT_SYMBOL(vm_map_ram);
 
 void __init vmalloc_init(void)
 {
+	struct vmap_area *va;
+	struct vm_struct *tmp;
 	int i;
 
 	for_each_possible_cpu(i) {
@@ -996,6 +998,14 @@ void __init vmalloc_init(void)
 		vbq->nr_dirty = 0;
 	}
 
+	/* Import existing vmlist entries. */
+	for (tmp = vmlist; tmp; tmp = tmp->next) {
+		va = alloc_bootmem(sizeof(struct vmap_area));
+		va->flags = tmp->flags | VM_VM_AREA;
+		va->va_start = (unsigned long)tmp->addr;
+		va->va_end = va->va_start + tmp->size;
+		__insert_vmap_area(va);
+	}
 	vmap_initialized = true;
 }
 
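
Two things happen in the vmalloc.c diff: purge_lock becomes a spinlock so the opportunistic (non-sync) purge path can run in contexts where sleeping on a mutex is not allowed, and vmalloc_init() now imports the early vmlist entries into the vmap_area tree using bootmem allocations. The trylock-or-skip shape of the purge path, modeled in userspace with a pthread spinlock (a sketch, not the kernel code):

#include <pthread.h>

/* Opportunistic callers use trylock and simply skip the purge if
 * someone else holds the lock; callers that must purge (sync or
 * force_flush) block until they get it. */
static pthread_spinlock_t purge_lock;

static void purge(int must_purge)
{
	if (!must_purge) {
		if (pthread_spin_trylock(&purge_lock) != 0)
			return;		/* someone is already purging: skip */
	} else {
		pthread_spin_lock(&purge_lock);
	}

	/* ... walk and free lazily-unmapped areas ... */

	pthread_spin_unlock(&purge_lock);
}

int main(void)
{
	pthread_spin_init(&purge_lock, PTHREAD_PROCESS_PRIVATE);
	purge(0);	/* opportunistic */
	purge(1);	/* forced */
	pthread_spin_destroy(&purge_lock);
	return 0;
}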