author     Ingo Molnar <mingo@elte.hu>  2009-01-18 14:15:05 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-01-18 14:15:05 -0500
commit     4092762aebfe55c1f8e31440b80a053c2dbe519b (patch)
tree       8fb9fd14131194174c12daf5d8195afd3b62bc3e /mm
parent     745b1626dd71ce9661a05ea4db57859ed5c773d2 (diff)
parent     1de9e8e70f5acc441550ca75433563d91b269bbe (diff)
Merge branch 'tracing/ftrace'; commit 'v2.6.29-rc2' into tracing/core
Diffstat (limited to 'mm')
-rw-r--r--  mm/fadvise.c      18
-rw-r--r--  mm/filemap.c       9
-rw-r--r--  mm/fremap.c        4
-rw-r--r--  mm/madvise.c       2
-rw-r--r--  mm/memcontrol.c  130
-rw-r--r--  mm/memory.c       24
-rw-r--r--  mm/mempolicy.c    24
-rw-r--r--  mm/migrate.c       8
-rw-r--r--  mm/mincore.c       4
-rw-r--r--  mm/mlock.c         8
-rw-r--r--  mm/mmap.c          4
-rw-r--r--  mm/mprotect.c      4
-rw-r--r--  mm/mremap.c        6
-rw-r--r--  mm/msync.c         2
-rw-r--r--  mm/nommu.c        11
-rw-r--r--  mm/swapfile.c      4
-rw-r--r--  mm/vmalloc.c      20
17 files changed, 181 insertions, 101 deletions
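
Note: most of the mm/ changes below are a mechanical conversion of open-coded "asmlinkage long sys_*()" syscall bodies to the SYSCALL_DEFINEn() macros. As a rough sketch of why (illustrative only; SYSC_example/sys_example are made-up names, not the kernel's actual macro expansion): on architectures that select CONFIG_HAVE_SYSCALL_WRAPPERS, each syscall gets a SyS_ wrapper that receives every argument as a register-width long and casts it back to the declared type, so narrower arguments are correctly sign- or zero-extended. The hand-written fadvise64_64/readahead wrappers in this diff follow the same shape because their loff_t arguments cannot go through the generic macros.

        /* Illustrative sketch only; hypothetical names. */
        static inline long SYSC_example(int fd, size_t len);   /* typed syscall body */

        asmlinkage long SyS_example(long fd, long len)          /* raw register-width args */
        {
                return SYSC_example((int) fd, (size_t) len);    /* narrow back to declared types */
        }
        SYSCALL_ALIAS(sys_example, SyS_example);                /* expose it under the sys_ name */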
diff --git a/mm/fadvise.c b/mm/fadvise.c
index a1da969bd980..54a0f8040afa 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -24,7 +24,7 @@
  * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
  * deactivate the pages and clear PG_Referenced.
  */
-asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
+SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice)
 {
         struct file *file = fget(fd);
         struct address_space *mapping;
@@ -126,12 +126,26 @@ out:
         fput(file);
         return ret;
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fadvise64_64(long fd, loff_t offset, loff_t len, long advice)
+{
+        return SYSC_fadvise64_64((int) fd, offset, len, (int) advice);
+}
+SYSCALL_ALIAS(sys_fadvise64_64, SyS_fadvise64_64);
+#endif
 
 #ifdef __ARCH_WANT_SYS_FADVISE64
 
-asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice)
+SYSCALL_DEFINE(fadvise64)(int fd, loff_t offset, size_t len, int advice)
 {
         return sys_fadvise64_64(fd, offset, len, advice);
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_fadvise64(long fd, loff_t offset, long len, long advice)
+{
+        return SYSC_fadvise64((int) fd, offset, (size_t)len, (int)advice);
+}
+SYSCALL_ALIAS(sys_fadvise64, SyS_fadvise64);
+#endif
 
 #endif
diff --git a/mm/filemap.c b/mm/filemap.c
index ceba0bd03662..23acefe51808 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp,
         return 0;
 }
 
-asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
+SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count)
 {
         ssize_t ret;
         struct file *file;
@@ -1393,6 +1393,13 @@ asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count)
         }
         return ret;
 }
+#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
+asmlinkage long SyS_readahead(long fd, loff_t offset, long count)
+{
+        return SYSC_readahead((int) fd, offset, (size_t) count);
+}
+SYSCALL_ALIAS(sys_readahead, SyS_readahead);
+#endif
 
 #ifdef CONFIG_MMU
 /**
diff --git a/mm/fremap.c b/mm/fremap.c
index 62d5bbda921a..736ba7f3306a 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -120,8 +120,8 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
  * and the vma's default protection is used. Arbitrary protections
  * might be implemented in the future.
  */
-asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
-        unsigned long prot, unsigned long pgoff, unsigned long flags)
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+        unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
 {
         struct mm_struct *mm = current->mm;
         struct address_space *mapping;
diff --git a/mm/madvise.c b/mm/madvise.c
index f9349c18a1b5..b9ce574827c8 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -281,7 +281,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
  * -EBADF - map exists, but area maps something that isn't a file.
  * -EAGAIN - a kernel resource was temporarily unavailable.
  */
-asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
+SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
         unsigned long end, tmp;
         struct vm_area_struct * vma, *prev;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2996b80601f..4d0ea3ceba6d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -358,6 +358,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
                 return;
 
         pc = lookup_page_cgroup(page);
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
         smp_rmb();
         /* unused page is not rotated. */
         if (!PageCgroupUsed(pc))
@@ -374,7 +378,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
         if (mem_cgroup_disabled())
                 return;
         pc = lookup_page_cgroup(page);
-        /* barrier to sync with "charge" */
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
         smp_rmb();
         if (!PageCgroupUsed(pc))
                 return;
@@ -559,6 +566,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
                 return NULL;
 
         pc = lookup_page_cgroup(page);
+        /*
+         * Used bit is set without atomic ops but after smp_wmb().
+         * For making pc->mem_cgroup visible, insert smp_rmb() here.
+         */
+        smp_rmb();
+        if (!PageCgroupUsed(pc))
+                return NULL;
+
         mz = page_cgroup_zoneinfo(pc);
         if (!mz)
                 return NULL;
@@ -618,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
  * called with hierarchy_mutex held
  */
 static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
 {
         struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
 
@@ -629,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
                 /*
                  * Walk down to children
                  */
-                mem_cgroup_put(curr);
                 cgroup = list_entry(curr_cgroup->children.next,
                                 struct cgroup, sibling);
                 curr = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(curr);
                 goto done;
         }
 
 visit_parent:
         if (curr_cgroup == root_cgroup) {
-                mem_cgroup_put(curr);
-                curr = root_mem;
-                mem_cgroup_get(curr);
+                /* caller handles NULL case */
+                curr = NULL;
                 goto done;
         }
 
@@ -649,11 +661,9 @@ visit_parent:
          * Goto next sibling
          */
         if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
-                mem_cgroup_put(curr);
                 cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
                                 sibling);
                 curr = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(curr);
                 goto done;
         }
 
@@ -664,7 +674,6 @@ visit_parent:
         goto visit_parent;
 
 done:
-        root_mem->last_scanned_child = curr;
         return curr;
 }
 
@@ -674,40 +683,46 @@ done:
  * that to reclaim free pages from.
  */
 static struct mem_cgroup *
-mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
 {
         struct cgroup *cgroup;
-        struct mem_cgroup *ret;
+        struct mem_cgroup *orig, *next;
         bool obsolete;
 
-        obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child);
-
         /*
          * Scan all children under the mem_cgroup mem
          */
         mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
+
+        orig = root_mem->last_scanned_child;
+        obsolete = mem_cgroup_is_obsolete(orig);
+
         if (list_empty(&root_mem->css.cgroup->children)) {
-                ret = root_mem;
+                /*
+                 * root_mem might have children before and last_scanned_child
+                 * may point to one of them. We put it later.
+                 */
+                if (orig)
+                        VM_BUG_ON(!obsolete);
+                next = NULL;
                 goto done;
         }
 
-        if (!root_mem->last_scanned_child || obsolete) {
-
-                if (obsolete && root_mem->last_scanned_child)
-                        mem_cgroup_put(root_mem->last_scanned_child);
-
+        if (!orig || obsolete) {
                 cgroup = list_first_entry(&root_mem->css.cgroup->children,
                                 struct cgroup, sibling);
-                ret = mem_cgroup_from_cont(cgroup);
-                mem_cgroup_get(ret);
+                next = mem_cgroup_from_cont(cgroup);
         } else
-                ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
-                                root_mem);
+                next = __mem_cgroup_get_next_node(orig, root_mem);
 
 done:
-        root_mem->last_scanned_child = ret;
+        if (next)
+                mem_cgroup_get(next);
+        root_mem->last_scanned_child = next;
+        if (orig)
+                mem_cgroup_put(orig);
         mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
-        return ret;
+        return (next) ? next : root_mem;
 }
 
 static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
@@ -758,28 +773,25 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
          * but there might be left over accounting, even after children
          * have left.
          */
-        ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
+        ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
                                            get_swappiness(root_mem));
         if (mem_cgroup_check_under_limit(root_mem))
-                return 0;
+                return 1;       /* indicate reclaim has succeeded */
         if (!root_mem->use_hierarchy)
                 return ret;
 
-        next_mem = mem_cgroup_get_first_node(root_mem);
+        next_mem = mem_cgroup_get_next_node(root_mem);
 
         while (next_mem != root_mem) {
                 if (mem_cgroup_is_obsolete(next_mem)) {
-                        mem_cgroup_put(next_mem);
-                        next_mem = mem_cgroup_get_first_node(root_mem);
+                        next_mem = mem_cgroup_get_next_node(root_mem);
                         continue;
                 }
-                ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
+                ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
                                                    get_swappiness(next_mem));
                 if (mem_cgroup_check_under_limit(root_mem))
-                        return 0;
-                mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
-                next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
-                mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
+                        return 1;       /* indicate reclaim has succeeded */
+                next_mem = mem_cgroup_get_next_node(root_mem);
         }
         return ret;
 }
@@ -863,6 +875,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 
                 ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
                                                         noswap);
+                if (ret)
+                        continue;
 
                 /*
                  * try_to_free_mem_cgroup_pages() might not give us a full
@@ -979,14 +993,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc,
         if (pc->mem_cgroup != from)
                 goto out;
 
-        css_put(&from->css);
         res_counter_uncharge(&from->res, PAGE_SIZE);
         mem_cgroup_charge_statistics(from, pc, false);
         if (do_swap_account)
                 res_counter_uncharge(&from->memsw, PAGE_SIZE);
+        css_put(&from->css);
+
+        css_get(&to->css);
         pc->mem_cgroup = to;
         mem_cgroup_charge_statistics(to, pc, true);
-        css_get(&to->css);
         ret = 0;
 out:
         unlock_page_cgroup(pc);
@@ -1019,8 +1034,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
         if (ret || !parent)
                 return ret;
 
-        if (!get_page_unless_zero(page))
-                return -EBUSY;
+        if (!get_page_unless_zero(page)) {
+                ret = -EBUSY;
+                goto uncharge;
+        }
 
         ret = isolate_lru_page(page);
 
@@ -1029,19 +1046,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 
         ret = mem_cgroup_move_account(pc, child, parent);
 
-        /* drop extra refcnt by try_charge() (move_account increment one) */
-        css_put(&parent->css);
         putback_lru_page(page);
         if (!ret) {
                 put_page(page);
+                /* drop extra refcnt by try_charge() */
+                css_put(&parent->css);
                 return 0;
         }
-        /* uncharge if move fails */
+
 cancel:
+        put_page(page);
+uncharge:
+        /* drop extra refcnt by try_charge() */
+        css_put(&parent->css);
+        /* uncharge if move fails */
         res_counter_uncharge(&parent->res, PAGE_SIZE);
         if (do_swap_account)
                 res_counter_uncharge(&parent->memsw, PAGE_SIZE);
-        put_page(page);
         return ret;
 }
 
@@ -1971,6 +1992,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
 {
         struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
         struct mem_cgroup *parent;
+
         if (val > 100)
                 return -EINVAL;
 
@@ -1978,15 +2000,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
                 return -EINVAL;
 
         parent = mem_cgroup_from_cont(cgrp->parent);
+
+        cgroup_lock();
+
         /* If under hierarchy, only empty-root can set this value */
         if ((parent->use_hierarchy) ||
-            (memcg->use_hierarchy && !list_empty(&cgrp->children)))
+            (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
+                cgroup_unlock();
                 return -EINVAL;
+        }
 
         spin_lock(&memcg->reclaim_param_lock);
         memcg->swappiness = val;
         spin_unlock(&memcg->reclaim_param_lock);
 
+        cgroup_unlock();
+
         return 0;
 }
 
@@ -2181,7 +2210,7 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
-static struct cgroup_subsys_state *
+static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
         struct mem_cgroup *mem, *parent;
@@ -2232,7 +2261,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
 static void mem_cgroup_destroy(struct cgroup_subsys *ss,
                                 struct cgroup *cont)
 {
-        mem_cgroup_put(mem_cgroup_from_cont(cont));
+        struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
+        struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
+
+        if (last_scanned_child) {
+                VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
+                mem_cgroup_put(last_scanned_child);
+        }
+        mem_cgroup_put(mem);
 }
 
 static int mem_cgroup_populate(struct cgroup_subsys *ss,
diff --git a/mm/memory.c b/mm/memory.c
index e009ce870859..22bfa7a47a0b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1511,6 +1511,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
                         unsigned long pfn)
 {
         int ret;
+        pgprot_t pgprot = vma->vm_page_prot;
         /*
          * Technically, architectures with pte_special can avoid all these
          * restrictions (same for remap_pfn_range). However we would like
@@ -1525,10 +1526,10 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
         if (addr < vma->vm_start || addr >= vma->vm_end)
                 return -EFAULT;
-        if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+        if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
                 return -EINVAL;
 
-        ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+        ret = insert_pfn(vma, addr, pfn, pgprot);
 
         if (ret)
                 untrack_pfn_vma(vma, pfn, PAGE_SIZE);
@@ -1671,9 +1672,15 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 
         vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-        err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
-        if (err)
+        err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
+        if (err) {
+                /*
+                 * To indicate that track_pfn related cleanup is not
+                 * needed from higher level routine calling unmap_vmas
+                 */
+                vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
                 return -EINVAL;
+        }
 
         BUG_ON(addr >= end);
         pfn -= addr >> PAGE_SHIFT;
@@ -3165,6 +3172,15 @@ void print_vma_addr(char *prefix, unsigned long ip)
 #ifdef CONFIG_PROVE_LOCKING
 void might_fault(void)
 {
+        /*
+         * Some code (nfs/sunrpc) uses socket ops on kernel memory while
+         * holding the mmap_sem, this is safe because kernel memory doesn't
+         * get paged out, therefore we'll never actually fault, and the
+         * below annotations will generate false positives.
+         */
+        if (segment_eq(get_fs(), KERNEL_DS))
+                return;
+
         might_sleep();
         /*
          * it would be nicer only to annotate paths which are not under
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e412ffa8e52e..3eb4a6fdc043 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1068,10 +1068,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
         return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
 }
 
-asmlinkage long sys_mbind(unsigned long start, unsigned long len,
-                        unsigned long mode,
-                        unsigned long __user *nmask, unsigned long maxnode,
-                        unsigned flags)
+SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
+                unsigned long, mode, unsigned long __user *, nmask,
+                unsigned long, maxnode, unsigned, flags)
 {
         nodemask_t nodes;
         int err;
@@ -1091,8 +1090,8 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 }
 
 /* Set the process memory policy */
-asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
-                unsigned long maxnode)
+SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
+                unsigned long, maxnode)
 {
         int err;
         nodemask_t nodes;
@@ -1110,9 +1109,9 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
         return do_set_mempolicy(mode, flags, &nodes);
 }
 
-asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
-                const unsigned long __user *old_nodes,
-                const unsigned long __user *new_nodes)
+SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
+                const unsigned long __user *, old_nodes,
+                const unsigned long __user *, new_nodes)
 {
         const struct cred *cred = current_cred(), *tcred;
         struct mm_struct *mm;
@@ -1185,10 +1184,9 @@ out:
 
 
 /* Retrieve NUMA policy */
-asmlinkage long sys_get_mempolicy(int __user *policy,
-                                unsigned long __user *nmask,
-                                unsigned long maxnode,
-                                unsigned long addr, unsigned long flags)
+SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+                unsigned long __user *, nmask, unsigned long, maxnode,
+                unsigned long, addr, unsigned long, flags)
 {
         int err;
         int uninitialized_var(pval);
diff --git a/mm/migrate.c b/mm/migrate.c
index a30ea5fcf9f1..2bb4e1d63520 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1055,10 +1055,10 @@ out:
  * Move a list of pages in the address space of the currently executing
  * process.
  */
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
-                        const void __user * __user *pages,
-                        const int __user *nodes,
-                        int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+                const void __user * __user *, pages,
+                const int __user *, nodes,
+                int __user *, status, int, flags)
 {
         const struct cred *cred = current_cred(), *tcred;
         struct task_struct *task;
diff --git a/mm/mincore.c b/mm/mincore.c
index 5178800bc129..8cb508f84ea4 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -177,8 +177,8 @@ none_mapped:
  *              mapped
  *  -EAGAIN - A kernel resource was temporarily unavailable.
  */
-asmlinkage long sys_mincore(unsigned long start, size_t len,
-        unsigned char __user * vec)
+SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
+                unsigned char __user *, vec)
 {
         long retval;
         unsigned long pages;
diff --git a/mm/mlock.c b/mm/mlock.c
index e125156c664e..2904a347e476 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -530,7 +530,7 @@ static int do_mlock(unsigned long start, size_t len, int on)
         return error;
 }
 
-asmlinkage long sys_mlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 {
         unsigned long locked;
         unsigned long lock_limit;
@@ -558,7 +558,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len)
         return error;
 }
 
-asmlinkage long sys_munlock(unsigned long start, size_t len)
+SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
 {
         int ret;
 
@@ -595,7 +595,7 @@ out:
         return 0;
 }
 
-asmlinkage long sys_mlockall(int flags)
+SYSCALL_DEFINE1(mlockall, int, flags)
 {
         unsigned long lock_limit;
         int ret = -EINVAL;
@@ -623,7 +623,7 @@ out:
         return ret;
 }
 
-asmlinkage long sys_munlockall(void)
+SYSCALL_DEFINE0(munlockall)
 {
         int ret;
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 749623196cb9..8d95902e9a38 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
         return next;
 }
 
-asmlinkage unsigned long sys_brk(unsigned long brk)
+SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
         unsigned long rlim, retval;
         unsigned long newbrk, oldbrk;
@@ -1948,7 +1948,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 
 EXPORT_SYMBOL(do_munmap);
 
-asmlinkage long sys_munmap(unsigned long addr, size_t len)
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
         int ret;
         struct mm_struct *mm = current->mm;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index d0f6e7ce09f1..abe2694e13f4 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -217,8 +217,8 @@ fail:
         return error;
 }
 
-asmlinkage long
-sys_mprotect(unsigned long start, size_t len, unsigned long prot)
+SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
+                unsigned long, prot)
 {
         unsigned long vm_flags, nstart, end, tmp, reqprot;
         struct vm_area_struct *vma, *prev;
diff --git a/mm/mremap.c b/mm/mremap.c
index 646de959aa58..a39b7b91be46 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -420,9 +420,9 @@ out_nc:
         return ret;
 }
 
-asmlinkage unsigned long sys_mremap(unsigned long addr,
-        unsigned long old_len, unsigned long new_len,
-        unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+                unsigned long, new_len, unsigned long, flags,
+                unsigned long, new_addr)
 {
         unsigned long ret;
 
diff --git a/mm/msync.c b/mm/msync.c
index 07dae08cf31c..4083209b7f02 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -28,7 +28,7 @@
  * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
  * applications.
  */
-asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
+SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags)
 {
         unsigned long end;
         struct mm_struct *mm = current->mm;
diff --git a/mm/nommu.c b/mm/nommu.c
index 60ed8375c986..8cee8c8ff0f2 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -416,7 +416,7 @@ EXPORT_SYMBOL(vm_insert_page);
  * to a regular file. in this case, the unmapping will need
  * to invoke file system routines that need the global lock.
  */
-asmlinkage unsigned long sys_brk(unsigned long brk)
+SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
         struct mm_struct *mm = current->mm;
 
@@ -1573,7 +1573,7 @@ erase_whole_vma:
 }
 EXPORT_SYMBOL(do_munmap);
 
-asmlinkage long sys_munmap(unsigned long addr, size_t len)
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
         int ret;
         struct mm_struct *mm = current->mm;
@@ -1657,10 +1657,9 @@ unsigned long do_mremap(unsigned long addr,
 }
 EXPORT_SYMBOL(do_mremap);
 
-asmlinkage
-unsigned long sys_mremap(unsigned long addr,
-        unsigned long old_len, unsigned long new_len,
-        unsigned long flags, unsigned long new_addr)
+SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
+                unsigned long, new_len, unsigned long, flags,
+                unsigned long, new_addr)
 {
         unsigned long ret;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index da422c47e2ee..f48b831e5e5c 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1377,7 +1377,7 @@ out:
         return ret;
 }
 
-asmlinkage long sys_swapoff(const char __user * specialfile)
+SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
         struct swap_info_struct * p = NULL;
         unsigned short *swap_map;
@@ -1633,7 +1633,7 @@ late_initcall(max_swapfiles_check);
  *
  * The swapon system call
  */
-asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
+SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 {
         struct swap_info_struct * p;
         char *name = NULL;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c5db9a7264d9..75f49d312e8c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -14,7 +14,6 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -24,6 +23,7 @@
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
+#include <linux/bootmem.h>
 
 #include <asm/atomic.h>
 #include <asm/uaccess.h>
@@ -495,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
                                         int sync, int force_flush)
 {
-        static DEFINE_MUTEX(purge_lock);
+        static DEFINE_SPINLOCK(purge_lock);
         LIST_HEAD(valist);
         struct vmap_area *va;
         int nr = 0;
@@ -506,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
          * the case that isn't actually used at the moment anyway.
          */
         if (!sync && !force_flush) {
-                if (!mutex_trylock(&purge_lock))
+                if (!spin_trylock(&purge_lock))
                         return;
         } else
-                mutex_lock(&purge_lock);
+                spin_lock(&purge_lock);
 
         rcu_read_lock();
         list_for_each_entry_rcu(va, &vmap_area_list, list) {
@@ -541,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
                         __free_vmap_area(va);
                 spin_unlock(&vmap_area_lock);
         }
-        mutex_unlock(&purge_lock);
+        spin_unlock(&purge_lock);
 }
 
 /*
@@ -984,6 +984,8 @@ EXPORT_SYMBOL(vm_map_ram);
 
 void __init vmalloc_init(void)
 {
+        struct vmap_area *va;
+        struct vm_struct *tmp;
         int i;
 
         for_each_possible_cpu(i) {
@@ -996,6 +998,14 @@ void __init vmalloc_init(void)
                 vbq->nr_dirty = 0;
         }
 
+        /* Import existing vmlist entries. */
+        for (tmp = vmlist; tmp; tmp = tmp->next) {
+                va = alloc_bootmem(sizeof(struct vmap_area));
+                va->flags = tmp->flags | VM_VM_AREA;
+                va->va_start = (unsigned long)tmp->addr;
+                va->va_end = va->va_start + tmp->size;
+                __insert_vmap_area(va);
+        }
         vmap_initialized = true;
 }
 