Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	139
1 file changed, 62 insertions(+), 77 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 21631ab8c08b..2bb4e1d63520 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -121,20 +121,6 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
 		goto out;
 
-	/*
-	 * Yes, ignore the return value from a GFP_ATOMIC mem_cgroup_charge.
-	 * Failure is not an option here: we're now expected to remove every
-	 * migration pte, and will cause crashes otherwise. Normally this
-	 * is not an issue: mem_cgroup_prepare_migration bumped up the old
-	 * page_cgroup count for safety, that's now attached to the new page,
-	 * so this charge should just be another incrementation of the count,
-	 * to keep in balance with rmap.c's mem_cgroup_uncharging. But if
-	 * there's been a force_empty, those reference counts may no longer
-	 * be reliable, and this charge can actually fail: oh well, we don't
-	 * make the situation any worse by proceeding as if it had succeeded.
-	 */
-	mem_cgroup_charge(new, mm, GFP_ATOMIC);
-
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
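The best-effort GFP_ATOMIC charge can be dropped because of the API rework visible in the unmap_and_move() hunks below: mem_cgroup_prepare_migration() now charges the new page up front and mem_cgroup_end_migration() later commits or rolls that charge back, so by the time migration ptes are replaced there is no charge operation left that could fail.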
@@ -300,12 +286,10 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 * Now we know that no one else is looking at the page.
 	 */
 	get_page(newpage);	/* add cache reference */
-#ifdef CONFIG_SWAP
 	if (PageSwapCache(page)) {
 		SetPageSwapCache(newpage);
 		set_page_private(newpage, page_private(page));
 	}
-#endif
 
 	radix_tree_replace_slot(pslot, newpage);
 
@@ -373,18 +357,13 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 
 	mlock_migrate_page(newpage, page);
 
-#ifdef CONFIG_SWAP
 	ClearPageSwapCache(page);
-#endif
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
 	/* page->mapping contains a flag for PageAnon() */
 	anon = PageAnon(page);
 	page->mapping = NULL;
 
-	if (!anon) /* This page was removed from radix-tree. */
-		mem_cgroup_uncharge_cache_page(page);
-
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
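Both CONFIG_SWAP ifdefs above can go only because the swap-cache page-flag helpers degrade to no-op inline stubs on !CONFIG_SWAP builds. Roughly the arrangement in <linux/page-flags.h> that this cleanup leans on (a sketch; the exact stub macros are an assumption, not part of this patch):

#ifdef CONFIG_SWAP
PAGEFLAG(SwapCache, swapcache)	/* real test/set/clear on PG_swapcache */
#else
PAGEFLAG_FALSE(SwapCache)	/* PageSwapCache() is constant 0;
				 * Set/ClearPageSwapCache() are empty inlines */
#endif

With PageSwapCache() constant-false, the compiler also discards the whole if (PageSwapCache(page)) block in migrate_page_move_mapping() on swapless configurations.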
@@ -618,6 +597,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
+	struct mem_cgroup *mem;
 
 	if (!newpage)
 		return -ENOMEM;
@@ -627,24 +607,26 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		goto move_newpage;
 	}
 
-	charge = mem_cgroup_prepare_migration(page, newpage);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto move_newpage;
-	}
 	/* prepare cgroup just returns 0 or -ENOMEM */
-	BUG_ON(charge);
-
 	rc = -EAGAIN;
+
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
 		lock_page(page);
 	}
 
+	/* charge against new page */
+	charge = mem_cgroup_prepare_migration(page, &mem);
+	if (charge == -ENOMEM) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+	BUG_ON(charge);
+
 	if (PageWriteback(page)) {
 		if (!force)
-			goto unlock;
+			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -697,7 +679,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 rcu_unlock:
 	if (rcu_locked)
 		rcu_read_unlock();
-
+uncharge:
+	if (!charge)
+		mem_cgroup_end_migration(mem, page, newpage);
 unlock:
 	unlock_page(page);
 
@@ -713,8 +697,6 @@ unlock:
 	}
 
 move_newpage:
-	if (!charge)
-		mem_cgroup_end_migration(newpage);
 
 	/*
 	 * Move the new page to the LRU. If migration was not successful
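Taken together, the three unmap_and_move() hunks move the memcg charge inside the page lock and give every later failure path a single exit through the new uncharge: label; mem_cgroup_end_migration() now takes (mem, page, newpage) so it can decide which of the two pages keeps the charge. A condensed sketch of the resulting flow (simplified, most error handling elided):

	rc = -EAGAIN;
	if (!trylock_page(page)) {
		if (!force)
			goto move_newpage;	/* nothing charged yet */
		lock_page(page);
	}

	/* charge against the new page, under the page lock */
	charge = mem_cgroup_prepare_migration(page, &mem);
	if (charge == -ENOMEM) {
		rc = -ENOMEM;
		goto unlock;			/* charge failed: skip uncharge */
	}

	/* ... wait for writeback, unmap, move_to_new_page() ... */

uncharge:
	if (!charge)				/* commit to newpage, or roll back */
		mem_cgroup_end_migration(mem, page, newpage);
unlock:
	unlock_page(page);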
@@ -848,12 +830,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		struct vm_area_struct *vma;
 		struct page *page;
 
-		/*
-		 * A valid page pointer that will not match any of the
-		 * pages that will be moved.
-		 */
-		pp->page = ZERO_PAGE(0);
-
 		err = -EFAULT;
 		vma = find_vma(mm, pp->addr);
 		if (!vma || !vma_migratable(vma))
@@ -919,41 +895,43 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			const int __user *nodes,
 			int __user *status, int flags)
 {
-	struct page_to_node *pm = NULL;
+	struct page_to_node *pm;
 	nodemask_t task_nodes;
-	int err = 0;
-	int i;
+	unsigned long chunk_nr_pages;
+	unsigned long chunk_start;
+	int err;
 
 	task_nodes = cpuset_mems_allowed(task);
 
-	/* Limit nr_pages so that the multiplication may not overflow */
-	if (nr_pages >= ULONG_MAX / sizeof(struct page_to_node) - 1) {
-		err = -E2BIG;
-		goto out;
-	}
-
-	pm = vmalloc((nr_pages + 1) * sizeof(struct page_to_node));
-	if (!pm) {
-		err = -ENOMEM;
+	err = -ENOMEM;
+	pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
+	if (!pm)
 		goto out;
-	}
-
 	/*
-	 * Get parameters from user space and initialize the pm
-	 * array. Return various errors if the user did something wrong.
+	 * Store a chunk of page_to_node array in a page,
+	 * but keep the last one as a marker
 	 */
-	for (i = 0; i < nr_pages; i++) {
-		const void __user *p;
+	chunk_nr_pages = (PAGE_SIZE / sizeof(struct page_to_node)) - 1;
 
-		err = -EFAULT;
-		if (get_user(p, pages + i))
-			goto out_pm;
+	for (chunk_start = 0;
+	     chunk_start < nr_pages;
+	     chunk_start += chunk_nr_pages) {
+		int j;
+
+		if (chunk_start + chunk_nr_pages > nr_pages)
+			chunk_nr_pages = nr_pages - chunk_start;
 
-		pm[i].addr = (unsigned long)p;
-		if (nodes) {
+		/* fill the chunk pm with addrs and nodes from user-space */
+		for (j = 0; j < chunk_nr_pages; j++) {
+			const void __user *p;
 			int node;
 
-			if (get_user(node, nodes + i))
+			err = -EFAULT;
+			if (get_user(p, pages + j + chunk_start))
+				goto out_pm;
+			pm[j].addr = (unsigned long) p;
+
+			if (get_user(node, nodes + j + chunk_start))
 				goto out_pm;
 
 			err = -ENODEV;
@@ -964,22 +942,29 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			if (!node_isset(node, task_nodes))
 				goto out_pm;
 
-			pm[i].node = node;
-		} else
-			pm[i].node = 0; /* anything to not match MAX_NUMNODES */
-	}
-	/* End marker */
-	pm[nr_pages].node = MAX_NUMNODES;
+			pm[j].node = node;
+		}
+
+		/* End marker for this chunk */
+		pm[chunk_nr_pages].node = MAX_NUMNODES;
+
+		/* Migrate this chunk */
+		err = do_move_page_to_node_array(mm, pm,
+						 flags & MPOL_MF_MOVE_ALL);
+		if (err < 0)
+			goto out_pm;
 
-	err = do_move_page_to_node_array(mm, pm, flags & MPOL_MF_MOVE_ALL);
-	if (err >= 0)
 		/* Return status information */
-		for (i = 0; i < nr_pages; i++)
-			if (put_user(pm[i].status, status + i))
+		for (j = 0; j < chunk_nr_pages; j++)
+			if (put_user(pm[j].status, status + j + chunk_start)) {
 				err = -EFAULT;
+				goto out_pm;
+			}
+	}
+	err = 0;
 
 out_pm:
-	vfree(pm);
+	free_page((unsigned long)pm);
 out:
 	return err;
 }
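The rework trades the old unbounded vmalloc() of nr_pages + 1 entries, and the -E2BIG overflow guard it required, for a single GFP_KERNEL page that is refilled on every iteration, so the kernel-side allocation no longer scales with the size of the user's request. With struct page_to_node as defined near the top of mm/migrate.c, the chunk size works out as follows on a 64-bit machine with 4 KiB pages (back-of-envelope sketch; exact sizes depend on the configuration):

struct page_to_node {		/* as in mm/migrate.c */
	unsigned long addr;	/* 8 bytes */
	struct page *page;	/* 8 bytes */
	int node;		/* 4 bytes */
	int status;		/* 4 bytes */
};				/* sizeof == 24 */

/*
 * chunk_nr_pages = PAGE_SIZE / sizeof(struct page_to_node) - 1
 *                = 4096 / 24 - 1
 *                = 170 - 1 = 169 pages handled per iteration,
 * with pm[169] left free for the MAX_NUMNODES end marker that
 * do_move_page_to_node_array() uses to find the end of the array.
 */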
@@ -1070,10 +1055,10 @@ out:
  * Move a list of pages in the address space of the currently executing
  * process.
  */
-asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
-		const void __user * __user *pages,
-		const int __user *nodes,
-		int __user *status, int flags)
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+		const void __user * __user *, pages,
+		const int __user *, nodes,
+		int __user *, status, int, flags)
 {
 	const struct cred *cred = current_cred(), *tcred;
 	struct task_struct *task;
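SYSCALL_DEFINE6() takes the syscall name followed by alternating type and argument-name parameters. On architectures without syscall wrappers it comes out as essentially the old definition; on those that select CONFIG_HAVE_SYSCALL_WRAPPERS the same macro also emits a stub that sign-extends 32-bit arguments such as pid and flags so they arrive correctly in 64-bit registers. A sketch of the plain expansion (an assumption, not the literal preprocessor output):

/* what SYSCALL_DEFINE6(move_pages, ...) boils down to without wrappers */
asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
			       const void __user * __user *pages,
			       const int __user *nodes,
			       int __user *status, int flags)
{
	/* ... function body as before ... */
}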