aboutsummaryrefslogtreecommitdiffstats
path: root/mm/migrate.c
diff options
context:
space:
mode:
author: Christoph Lameter <cl@linux.com> 2012-03-21 19:34:06 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 20:54:58 -0400
commit: 3268c63eded4612a3d07b56d1e02ce7731e6608e (patch)
tree: c8a07eb4cdf2697d610bb86d23eb46b0a5f892c2 /mm/migrate.c
parent: 385de35722c9a22917e7bc5e63cd83a8cffa5ecd (diff)
mm: fix move/migrate_pages() race on task struct
Migration functions perform the rcu_read_unlock() too early. As a result the task pointed to may change from under us. This can result in an oops, as reported by Dave Hansen in https://lkml.org/lkml/2012/2/23/302.

The following patch extends the period of the rcu_read_lock() until after the permission checks are done. We also take a refcount so that the task reference is stable when calling security check functions and performing cpuset node validation (which takes a mutex).

The refcount is dropped before actual page migration occurs, so there is no change to the refcounts held during page migration.

Also move the determination of the mm of the task struct to immediately before the do_migrate*() calls so that it is clear that we switch from handling the task during permission checks to the mm for the actual migration. Since the determination is only done once and we then no longer use the task_struct, we can be sure that we operate on a specific address space that will not change from under us.

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Reported-by: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- mm/migrate.c 36
1 files changed, 19 insertions, 17 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 1503b6b54ecb..51c08a0c6f68 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1174,20 +1174,17 @@ set_status:
1174 * Migrate an array of page address onto an array of nodes and fill 1174 * Migrate an array of page address onto an array of nodes and fill
1175 * the corresponding array of status. 1175 * the corresponding array of status.
1176 */ 1176 */
1177static int do_pages_move(struct mm_struct *mm, struct task_struct *task, 1177static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1178 unsigned long nr_pages, 1178 unsigned long nr_pages,
1179 const void __user * __user *pages, 1179 const void __user * __user *pages,
1180 const int __user *nodes, 1180 const int __user *nodes,
1181 int __user *status, int flags) 1181 int __user *status, int flags)
1182{ 1182{
1183 struct page_to_node *pm; 1183 struct page_to_node *pm;
1184 nodemask_t task_nodes;
1185 unsigned long chunk_nr_pages; 1184 unsigned long chunk_nr_pages;
1186 unsigned long chunk_start; 1185 unsigned long chunk_start;
1187 int err; 1186 int err;
1188 1187
1189 task_nodes = cpuset_mems_allowed(task);
1190
1191 err = -ENOMEM; 1188 err = -ENOMEM;
1192 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL); 1189 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
1193 if (!pm) 1190 if (!pm)
@@ -1349,6 +1346,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1349 struct task_struct *task; 1346 struct task_struct *task;
1350 struct mm_struct *mm; 1347 struct mm_struct *mm;
1351 int err; 1348 int err;
1349 nodemask_t task_nodes;
1352 1350
1353 /* Check flags */ 1351 /* Check flags */
1354 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) 1352 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1364,11 +1362,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1364 rcu_read_unlock(); 1362 rcu_read_unlock();
1365 return -ESRCH; 1363 return -ESRCH;
1366 } 1364 }
1367 mm = get_task_mm(task); 1365 get_task_struct(task);
1368 rcu_read_unlock();
1369
1370 if (!mm)
1371 return -EINVAL;
1372 1366
1373 /* 1367 /*
1374 * Check if this process has the right to modify the specified 1368 * Check if this process has the right to modify the specified
@@ -1376,7 +1370,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1376 * capabilities, superuser privileges or the same 1370 * capabilities, superuser privileges or the same
1377 * userid as the target process. 1371 * userid as the target process.
1378 */ 1372 */
1379 rcu_read_lock();
1380 tcred = __task_cred(task); 1373 tcred = __task_cred(task);
1381 if (cred->euid != tcred->suid && cred->euid != tcred->uid && 1374 if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
1382 cred->uid != tcred->suid && cred->uid != tcred->uid && 1375 cred->uid != tcred->suid && cred->uid != tcred->uid &&
@@ -1391,16 +1384,25 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1391 if (err) 1384 if (err)
1392 goto out; 1385 goto out;
1393 1386
1394 if (nodes) { 1387 task_nodes = cpuset_mems_allowed(task);
1395 err = do_pages_move(mm, task, nr_pages, pages, nodes, status, 1388 mm = get_task_mm(task);
1396 flags); 1389 put_task_struct(task);
1397 } else { 1390
1398 err = do_pages_stat(mm, nr_pages, pages, status); 1391 if (mm) {
1399 } 1392 if (nodes)
1393 err = do_pages_move(mm, task_nodes, nr_pages, pages,
1394 nodes, status, flags);
1395 else
1396 err = do_pages_stat(mm, nr_pages, pages, status);
1397 } else
1398 err = -EINVAL;
1400 1399
1401out:
1402 mmput(mm); 1400 mmput(mm);
1403 return err; 1401 return err;
1402
1403out:
1404 put_task_struct(task);
1405 return err;
1404} 1406}
1405 1407
1406/* 1408/*