aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Lameter <cl@linux.com> 2012-03-21 19:34:06 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 20:54:58 -0400
commit 3268c63eded4612a3d07b56d1e02ce7731e6608e (patch)
tree c8a07eb4cdf2697d610bb86d23eb46b0a5f892c2
parent 385de35722c9a22917e7bc5e63cd83a8cffa5ecd (diff)
mm: fix move/migrate_pages() race on task struct
Migration functions perform the rcu_read_unlock too early. As a result, the task pointed to may change from under us. This can result in an oops, as reported by Dave Hansen in https://lkml.org/lkml/2012/2/23/302. The following patch extends the period of the rcu_read_lock until after the permission checks are done. We also take a refcount so that the task reference is stable when calling security check functions and performing cpuset node validation (which takes a mutex). The refcount is dropped before actual page migration occurs so there is no change to the refcounts held during page migration. Also move the determination of the mm of the task struct to immediately before the do_migrate*() calls so that it is clear that we switch from handling the task during permission checks to the mm for the actual migration. Since the determination is only done once and we then no longer use the task_struct we can be sure that we operate on a specific address space that will not change from under us. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Christoph Lameter <cl@linux.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Reported-by: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/mempolicy.c 32
-rw-r--r-- mm/migrate.c 36
2 files changed, 38 insertions, 30 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0a3757067631..71e1a523e209 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1323,12 +1323,9 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
1323 err = -ESRCH; 1323 err = -ESRCH;
1324 goto out; 1324 goto out;
1325 } 1325 }
1326 mm = get_task_mm(task); 1326 get_task_struct(task);
1327 rcu_read_unlock();
1328 1327
1329 err = -EINVAL; 1328 err = -EINVAL;
1330 if (!mm)
1331 goto out;
1332 1329
1333 /* 1330 /*
1334 * Check if this process has the right to modify the specified 1331 * Check if this process has the right to modify the specified
@@ -1336,14 +1333,13 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
1336 * capabilities, superuser privileges or the same 1333 * capabilities, superuser privileges or the same
1337 * userid as the target process. 1334 * userid as the target process.
1338 */ 1335 */
1339 rcu_read_lock();
1340 tcred = __task_cred(task); 1336 tcred = __task_cred(task);
1341 if (cred->euid != tcred->suid && cred->euid != tcred->uid && 1337 if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
1342 cred->uid != tcred->suid && cred->uid != tcred->uid && 1338 cred->uid != tcred->suid && cred->uid != tcred->uid &&
1343 !capable(CAP_SYS_NICE)) { 1339 !capable(CAP_SYS_NICE)) {
1344 rcu_read_unlock(); 1340 rcu_read_unlock();
1345 err = -EPERM; 1341 err = -EPERM;
1346 goto out; 1342 goto out_put;
1347 } 1343 }
1348 rcu_read_unlock(); 1344 rcu_read_unlock();
1349 1345
@@ -1351,26 +1347,36 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
1351 /* Is the user allowed to access the target nodes? */ 1347 /* Is the user allowed to access the target nodes? */
1352 if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) { 1348 if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
1353 err = -EPERM; 1349 err = -EPERM;
1354 goto out; 1350 goto out_put;
1355 } 1351 }
1356 1352
1357 if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) { 1353 if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
1358 err = -EINVAL; 1354 err = -EINVAL;
1359 goto out; 1355 goto out_put;
1360 } 1356 }
1361 1357
1362 err = security_task_movememory(task); 1358 err = security_task_movememory(task);
1363 if (err) 1359 if (err)
1364 goto out; 1360 goto out_put;
1365 1361
1366 err = do_migrate_pages(mm, old, new, 1362 mm = get_task_mm(task);
1367 capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE); 1363 put_task_struct(task);
1368out:
1369 if (mm) 1364 if (mm)
1370 mmput(mm); 1365 err = do_migrate_pages(mm, old, new,
1366 capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
1367 else
1368 err = -EINVAL;
1369
1370 mmput(mm);
1371out:
1371 NODEMASK_SCRATCH_FREE(scratch); 1372 NODEMASK_SCRATCH_FREE(scratch);
1372 1373
1373 return err; 1374 return err;
1375
1376out_put:
1377 put_task_struct(task);
1378 goto out;
1379
1374} 1380}
1375 1381
1376 1382
diff --git a/mm/migrate.c b/mm/migrate.c
index 1503b6b54ecb..51c08a0c6f68 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1174,20 +1174,17 @@ set_status:
1174 * Migrate an array of page address onto an array of nodes and fill 1174 * Migrate an array of page address onto an array of nodes and fill
1175 * the corresponding array of status. 1175 * the corresponding array of status.
1176 */ 1176 */
1177static int do_pages_move(struct mm_struct *mm, struct task_struct *task, 1177static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
1178 unsigned long nr_pages, 1178 unsigned long nr_pages,
1179 const void __user * __user *pages, 1179 const void __user * __user *pages,
1180 const int __user *nodes, 1180 const int __user *nodes,
1181 int __user *status, int flags) 1181 int __user *status, int flags)
1182{ 1182{
1183 struct page_to_node *pm; 1183 struct page_to_node *pm;
1184 nodemask_t task_nodes;
1185 unsigned long chunk_nr_pages; 1184 unsigned long chunk_nr_pages;
1186 unsigned long chunk_start; 1185 unsigned long chunk_start;
1187 int err; 1186 int err;
1188 1187
1189 task_nodes = cpuset_mems_allowed(task);
1190
1191 err = -ENOMEM; 1188 err = -ENOMEM;
1192 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL); 1189 pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
1193 if (!pm) 1190 if (!pm)
@@ -1349,6 +1346,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1349 struct task_struct *task; 1346 struct task_struct *task;
1350 struct mm_struct *mm; 1347 struct mm_struct *mm;
1351 int err; 1348 int err;
1349 nodemask_t task_nodes;
1352 1350
1353 /* Check flags */ 1351 /* Check flags */
1354 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) 1352 if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1364,11 +1362,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1364 rcu_read_unlock(); 1362 rcu_read_unlock();
1365 return -ESRCH; 1363 return -ESRCH;
1366 } 1364 }
1367 mm = get_task_mm(task); 1365 get_task_struct(task);
1368 rcu_read_unlock();
1369
1370 if (!mm)
1371 return -EINVAL;
1372 1366
1373 /* 1367 /*
1374 * Check if this process has the right to modify the specified 1368 * Check if this process has the right to modify the specified
@@ -1376,7 +1370,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1376 * capabilities, superuser privileges or the same 1370 * capabilities, superuser privileges or the same
1377 * userid as the target process. 1371 * userid as the target process.
1378 */ 1372 */
1379 rcu_read_lock();
1380 tcred = __task_cred(task); 1373 tcred = __task_cred(task);
1381 if (cred->euid != tcred->suid && cred->euid != tcred->uid && 1374 if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
1382 cred->uid != tcred->suid && cred->uid != tcred->uid && 1375 cred->uid != tcred->suid && cred->uid != tcred->uid &&
@@ -1391,16 +1384,25 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
1391 if (err) 1384 if (err)
1392 goto out; 1385 goto out;
1393 1386
1394 if (nodes) { 1387 task_nodes = cpuset_mems_allowed(task);
1395 err = do_pages_move(mm, task, nr_pages, pages, nodes, status, 1388 mm = get_task_mm(task);
1396 flags); 1389 put_task_struct(task);
1397 } else { 1390
1398 err = do_pages_stat(mm, nr_pages, pages, status); 1391 if (mm) {
1399 } 1392 if (nodes)
1393 err = do_pages_move(mm, task_nodes, nr_pages, pages,
1394 nodes, status, flags);
1395 else
1396 err = do_pages_stat(mm, nr_pages, pages, status);
1397 } else
1398 err = -EINVAL;
1400 1399
1401out:
1402 mmput(mm); 1400 mmput(mm);
1403 return err; 1401 return err;
1402
1403out:
1404 put_task_struct(task);
1405 return err;
1404} 1406}
1405 1407
1406/* 1408/*