about summary refs log tree commit diff stats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 94
1 files changed, 67 insertions, 27 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 984c0bf3807f..21a4e3b2cbda 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -398,21 +398,31 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
398 * to continue to serve a useful existence. Next time it's released, 398 * to continue to serve a useful existence. Next time it's released,
399 * we will get notified again, if it still has 'notify_on_release' set. 399 * we will get notified again, if it still has 'notify_on_release' set.
400 * 400 *
401 * Note final arg to call_usermodehelper() is 0 - that means 401 * The final arg to call_usermodehelper() is 0, which means don't
402 * don't wait. Since we are holding the global cpuset_sem here, 402 * wait. The separate /sbin/cpuset_release_agent task is forked by
403 * and we are asking another thread (started from keventd) to rmdir a 403 * call_usermodehelper(), then control in this thread returns here,
404 * cpuset, we can't wait - or we'd deadlock with the removing thread 404 * without waiting for the release agent task. We don't bother to
405 * on cpuset_sem. 405 * wait because the caller of this routine has no use for the exit
406 * status of the /sbin/cpuset_release_agent task, so no sense holding
407 * our caller up for that.
408 *
409 * The simple act of forking that task might require more memory,
410 * which might need cpuset_sem. So this routine must be called while
411 * cpuset_sem is not held, to avoid a possible deadlock. See also
412 * comments for check_for_release(), below.
406 */ 413 */
407 414
408static int cpuset_release_agent(char *cpuset_str) 415static void cpuset_release_agent(const char *pathbuf)
409{ 416{
410 char *argv[3], *envp[3]; 417 char *argv[3], *envp[3];
411 int i; 418 int i;
412 419
420 if (!pathbuf)
421 return;
422
413 i = 0; 423 i = 0;
414 argv[i++] = "/sbin/cpuset_release_agent"; 424 argv[i++] = "/sbin/cpuset_release_agent";
415 argv[i++] = cpuset_str; 425 argv[i++] = (char *)pathbuf;
416 argv[i] = NULL; 426 argv[i] = NULL;
417 427
418 i = 0; 428 i = 0;
@@ -421,17 +431,29 @@ static int cpuset_release_agent(char *cpuset_str)
421 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; 431 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
422 envp[i] = NULL; 432 envp[i] = NULL;
423 433
424 return call_usermodehelper(argv[0], argv, envp, 0); 434 call_usermodehelper(argv[0], argv, envp, 0);
435 kfree(pathbuf);
425} 436}
426 437
427/* 438/*
428 * Either cs->count of using tasks transitioned to zero, or the 439 * Either cs->count of using tasks transitioned to zero, or the
429 * cs->children list of child cpusets just became empty. If this 440 * cs->children list of child cpusets just became empty. If this
430 * cs is notify_on_release() and now both the user count is zero and 441 * cs is notify_on_release() and now both the user count is zero and
431 * the list of children is empty, send notice to user land. 442 * the list of children is empty, prepare cpuset path in a kmalloc'd
443 * buffer, to be returned via ppathbuf, so that the caller can invoke
444 * cpuset_release_agent() with it later on, once cpuset_sem is dropped.
445 * Call here with cpuset_sem held.
446 *
447 * This check_for_release() routine is responsible for kmalloc'ing
448 * pathbuf. The above cpuset_release_agent() is responsible for
449 * kfree'ing pathbuf. The caller of these routines is responsible
450 * for providing a pathbuf pointer, initialized to NULL, then
451 * calling check_for_release() with cpuset_sem held and the address
452 * of the pathbuf pointer, then dropping cpuset_sem, then calling
453 * cpuset_release_agent() with pathbuf, as set by check_for_release().
432 */ 454 */
433 455
434static void check_for_release(struct cpuset *cs) 456static void check_for_release(struct cpuset *cs, char **ppathbuf)
435{ 457{
436 if (notify_on_release(cs) && atomic_read(&cs->count) == 0 && 458 if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
437 list_empty(&cs->children)) { 459 list_empty(&cs->children)) {
@@ -441,10 +463,9 @@ static void check_for_release(struct cpuset *cs)
441 if (!buf) 463 if (!buf)
442 return; 464 return;
443 if (cpuset_path(cs, buf, PAGE_SIZE) < 0) 465 if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
444 goto out; 466 kfree(buf);
445 cpuset_release_agent(buf); 467 else
446out: 468 *ppathbuf = buf;
447 kfree(buf);
448 } 469 }
449} 470}
450 471
@@ -727,14 +748,14 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
727 return 0; 748 return 0;
728} 749}
729 750
730static int attach_task(struct cpuset *cs, char *buf) 751static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
731{ 752{
732 pid_t pid; 753 pid_t pid;
733 struct task_struct *tsk; 754 struct task_struct *tsk;
734 struct cpuset *oldcs; 755 struct cpuset *oldcs;
735 cpumask_t cpus; 756 cpumask_t cpus;
736 757
737 if (sscanf(buf, "%d", &pid) != 1) 758 if (sscanf(pidbuf, "%d", &pid) != 1)
738 return -EIO; 759 return -EIO;
739 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) 760 if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
740 return -ENOSPC; 761 return -ENOSPC;
@@ -777,7 +798,7 @@ static int attach_task(struct cpuset *cs, char *buf)
777 798
778 put_task_struct(tsk); 799 put_task_struct(tsk);
779 if (atomic_dec_and_test(&oldcs->count)) 800 if (atomic_dec_and_test(&oldcs->count))
780 check_for_release(oldcs); 801 check_for_release(oldcs, ppathbuf);
781 return 0; 802 return 0;
782} 803}
783 804
@@ -801,6 +822,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
801 struct cftype *cft = __d_cft(file->f_dentry); 822 struct cftype *cft = __d_cft(file->f_dentry);
802 cpuset_filetype_t type = cft->private; 823 cpuset_filetype_t type = cft->private;
803 char *buffer; 824 char *buffer;
825 char *pathbuf = NULL;
804 int retval = 0; 826 int retval = 0;
805 827
806 /* Crude upper limit on largest legitimate cpulist user might write. */ 828 /* Crude upper limit on largest legitimate cpulist user might write. */
@@ -841,7 +863,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
841 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); 863 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
842 break; 864 break;
843 case FILE_TASKLIST: 865 case FILE_TASKLIST:
844 retval = attach_task(cs, buffer); 866 retval = attach_task(cs, buffer, &pathbuf);
845 break; 867 break;
846 default: 868 default:
847 retval = -EINVAL; 869 retval = -EINVAL;
@@ -852,6 +874,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
852 retval = nbytes; 874 retval = nbytes;
853out2: 875out2:
854 up(&cpuset_sem); 876 up(&cpuset_sem);
877 cpuset_release_agent(pathbuf);
855out1: 878out1:
856 kfree(buffer); 879 kfree(buffer);
857 return retval; 880 return retval;
@@ -1357,6 +1380,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1357 struct cpuset *cs = dentry->d_fsdata; 1380 struct cpuset *cs = dentry->d_fsdata;
1358 struct dentry *d; 1381 struct dentry *d;
1359 struct cpuset *parent; 1382 struct cpuset *parent;
1383 char *pathbuf = NULL;
1360 1384
1361 /* the vfs holds both inode->i_sem already */ 1385 /* the vfs holds both inode->i_sem already */
1362 1386
@@ -1376,7 +1400,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1376 update_cpu_domains(cs); 1400 update_cpu_domains(cs);
1377 list_del(&cs->sibling); /* delete my sibling from parent->children */ 1401 list_del(&cs->sibling); /* delete my sibling from parent->children */
1378 if (list_empty(&parent->children)) 1402 if (list_empty(&parent->children))
1379 check_for_release(parent); 1403 check_for_release(parent, &pathbuf);
1380 spin_lock(&cs->dentry->d_lock); 1404 spin_lock(&cs->dentry->d_lock);
1381 d = dget(cs->dentry); 1405 d = dget(cs->dentry);
1382 cs->dentry = NULL; 1406 cs->dentry = NULL;
@@ -1384,6 +1408,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1384 cpuset_d_remove_dir(d); 1408 cpuset_d_remove_dir(d);
1385 dput(d); 1409 dput(d);
1386 up(&cpuset_sem); 1410 up(&cpuset_sem);
1411 cpuset_release_agent(pathbuf);
1387 return 0; 1412 return 0;
1388} 1413}
1389 1414
@@ -1440,10 +1465,10 @@ void __init cpuset_init_smp(void)
1440 1465
1441/** 1466/**
1442 * cpuset_fork - attach newly forked task to its parent's cpuset. 1467 * cpuset_fork - attach newly forked task to its parent's cpuset.
1443 * @p: pointer to task_struct of forking parent process. 1468 * @tsk: pointer to task_struct of forking parent process.
1444 * 1469 *
1445 * Description: By default, on fork, a task inherits its 1470 * Description: By default, on fork, a task inherits its
1446 * parents cpuset. The pointer to the shared cpuset is 1471 * parent's cpuset. The pointer to the shared cpuset is
1447 * automatically copied in fork.c by dup_task_struct(). 1472 * automatically copied in fork.c by dup_task_struct().
1448 * This cpuset_fork() routine need only increment the usage 1473 * This cpuset_fork() routine need only increment the usage
1449 * counter in that cpuset. 1474 * counter in that cpuset.
@@ -1471,7 +1496,6 @@ void cpuset_fork(struct task_struct *tsk)
1471 * by the cpuset_sem semaphore. If you don't hold cpuset_sem, 1496 * by the cpuset_sem semaphore. If you don't hold cpuset_sem,
1472 * then a zero cpuset use count is a license to any other task to 1497 * then a zero cpuset use count is a license to any other task to
1473 * nuke the cpuset immediately. 1498 * nuke the cpuset immediately.
1474 *
1475 **/ 1499 **/
1476 1500
1477void cpuset_exit(struct task_struct *tsk) 1501void cpuset_exit(struct task_struct *tsk)
@@ -1484,10 +1508,13 @@ void cpuset_exit(struct task_struct *tsk)
1484 task_unlock(tsk); 1508 task_unlock(tsk);
1485 1509
1486 if (notify_on_release(cs)) { 1510 if (notify_on_release(cs)) {
1511 char *pathbuf = NULL;
1512
1487 down(&cpuset_sem); 1513 down(&cpuset_sem);
1488 if (atomic_dec_and_test(&cs->count)) 1514 if (atomic_dec_and_test(&cs->count))
1489 check_for_release(cs); 1515 check_for_release(cs, &pathbuf);
1490 up(&cpuset_sem); 1516 up(&cpuset_sem);
1517 cpuset_release_agent(pathbuf);
1491 } else { 1518 } else {
1492 atomic_dec(&cs->count); 1519 atomic_dec(&cs->count);
1493 } 1520 }
@@ -1521,7 +1548,9 @@ void cpuset_init_current_mems_allowed(void)
1521 current->mems_allowed = NODE_MASK_ALL; 1548 current->mems_allowed = NODE_MASK_ALL;
1522} 1549}
1523 1550
1524/* 1551/**
1552 * cpuset_update_current_mems_allowed - update mems parameters to new values
1553 *
1525 * If the current task's cpuset's mems_allowed changed behind our backs, 1554 * If the current task's cpuset's mems_allowed changed behind our backs,
1526 * update current->mems_allowed and mems_generation to the new value. 1555 * update current->mems_allowed and mems_generation to the new value.
1527 * Do not call this routine if in_interrupt(). 1556 * Do not call this routine if in_interrupt().
@@ -1540,13 +1569,20 @@ void cpuset_update_current_mems_allowed(void)
1540 } 1569 }
1541} 1570}
1542 1571
1572/**
1573 * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
1574 * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
1575 */
1543void cpuset_restrict_to_mems_allowed(unsigned long *nodes) 1576void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
1544{ 1577{
1545 bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed), 1578 bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
1546 MAX_NUMNODES); 1579 MAX_NUMNODES);
1547} 1580}
1548 1581
1549/* 1582/**
1583 * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed
1584 * @zl: the zonelist to be checked
1585 *
1550 * Are any of the nodes on zonelist zl allowed in current->mems_allowed? 1586 * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
1551 */ 1587 */
1552int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) 1588int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
@@ -1562,8 +1598,12 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
1562 return 0; 1598 return 0;
1563} 1599}
1564 1600
1565/* 1601/**
1566 * Is 'current' valid, and is zone z allowed in current->mems_allowed? 1602 * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
1603 * @z: zone in question
1604 *
1605 * Is zone z allowed in current->mems_allowed, or is
1606 * the CPU in interrupt context? (zone is always allowed in this case)
1567 */ 1607 */
1568int cpuset_zone_allowed(struct zone *z) 1608int cpuset_zone_allowed(struct zone *z)
1569{ 1609{