Diffstat (limited to 'kernel/cpuset.c')
 kernel/cpuset.c | 109
 1 file changed, 69 insertions(+), 40 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f06e7690106..79866bc6b3a1 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -180,6 +180,42 @@ static struct super_block *cpuset_sb = NULL;
  */
 
 static DECLARE_MUTEX(cpuset_sem);
+static struct task_struct *cpuset_sem_owner;
+static int cpuset_sem_depth;
+
+/*
+ * The global cpuset semaphore cpuset_sem can be needed by the
+ * memory allocator to update a task's mems_allowed (see the calls
+ * to cpuset_update_current_mems_allowed()) or to walk up the
+ * cpuset hierarchy to find a mem_exclusive cpuset (see the calls
+ * to cpuset_excl_nodes_overlap()).
+ *
+ * But if the memory allocation is being done by cpuset.c code, it
+ * usually already holds cpuset_sem.  Double tripping on a kernel
+ * semaphore deadlocks the current task, and any other task that
+ * subsequently tries to obtain the lock.
+ *
+ * Run all up's and down's on cpuset_sem through the following
+ * wrappers, which will detect this nested locking, and avoid
+ * deadlocking.
+ */
+
+static inline void cpuset_down(struct semaphore *psem)
+{
+	if (cpuset_sem_owner != current) {
+		down(psem);
+		cpuset_sem_owner = current;
+	}
+	cpuset_sem_depth++;
+}
+
+static inline void cpuset_up(struct semaphore *psem)
+{
+	if (--cpuset_sem_depth == 0) {
+		cpuset_sem_owner = NULL;
+		up(psem);
+	}
+}
 
 /*
  * A couple of forward declarations required, due to cyclic reference loop:
@@ -522,19 +558,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Refresh current tasks mems_allowed and mems_generation from
  * current tasks cpuset.  Call with cpuset_sem held.
  *
- * Be sure to call refresh_mems() on any cpuset operation which
- * (1) holds cpuset_sem, and (2) might possibly alloc memory.
- * Call after obtaining cpuset_sem lock, before any possible
- * allocation.  Otherwise one risks trying to allocate memory
- * while the task cpuset_mems_generation is not the same as
- * the mems_generation in its cpuset, which would deadlock on
- * cpuset_sem in cpuset_update_current_mems_allowed().
- *
- * Since we hold cpuset_sem, once refresh_mems() is called, the
- * test (current->cpuset_mems_generation != cs->mems_generation)
- * in cpuset_update_current_mems_allowed() will remain false,
- * until we drop cpuset_sem.  Anyone else who would change our
- * cpusets mems_generation needs to lock cpuset_sem first.
+ * This routine is needed to update the per-task mems_allowed
+ * data, within the task's context, when it is trying to allocate
+ * memory (in various mm/mempolicy.c routines) and notices
+ * that some other task has been modifying its cpuset.
  */
 
 static void refresh_mems(void)
@@ -840,7 +867,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 
 	if (is_removed(cs)) {
 		retval = -ENODEV;
@@ -874,7 +901,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 out1:
 	kfree(buffer);
@@ -914,9 +941,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->cpus_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return cpulist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -925,9 +952,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	nodemask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->mems_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -972,6 +999,10 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	*s++ = '\n';
 	*s = '\0';
 
+	/* Do nothing if *ppos is at the eof or beyond the eof. */
+	if (s - page <= *ppos)
+		return 0;
+
 	start = page + *ppos;
 	n = s - start;
 	retval = n - copy_to_user(buf, start, min(n, nbytes));
@@ -1330,8 +1361,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	if (!cs)
 		return -ENOMEM;
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1356,14 +1386,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	 * will down() this new directory's i_sem and if we race with
 	 * another mkdir, we might deadlock.
 	 */
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	err = cpuset_populate_dir(cs->dentry);
 	/* If err < 0, we have a half-filled directory - oh well ;) */
 	return 0;
 err:
 	list_del(&cs->sibling);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(cs);
 	return err;
 }
@@ -1385,14 +1415,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 	/* the vfs holds both inode->i_sem already */
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	if (atomic_read(&cs->count) > 0) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	if (!list_empty(&cs->children)) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	parent = cs->parent;
@@ -1408,7 +1437,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 	return 0;
 }
@@ -1511,10 +1540,10 @@ void cpuset_exit(struct task_struct *tsk)
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
 
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
 			check_for_release(cs, &pathbuf);
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
@@ -1535,11 +1564,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock((struct task_struct *)tsk);
 	guarantee_online_cpus(tsk->cpuset, &mask);
 	task_unlock((struct task_struct *)tsk);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return mask;
 }
@@ -1564,9 +1593,9 @@ void cpuset_update_current_mems_allowed(void)
 	if (!cs)
 		return;		/* task is exiting */
 	if (current->cpuset_mems_generation != cs->mems_generation) {
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		refresh_mems();
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 	}
 }
 
@@ -1665,14 +1694,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 		return 0;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs = current->cpuset;
 	if (!cs)
 		goto done;		/* current task exiting */
 	cs = nearest_exclusive_ancestor(cs);
 	allowed = node_isset(node, cs->mems_allowed);
 done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	return allowed;
 }
 
@@ -1693,7 +1722,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs1 = current->cpuset;
 	if (!cs1)
 		goto done;		/* current task exiting */
@@ -1704,7 +1733,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	cs2 = nearest_exclusive_ancestor(cs2);
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return overlap;
 }
@@ -1727,7 +1756,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		return -ENOMEM;
 
 	tsk = m->private;
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock(tsk);
 	cs = tsk->cpuset;
 	task_unlock(tsk);
@@ -1742,7 +1771,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(buf);
 	return retval;
 }
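
The owner/depth trick this patch introduces is a generic way to let a non-recursive lock tolerate re-entry from the task that already holds it. The standalone sketch below restates the pattern with POSIX threads so it can be compiled and run outside the kernel; it is not part of the patch, and all names in it (struct nested_lock, nested_down(), nested_up(), inner_path()) are hypothetical. As in the kernel version, the unlocked read of the owner field is benign only because a thread can observe itself as the owner only if it wrote that field itself while holding the lock.

/*
 * Standalone illustration of the owner/depth recursive-lock pattern.
 * Not kernel code; build with:  cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct nested_lock {
	pthread_mutex_t sem;	/* stands in for cpuset_sem */
	pthread_t owner;	/* thread currently holding sem */
	int owner_valid;	/* nonzero when owner is meaningful */
	int depth;		/* nesting count, touched only by the holder */
};

static struct nested_lock nl = { .sem = PTHREAD_MUTEX_INITIALIZER };

/* Acquire the lock unless this thread already holds it. */
static void nested_down(struct nested_lock *l)
{
	if (!l->owner_valid || !pthread_equal(l->owner, pthread_self())) {
		pthread_mutex_lock(&l->sem);
		l->owner = pthread_self();
		l->owner_valid = 1;
	}
	l->depth++;
}

/* Drop one nesting level; really unlock only at the outermost up. */
static void nested_up(struct nested_lock *l)
{
	if (--l->depth == 0) {
		l->owner_valid = 0;
		pthread_mutex_unlock(&l->sem);
	}
}

/*
 * Inner path that also wants the lock, the way the page allocator
 * can call back into cpuset code while cpuset.c holds cpuset_sem.
 */
static void inner_path(void)
{
	nested_down(&nl);	/* nested: no second lock attempt, no deadlock */
	puts("inner_path: re-entered the lock");
	nested_up(&nl);
}

int main(void)
{
	nested_down(&nl);	/* outer acquisition, depth becomes 1 */
	inner_path();		/* depth 2, then back to 1 */
	nested_up(&nl);		/* depth 0: the mutex is released */
	puts("done");
	return 0;
}

In user space a mutex created with the PTHREAD_MUTEX_RECURSIVE attribute gives the same behavior out of the box; the 2005-era kernel had no recursive semaphore, which is why the patch hand-rolls the wrapper around down()/up() instead.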