Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpuset.c	105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b9342f90d28f..cd54dba2be18 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -180,42 +180,6 @@ static struct super_block *cpuset_sb = NULL;
  */
 
 static DECLARE_MUTEX(cpuset_sem);
-static struct task_struct *cpuset_sem_owner;
-static int cpuset_sem_depth;
-
-/*
- * The global cpuset semaphore cpuset_sem can be needed by the
- * memory allocator to update a tasks mems_allowed (see the calls
- * to cpuset_update_current_mems_allowed()) or to walk up the
- * cpuset hierarchy to find a mem_exclusive cpuset see the calls
- * to cpuset_excl_nodes_overlap()).
- *
- * But if the memory allocation is being done by cpuset.c code, it
- * usually already holds cpuset_sem. Double tripping on a kernel
- * semaphore deadlocks the current task, and any other task that
- * subsequently tries to obtain the lock.
- *
- * Run all up's and down's on cpuset_sem through the following
- * wrappers, which will detect this nested locking, and avoid
- * deadlocking.
- */
-
-static inline void cpuset_down(struct semaphore *psem)
-{
-	if (cpuset_sem_owner != current) {
-		down(psem);
-		cpuset_sem_owner = current;
-	}
-	cpuset_sem_depth++;
-}
-
-static inline void cpuset_up(struct semaphore *psem)
-{
-	if (--cpuset_sem_depth == 0) {
-		cpuset_sem_owner = NULL;
-		up(psem);
-	}
-}
 
 /*
  * A couple of forward declarations required, due to cyclic reference loop:
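
Background note, not part of the patch: a plain kernel semaphore is not recursive, so a task that already holds cpuset_sem and calls down() on it again sleeps forever waiting on itself -- the "double tripping" deadlock the deleted comment describes, and the reason the owner/depth wrappers existed. A minimal user-space sketch of the same failure mode, using POSIX semaphores purely for illustration:

#include <semaphore.h>
#include <stdio.h>

static sem_t sem;

int main(void)
{
	sem_init(&sem, 0, 1);	/* count 1: behaves like DECLARE_MUTEX */
	sem_wait(&sem);		/* first "down" succeeds, count -> 0 */
	printf("holding; trying to re-acquire...\n");
	sem_wait(&sem);		/* second "down" by the same holder blocks
				 * forever: the self-deadlock the removed
				 * cpuset_down() wrapper worked around */
	return 0;		/* never reached */
}

Rather than detect nested locking at runtime, the patch arranges via the refresh_mems() convention documented in the next hunk that cpuset.c never needs to take cpuset_sem twice in the first place.
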
@@ -558,10 +522,19 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Refresh current tasks mems_allowed and mems_generation from
  * current tasks cpuset. Call with cpuset_sem held.
  *
- * This routine is needed to update the per-task mems_allowed
- * data, within the tasks context, when it is trying to allocate
- * memory (in various mm/mempolicy.c routines) and notices
- * that some other task has been modifying its cpuset.
+ * Be sure to call refresh_mems() on any cpuset operation which
+ * (1) holds cpuset_sem, and (2) might possibly alloc memory.
+ * Call after obtaining cpuset_sem lock, before any possible
+ * allocation. Otherwise one risks trying to allocate memory
+ * while the task cpuset_mems_generation is not the same as
+ * the mems_generation in its cpuset, which would deadlock on
+ * cpuset_sem in cpuset_update_current_mems_allowed().
+ *
+ * Since we hold cpuset_sem, once refresh_mems() is called, the
+ * test (current->cpuset_mems_generation != cs->mems_generation)
+ * in cpuset_update_current_mems_allowed() will remain false,
+ * until we drop cpuset_sem. Anyone else who would change our
+ * cpusets mems_generation needs to lock cpuset_sem first.
  */
 
 static void refresh_mems(void)
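
The convention this new comment prescribes is visible in the cpuset_create() and cpuset_rmdir() hunks below: take the lock, refresh, then allocate. A sketch of the shape (the caller and the allocation are hypothetical placeholders; down(), refresh_mems() and cpuset_sem are from this file):

static int some_cpuset_op(void)		/* hypothetical caller */
{
	void *p;

	down(&cpuset_sem);
	refresh_mems();			/* sync cpuset_mems_generation before
					 * any allocation, so the allocator's
					 * slow path never needs cpuset_sem */
	p = kmalloc(64, GFP_KERNEL);	/* now safe to allocate */
	/* ... do the real work ... */
	kfree(p);
	up(&cpuset_sem);
	return 0;
}
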
@@ -867,7 +840,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 
 	if (is_removed(cs)) {
 		retval = -ENODEV;
@@ -901,7 +874,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 out1:
 	kfree(buffer);
@@ -941,9 +914,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	cpumask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	mask = cs->cpus_allowed;
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return cpulist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -952,9 +925,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	nodemask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	mask = cs->mems_allowed;
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -1351,7 +1324,8 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	if (!cs)
 		return -ENOMEM;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
+	refresh_mems();
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1376,14 +1350,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	 * will down() this new directory's i_sem and if we race with
 	 * another mkdir, we might deadlock.
 	 */
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	err = cpuset_populate_dir(cs->dentry);
 	/* If err < 0, we have a half-filled directory - oh well ;) */
 	return 0;
 err:
 	list_del(&cs->sibling);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	kfree(cs);
 	return err;
 }
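
The comment in this hunk records a lock-ordering rule: cpuset_sem is dropped before cpuset_populate_dir() because that path will down() the new directory's i_sem, and holding both while a racing mkdir acquires them in the opposite order would deadlock. A generic sketch of that rule (the lock names here are made up):

static DECLARE_MUTEX(lock_a);		/* plays the role of cpuset_sem    */
static DECLARE_MUTEX(lock_b);		/* plays the role of the dir i_sem */

/* ABBA deadlock: if one task does down(a); down(b); while another does
 * down(b); down(a);, each waits on the other forever.  The fix used in
 * cpuset_create() is to never hold both at once: */
static void safe_order(void)
{
	down(&lock_a);
	/* ... work that needs lock_a ... */
	up(&lock_a);			/* drop before taking the next lock */

	down(&lock_b);
	/* ... work that needs lock_b ... */
	up(&lock_b);
}
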
@@ -1405,13 +1379,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 	/* the vfs holds both inode->i_sem already */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
+	refresh_mems();
 	if (atomic_read(&cs->count) > 0) {
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		return -EBUSY;
 	}
 	if (!list_empty(&cs->children)) {
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		return -EBUSY;
 	}
 	parent = cs->parent;
@@ -1427,7 +1402,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 	return 0;
 }
@@ -1530,10 +1505,10 @@ void cpuset_exit(struct task_struct *tsk)
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
 
-		cpuset_down(&cpuset_sem);
+		down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
 			check_for_release(cs, &pathbuf);
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
@@ -1554,11 +1529,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
 {
 	cpumask_t mask;
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	task_lock((struct task_struct *)tsk);
 	guarantee_online_cpus(tsk->cpuset, &mask);
 	task_unlock((struct task_struct *)tsk);
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return mask;
 }
@@ -1583,9 +1558,9 @@ void cpuset_update_current_mems_allowed(void)
 	if (!cs)
 		return;		/* task is exiting */
 	if (current->cpuset_mems_generation != cs->mems_generation) {
-		cpuset_down(&cpuset_sem);
+		down(&cpuset_sem);
 		refresh_mems();
-		cpuset_up(&cpuset_sem);
+		up(&cpuset_sem);
 	}
 }
 
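
This hunk shows the other half of the scheme: an unlocked generation comparison on the allocation hot path, with cpuset_sem taken only when the task's cached generation is stale. The same check-then-lock shape, reduced to a generic sketch (everything except the semaphore primitives is a made-up name, not kernel API):

static DECLARE_MUTEX(big_sem);
static int master_generation;		/* only changed while holding big_sem */

static void maybe_refresh(int *my_generation)
{
	/* Unlocked fast path, as in cpuset_update_current_mems_allowed()
	 * above: fall into the locked slow path only when the cached
	 * generation is stale. */
	if (*my_generation != master_generation) {
		down(&big_sem);
		/* ... recopy the shared state guarded by big_sem ... */
		*my_generation = master_generation;
		up(&big_sem);
	}
}
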
@@ -1684,14 +1659,14 @@ int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 		return 0;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	cs = current->cpuset;
 	if (!cs)
 		goto done;		/* current task exiting */
 	cs = nearest_exclusive_ancestor(cs);
 	allowed = node_isset(node, cs->mems_allowed);
 done:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	return allowed;
 }
 
@@ -1712,7 +1687,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	cs1 = current->cpuset;
 	if (!cs1)
 		goto done;		/* current task exiting */
@@ -1723,7 +1698,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	cs2 = nearest_exclusive_ancestor(cs2);
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 done:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 
 	return overlap;
 }
@@ -1746,7 +1721,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		return -ENOMEM;
 
 	tsk = m->private;
-	cpuset_down(&cpuset_sem);
+	down(&cpuset_sem);
 	task_lock(tsk);
 	cs = tsk->cpuset;
 	task_unlock(tsk);
@@ -1761,7 +1736,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out:
-	cpuset_up(&cpuset_sem);
+	up(&cpuset_sem);
 	kfree(buf);
 	return retval;
 }