Diffstat (limited to 'kernel/cpuset.c'):
 kernel/cpuset.c | 104
 1 file changed, 64 insertions(+), 40 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1f06e7690106..407b5f0a8c8e 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -182,6 +182,37 @@ static struct super_block *cpuset_sb = NULL;
 static DECLARE_MUTEX(cpuset_sem);
 
 /*
+ * The global cpuset semaphore cpuset_sem can be needed by the
+ * memory allocator to update a task's mems_allowed (see the calls
+ * to cpuset_update_current_mems_allowed()) or to walk up the
+ * cpuset hierarchy to find a mem_exclusive cpuset (see the calls
+ * to cpuset_excl_nodes_overlap()).
+ *
+ * But if the memory allocation is being done by cpuset.c code, it
+ * usually already holds cpuset_sem.  Double tripping on a kernel
+ * semaphore deadlocks the current task, and any other task that
+ * subsequently tries to obtain the lock.
+ *
+ * Run all up's and down's on cpuset_sem through the following
+ * wrappers, which will detect this nested locking, and avoid
+ * deadlocking.
+ */
+
+static inline void cpuset_down(struct semaphore *psem)
+{
+	if (current->cpuset_sem_nest_depth == 0)
+		down(psem);
+	current->cpuset_sem_nest_depth++;
+}
+
+static inline void cpuset_up(struct semaphore *psem)
+{
+	current->cpuset_sem_nest_depth--;
+	if (current->cpuset_sem_nest_depth == 0)
+		up(psem);
+}
+
+/*
  * A couple of forward declarations required, due to cyclic reference loop:
  *  cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
  *  -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
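
For readers following the locking change outside the kernel tree: below is a
minimal user-space sketch of the nest-depth pattern added above, with a
pthread mutex and a thread-local counter standing in for the task_struct
field. The names lock_down(), lock_up(), big_lock and nest_depth are made up
for illustration; they are not part of this patch.

	#include <pthread.h>

	static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
	static __thread int nest_depth;	/* plays the role of
					   current->cpuset_sem_nest_depth */

	static void lock_down(void)
	{
		if (nest_depth == 0)	/* outermost caller takes the lock */
			pthread_mutex_lock(&big_lock);
		nest_depth++;		/* nested callers only count */
	}

	static void lock_up(void)
	{
		nest_depth--;
		if (nest_depth == 0)	/* outermost caller releases it */
			pthread_mutex_unlock(&big_lock);
	}

The invariant matches cpuset_down()/cpuset_up(): however deeply the pairs
nest within one task, the underlying lock is taken and released exactly once,
so the task can never sleep on a semaphore it already holds.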
@@ -522,19 +553,10 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * Refresh current tasks mems_allowed and mems_generation from
  * current tasks cpuset. Call with cpuset_sem held.
  *
- * Be sure to call refresh_mems() on any cpuset operation which
- * (1) holds cpuset_sem, and (2) might possibly alloc memory.
- * Call after obtaining cpuset_sem lock, before any possible
- * allocation.  Otherwise one risks trying to allocate memory
- * while the task cpuset_mems_generation is not the same as
- * the mems_generation in its cpuset, which would deadlock on
- * cpuset_sem in cpuset_update_current_mems_allowed().
- *
- * Since we hold cpuset_sem, once refresh_mems() is called, the
- * test (current->cpuset_mems_generation != cs->mems_generation)
- * in cpuset_update_current_mems_allowed() will remain false,
- * until we drop cpuset_sem.  Anyone else who would change our
- * cpusets mems_generation needs to lock cpuset_sem first.
+ * This routine is needed to update the per-task mems_allowed
+ * data, within the task's context, when it is trying to allocate
+ * memory (in various mm/mempolicy.c routines) and notices
+ * that some other task has been modifying its cpuset.
  */
 
 static void refresh_mems(void)
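
The mems_generation scheme that refresh_mems() serves follows a common
generation-counter pattern, sketched below in user-space form. All names here
(shared_state, refresh_if_stale, and friends) are illustrative, and the
sketch assumes, as cpuset.c does, that modifications and refreshes run under
one global lock.

	/* Writers bump the generation on every change; each reader keeps
	 * the generation it last copied and re-copies only on mismatch. */
	struct shared_state {
		int		generation;	/* bumped on every change */
		unsigned long	mask;		/* the data readers cache */
	};

	static struct shared_state shared = { .generation = 1, .mask = 0xfUL };

	static __thread int		my_generation;	/* last gen copied */
	static __thread unsigned long	my_mask;	/* cached copy */

	static void refresh_if_stale(void)
	{
		if (my_generation != shared.generation) {
			my_mask = shared.mask;	/* re-copy shared data */
			my_generation = shared.generation;
		}
	}

This is also why the shorter replacement comment suffices: with nested
locking handled by cpuset_down(), refresh_mems() need only run from the
allocating task's own context when it notices a stale generation, instead of
being called defensively before every possible allocation.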
@@ -840,7 +862,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	}
 	buffer[nbytes] = 0;	/* nul-terminate */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 
 	if (is_removed(cs)) {
 		retval = -ENODEV;
@@ -874,7 +896,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
 	if (retval == 0)
 		retval = nbytes;
 out2:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 out1:
 	kfree(buffer);
@@ -914,9 +936,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->cpus_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return cpulist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -925,9 +947,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
 {
 	nodemask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	mask = cs->mems_allowed;
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
 }
@@ -972,6 +994,10 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
 	*s++ = '\n';
 	*s = '\0';
 
+	/* Do nothing if *ppos is at the eof or beyond the eof. */
+	if (s - page <= *ppos)
+		return 0;
+
 	start = page + *ppos;
 	n = s - start;
 	retval = n - copy_to_user(buf, start, min(n, nbytes));
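
The guard added above keeps n = s - start from going negative when a reader's
file offset already sits at or past the end of the generated text. A hedged
user-space sketch of the same offset arithmetic (read_at() and its parameters
are hypothetical, not kernel API):

	#include <stddef.h>
	#include <string.h>

	/* Copy up to nbytes of text[0..len) starting at *ppos, advancing
	 * *ppos; returns bytes copied, or 0 at or beyond EOF. */
	static size_t read_at(const char *text, size_t len, char *out,
			      size_t nbytes, size_t *ppos)
	{
		size_t n;

		if (*ppos >= len)	/* without this check, len - *ppos
					   would underflow below */
			return 0;
		n = len - *ppos;
		if (n > nbytes)
			n = nbytes;
		memcpy(out, text + *ppos, n);
		*ppos += n;
		return n;
	}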
@@ -1330,8 +1356,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	if (!cs)
 		return -ENOMEM;
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	cs->flags = 0;
 	if (notify_on_release(parent))
 		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1356,14 +1381,14 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
 	 * will down() this new directory's i_sem and if we race with
 	 * another mkdir, we might deadlock.
 	 */
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	err = cpuset_populate_dir(cs->dentry);
 	/* If err < 0, we have a half-filled directory - oh well ;) */
 	return 0;
 err:
 	list_del(&cs->sibling);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(cs);
 	return err;
 }
@@ -1385,14 +1410,13 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 
 	/* the vfs holds both inode->i_sem already */
 
-	down(&cpuset_sem);
-	refresh_mems();
+	cpuset_down(&cpuset_sem);
 	if (atomic_read(&cs->count) > 0) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	if (!list_empty(&cs->children)) {
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		return -EBUSY;
 	}
 	parent = cs->parent;
@@ -1408,7 +1432,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	spin_unlock(&d->d_lock);
 	cpuset_d_remove_dir(d);
 	dput(d);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	cpuset_release_agent(pathbuf);
 	return 0;
 }
@@ -1511,10 +1535,10 @@ void cpuset_exit(struct task_struct *tsk)
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
 
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		if (atomic_dec_and_test(&cs->count))
 			check_for_release(cs, &pathbuf);
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 		cpuset_release_agent(pathbuf);
 	} else {
 		atomic_dec(&cs->count);
@@ -1535,11 +1559,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
 {
 	cpumask_t mask;
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock((struct task_struct *)tsk);
 	guarantee_online_cpus(tsk->cpuset, &mask);
 	task_unlock((struct task_struct *)tsk);
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return mask;
 }
@@ -1564,9 +1588,9 @@ void cpuset_update_current_mems_allowed(void)
 	if (!cs)
 		return;		/* task is exiting */
 	if (current->cpuset_mems_generation != cs->mems_generation) {
-		down(&cpuset_sem);
+		cpuset_down(&cpuset_sem);
 		refresh_mems();
-		up(&cpuset_sem);
+		cpuset_up(&cpuset_sem);
 	}
 }
 
@@ -1665,14 +1689,14 @@ int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
 		return 0;
 
 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs = current->cpuset;
 	if (!cs)
 		goto done;		/* current task exiting */
 	cs = nearest_exclusive_ancestor(cs);
 	allowed = node_isset(node, cs->mems_allowed);
 done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	return allowed;
 }
 
@@ -1693,7 +1717,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
 	int overlap = 0;		/* do cpusets overlap? */
 
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	cs1 = current->cpuset;
 	if (!cs1)
 		goto done;		/* current task exiting */
@@ -1704,7 +1728,7 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
 	cs2 = nearest_exclusive_ancestor(cs2);
 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
 done:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 
 	return overlap;
 }
@@ -1727,7 +1751,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 		return -ENOMEM;
 
 	tsk = m->private;
-	down(&cpuset_sem);
+	cpuset_down(&cpuset_sem);
 	task_lock(tsk);
 	cs = tsk->cpuset;
 	task_unlock(tsk);
@@ -1742,7 +1766,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
 	seq_puts(m, buf);
 	seq_putc(m, '\n');
 out:
-	up(&cpuset_sem);
+	cpuset_up(&cpuset_sem);
 	kfree(buf);
 	return retval;
 }
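
Taken together, these conversions let cpuset.c reach the page allocator while
holding cpuset_sem. Reusing the hypothetical lock_down()/lock_up() sketch
from earlier, the call shape that used to deadlock now reduces to a counter
increment (zone_allowed() and file_write() are stand-ins, not the real kernel
paths):

	/* file_write() models a cpuset entry point that allocates while
	 * holding the lock; zone_allowed() models the allocator calling
	 * back into lock-taking cpuset code. */
	static int zone_allowed(void)
	{
		int ok;

		lock_down();	/* nested: depth 1 -> 2, no second lock */
		ok = 1;		/* stand-in for the ancestor node check */
		lock_up();	/* depth 2 -> 1, lock stays held */
		return ok;
	}

	static void file_write(void)
	{
		lock_down();		/* outermost: takes the lock */
		(void)zone_allowed();	/* "allocator" callback, lock held */
		lock_up();		/* depth 1 -> 0, releases the lock */
	}

With plain down()/up(), the inner call would sleep forever on a semaphore its
own task already holds; with the depth counter, only the outermost pair ever
touches it.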