aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 111
1 files changed, 27 insertions, 84 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b9a661..2382683617a3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
964{ 964{
965 bool need_loop; 965 bool need_loop;
966 966
967repeat:
968 /* 967 /*
969 * Allow tasks that have access to memory reserves because they have 968 * Allow tasks that have access to memory reserves because they have
970 * been OOM killed to get memory anywhere. 969 * been OOM killed to get memory anywhere.
@@ -983,45 +982,19 @@ repeat:
983 */ 982 */
984 need_loop = task_has_mempolicy(tsk) || 983 need_loop = task_has_mempolicy(tsk) ||
985 !nodes_intersects(*newmems, tsk->mems_allowed); 984 !nodes_intersects(*newmems, tsk->mems_allowed);
986 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
987 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
988 985
989 /* 986 if (need_loop)
990 * ensure checking ->mems_allowed_change_disable after setting all new 987 write_seqcount_begin(&tsk->mems_allowed_seq);
991 * allowed nodes.
992 *
993 * the read-side task can see an nodemask with new allowed nodes and
994 * old allowed nodes. and if it allocates page when cpuset clears newly
995 * disallowed ones continuous, it can see the new allowed bits.
996 *
997 * And if setting all new allowed nodes is after the checking, setting
998 * all new allowed nodes and clearing newly disallowed ones will be done
999 * continuous, and the read-side task may find no node to alloc page.
1000 */
1001 smp_mb();
1002
1003 /*
1004 * Allocation of memory is very fast, we needn't sleep when waiting
1005 * for the read-side.
1006 */
1007 while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
1008 task_unlock(tsk);
1009 if (!task_curr(tsk))
1010 yield();
1011 goto repeat;
1012 }
1013 988
1014 /* 989 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
1015 * ensure checking ->mems_allowed_change_disable before clearing all new 990 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
1016 * disallowed nodes.
1017 *
1018 * if clearing newly disallowed bits before the checking, the read-side
1019 * task may find no node to alloc page.
1020 */
1021 smp_mb();
1022 991
1023 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); 992 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
1024 tsk->mems_allowed = *newmems; 993 tsk->mems_allowed = *newmems;
994
995 if (need_loop)
996 write_seqcount_end(&tsk->mems_allowed_seq);
997
1025 task_unlock(tsk); 998 task_unlock(tsk);
1026} 999}
1027 1000
@@ -1399,8 +1372,7 @@ static nodemask_t cpuset_attach_nodemask_from;
1399static nodemask_t cpuset_attach_nodemask_to; 1372static nodemask_t cpuset_attach_nodemask_to;
1400 1373
1401/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ 1374/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
1402static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 1375static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
1403 struct cgroup_taskset *tset)
1404{ 1376{
1405 struct cpuset *cs = cgroup_cs(cgrp); 1377 struct cpuset *cs = cgroup_cs(cgrp);
1406 struct task_struct *task; 1378 struct task_struct *task;
@@ -1436,8 +1408,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1436 return 0; 1408 return 0;
1437} 1409}
1438 1410
1439static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, 1411static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
1440 struct cgroup_taskset *tset)
1441{ 1412{
1442 struct mm_struct *mm; 1413 struct mm_struct *mm;
1443 struct task_struct *task; 1414 struct task_struct *task;
@@ -1794,28 +1765,17 @@ static struct cftype files[] = {
1794 .write_u64 = cpuset_write_u64, 1765 .write_u64 = cpuset_write_u64,
1795 .private = FILE_SPREAD_SLAB, 1766 .private = FILE_SPREAD_SLAB,
1796 }, 1767 },
1797};
1798 1768
1799static struct cftype cft_memory_pressure_enabled = { 1769 {
1800 .name = "memory_pressure_enabled", 1770 .name = "memory_pressure_enabled",
1801 .read_u64 = cpuset_read_u64, 1771 .flags = CFTYPE_ONLY_ON_ROOT,
1802 .write_u64 = cpuset_write_u64, 1772 .read_u64 = cpuset_read_u64,
1803 .private = FILE_MEMORY_PRESSURE_ENABLED, 1773 .write_u64 = cpuset_write_u64,
1804}; 1774 .private = FILE_MEMORY_PRESSURE_ENABLED,
1805 1775 },
1806static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1807{
1808 int err;
1809 1776
1810 err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); 1777 { } /* terminate */
1811 if (err) 1778};
1812 return err;
1813 /* memory_pressure_enabled is in root cpuset only */
1814 if (!cont->parent)
1815 err = cgroup_add_file(cont, ss,
1816 &cft_memory_pressure_enabled);
1817 return err;
1818}
1819 1779
1820/* 1780/*
1821 * post_clone() is called during cgroup_create() when the 1781 * post_clone() is called during cgroup_create() when the
@@ -1833,8 +1793,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1833 * (and likewise for mems) to the new cgroup. Called with cgroup_mutex 1793 * (and likewise for mems) to the new cgroup. Called with cgroup_mutex
1834 * held. 1794 * held.
1835 */ 1795 */
1836static void cpuset_post_clone(struct cgroup_subsys *ss, 1796static void cpuset_post_clone(struct cgroup *cgroup)
1837 struct cgroup *cgroup)
1838{ 1797{
1839 struct cgroup *parent, *child; 1798 struct cgroup *parent, *child;
1840 struct cpuset *cs, *parent_cs; 1799 struct cpuset *cs, *parent_cs;
@@ -1857,13 +1816,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
1857 1816
1858/* 1817/*
1859 * cpuset_create - create a cpuset 1818 * cpuset_create - create a cpuset
1860 * ss: cpuset cgroup subsystem
1861 * cont: control group that the new cpuset will be part of 1819 * cont: control group that the new cpuset will be part of
1862 */ 1820 */
1863 1821
1864static struct cgroup_subsys_state *cpuset_create( 1822static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont)
1865 struct cgroup_subsys *ss,
1866 struct cgroup *cont)
1867{ 1823{
1868 struct cpuset *cs; 1824 struct cpuset *cs;
1869 struct cpuset *parent; 1825 struct cpuset *parent;
@@ -1902,7 +1858,7 @@ static struct cgroup_subsys_state *cpuset_create(
1902 * will call async_rebuild_sched_domains(). 1858 * will call async_rebuild_sched_domains().
1903 */ 1859 */
1904 1860
1905static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) 1861static void cpuset_destroy(struct cgroup *cont)
1906{ 1862{
1907 struct cpuset *cs = cgroup_cs(cont); 1863 struct cpuset *cs = cgroup_cs(cont);
1908 1864
@@ -1920,9 +1876,9 @@ struct cgroup_subsys cpuset_subsys = {
1920 .destroy = cpuset_destroy, 1876 .destroy = cpuset_destroy,
1921 .can_attach = cpuset_can_attach, 1877 .can_attach = cpuset_can_attach,
1922 .attach = cpuset_attach, 1878 .attach = cpuset_attach,
1923 .populate = cpuset_populate,
1924 .post_clone = cpuset_post_clone, 1879 .post_clone = cpuset_post_clone,
1925 .subsys_id = cpuset_subsys_id, 1880 .subsys_id = cpuset_subsys_id,
1881 .base_cftypes = files,
1926 .early_init = 1, 1882 .early_init = 1,
1927}; 1883};
1928 1884
@@ -2195,10 +2151,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
2195 mutex_unlock(&callback_mutex); 2151 mutex_unlock(&callback_mutex);
2196} 2152}
2197 2153
2198int cpuset_cpus_allowed_fallback(struct task_struct *tsk) 2154void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2199{ 2155{
2200 const struct cpuset *cs; 2156 const struct cpuset *cs;
2201 int cpu;
2202 2157
2203 rcu_read_lock(); 2158 rcu_read_lock();
2204 cs = task_cs(tsk); 2159 cs = task_cs(tsk);
@@ -2219,22 +2174,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2219 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary 2174 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
2220 * set any mask even if it is not right from task_cs() pov, 2175 * set any mask even if it is not right from task_cs() pov,
2221 * the pending set_cpus_allowed_ptr() will fix things. 2176 * the pending set_cpus_allowed_ptr() will fix things.
2177 *
2178 * select_fallback_rq() will fix things up and set cpu_possible_mask
2179 * if required.
2222 */ 2180 */
2223
2224 cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
2225 if (cpu >= nr_cpu_ids) {
2226 /*
2227 * Either tsk->cpus_allowed is wrong (see above) or it
2228 * is actually empty. The latter case is only possible
2229 * if we are racing with remove_tasks_in_empty_cpuset().
2230 * Like above we can temporary set any mask and rely on
2231 * set_cpus_allowed_ptr() as synchronization point.
2232 */
2233 do_set_cpus_allowed(tsk, cpu_possible_mask);
2234 cpu = cpumask_any(cpu_active_mask);
2235 }
2236
2237 return cpu;
2238} 2181}
2239 2182
2240void cpuset_init_current_mems_allowed(void) 2183void cpuset_init_current_mems_allowed(void)