diff options
Diffstat (limited to 'kernel/cpuset.c')
| -rw-r--r-- | kernel/cpuset.c | 111 |
1 files changed, 27 insertions, 84 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a09ac2b9a661..2382683617a3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -964,7 +964,6 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
| 964 | { | 964 | { |
| 965 | bool need_loop; | 965 | bool need_loop; |
| 966 | 966 | ||
| 967 | repeat: | ||
| 968 | /* | 967 | /* |
| 969 | * Allow tasks that have access to memory reserves because they have | 968 | * Allow tasks that have access to memory reserves because they have |
| 970 | * been OOM killed to get memory anywhere. | 969 | * been OOM killed to get memory anywhere. |
| @@ -983,45 +982,19 @@ repeat: | |||
| 983 | */ | 982 | */ |
| 984 | need_loop = task_has_mempolicy(tsk) || | 983 | need_loop = task_has_mempolicy(tsk) || |
| 985 | !nodes_intersects(*newmems, tsk->mems_allowed); | 984 | !nodes_intersects(*newmems, tsk->mems_allowed); |
| 986 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | ||
| 987 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); | ||
| 988 | 985 | ||
| 989 | /* | 986 | if (need_loop) |
| 990 | * ensure checking ->mems_allowed_change_disable after setting all new | 987 | write_seqcount_begin(&tsk->mems_allowed_seq); |
| 991 | * allowed nodes. | ||
| 992 | * | ||
| 993 | * the read-side task can see an nodemask with new allowed nodes and | ||
| 994 | * old allowed nodes. and if it allocates page when cpuset clears newly | ||
| 995 | * disallowed ones continuous, it can see the new allowed bits. | ||
| 996 | * | ||
| 997 | * And if setting all new allowed nodes is after the checking, setting | ||
| 998 | * all new allowed nodes and clearing newly disallowed ones will be done | ||
| 999 | * continuous, and the read-side task may find no node to alloc page. | ||
| 1000 | */ | ||
| 1001 | smp_mb(); | ||
| 1002 | |||
| 1003 | /* | ||
| 1004 | * Allocation of memory is very fast, we needn't sleep when waiting | ||
| 1005 | * for the read-side. | ||
| 1006 | */ | ||
| 1007 | while (need_loop && ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
| 1008 | task_unlock(tsk); | ||
| 1009 | if (!task_curr(tsk)) | ||
| 1010 | yield(); | ||
| 1011 | goto repeat; | ||
| 1012 | } | ||
| 1013 | 988 | ||
| 1014 | /* | 989 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
| 1015 | * ensure checking ->mems_allowed_change_disable before clearing all new | 990 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
| 1016 | * disallowed nodes. | ||
| 1017 | * | ||
| 1018 | * if clearing newly disallowed bits before the checking, the read-side | ||
| 1019 | * task may find no node to alloc page. | ||
| 1020 | */ | ||
| 1021 | smp_mb(); | ||
| 1022 | 991 | ||
| 1023 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | 992 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); |
| 1024 | tsk->mems_allowed = *newmems; | 993 | tsk->mems_allowed = *newmems; |
| 994 | |||
| 995 | if (need_loop) | ||
| 996 | write_seqcount_end(&tsk->mems_allowed_seq); | ||
| 997 | |||
| 1025 | task_unlock(tsk); | 998 | task_unlock(tsk); |
| 1026 | } | 999 | } |
| 1027 | 1000 | ||
| @@ -1399,8 +1372,7 @@ static nodemask_t cpuset_attach_nodemask_from; | |||
| 1399 | static nodemask_t cpuset_attach_nodemask_to; | 1372 | static nodemask_t cpuset_attach_nodemask_to; |
| 1400 | 1373 | ||
| 1401 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ | 1374 | /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */ |
| 1402 | static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 1375 | static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
| 1403 | struct cgroup_taskset *tset) | ||
| 1404 | { | 1376 | { |
| 1405 | struct cpuset *cs = cgroup_cs(cgrp); | 1377 | struct cpuset *cs = cgroup_cs(cgrp); |
| 1406 | struct task_struct *task; | 1378 | struct task_struct *task; |
| @@ -1436,8 +1408,7 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 1436 | return 0; | 1408 | return 0; |
| 1437 | } | 1409 | } |
| 1438 | 1410 | ||
| 1439 | static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 1411 | static void cpuset_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) |
| 1440 | struct cgroup_taskset *tset) | ||
| 1441 | { | 1412 | { |
| 1442 | struct mm_struct *mm; | 1413 | struct mm_struct *mm; |
| 1443 | struct task_struct *task; | 1414 | struct task_struct *task; |
| @@ -1794,28 +1765,17 @@ static struct cftype files[] = { | |||
| 1794 | .write_u64 = cpuset_write_u64, | 1765 | .write_u64 = cpuset_write_u64, |
| 1795 | .private = FILE_SPREAD_SLAB, | 1766 | .private = FILE_SPREAD_SLAB, |
| 1796 | }, | 1767 | }, |
| 1797 | }; | ||
| 1798 | 1768 | ||
| 1799 | static struct cftype cft_memory_pressure_enabled = { | 1769 | { |
| 1800 | .name = "memory_pressure_enabled", | 1770 | .name = "memory_pressure_enabled", |
| 1801 | .read_u64 = cpuset_read_u64, | 1771 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 1802 | .write_u64 = cpuset_write_u64, | 1772 | .read_u64 = cpuset_read_u64, |
| 1803 | .private = FILE_MEMORY_PRESSURE_ENABLED, | 1773 | .write_u64 = cpuset_write_u64, |
| 1804 | }; | 1774 | .private = FILE_MEMORY_PRESSURE_ENABLED, |
| 1805 | 1775 | }, | |
| 1806 | static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | ||
| 1807 | { | ||
| 1808 | int err; | ||
| 1809 | 1776 | ||
| 1810 | err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); | 1777 | { } /* terminate */ |
| 1811 | if (err) | 1778 | }; |
| 1812 | return err; | ||
| 1813 | /* memory_pressure_enabled is in root cpuset only */ | ||
| 1814 | if (!cont->parent) | ||
| 1815 | err = cgroup_add_file(cont, ss, | ||
| 1816 | &cft_memory_pressure_enabled); | ||
| 1817 | return err; | ||
| 1818 | } | ||
| 1819 | 1779 | ||
| 1820 | /* | 1780 | /* |
| 1821 | * post_clone() is called during cgroup_create() when the | 1781 | * post_clone() is called during cgroup_create() when the |
| @@ -1833,8 +1793,7 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
| 1833 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex | 1793 | * (and likewise for mems) to the new cgroup. Called with cgroup_mutex |
| 1834 | * held. | 1794 | * held. |
| 1835 | */ | 1795 | */ |
| 1836 | static void cpuset_post_clone(struct cgroup_subsys *ss, | 1796 | static void cpuset_post_clone(struct cgroup *cgroup) |
| 1837 | struct cgroup *cgroup) | ||
| 1838 | { | 1797 | { |
| 1839 | struct cgroup *parent, *child; | 1798 | struct cgroup *parent, *child; |
| 1840 | struct cpuset *cs, *parent_cs; | 1799 | struct cpuset *cs, *parent_cs; |
| @@ -1857,13 +1816,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss, | |||
| 1857 | 1816 | ||
| 1858 | /* | 1817 | /* |
| 1859 | * cpuset_create - create a cpuset | 1818 | * cpuset_create - create a cpuset |
| 1860 | * ss: cpuset cgroup subsystem | ||
| 1861 | * cont: control group that the new cpuset will be part of | 1819 | * cont: control group that the new cpuset will be part of |
| 1862 | */ | 1820 | */ |
| 1863 | 1821 | ||
| 1864 | static struct cgroup_subsys_state *cpuset_create( | 1822 | static struct cgroup_subsys_state *cpuset_create(struct cgroup *cont) |
| 1865 | struct cgroup_subsys *ss, | ||
| 1866 | struct cgroup *cont) | ||
| 1867 | { | 1823 | { |
| 1868 | struct cpuset *cs; | 1824 | struct cpuset *cs; |
| 1869 | struct cpuset *parent; | 1825 | struct cpuset *parent; |
| @@ -1902,7 +1858,7 @@ static struct cgroup_subsys_state *cpuset_create( | |||
| 1902 | * will call async_rebuild_sched_domains(). | 1858 | * will call async_rebuild_sched_domains(). |
| 1903 | */ | 1859 | */ |
| 1904 | 1860 | ||
| 1905 | static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont) | 1861 | static void cpuset_destroy(struct cgroup *cont) |
| 1906 | { | 1862 | { |
| 1907 | struct cpuset *cs = cgroup_cs(cont); | 1863 | struct cpuset *cs = cgroup_cs(cont); |
| 1908 | 1864 | ||
| @@ -1920,9 +1876,9 @@ struct cgroup_subsys cpuset_subsys = { | |||
| 1920 | .destroy = cpuset_destroy, | 1876 | .destroy = cpuset_destroy, |
| 1921 | .can_attach = cpuset_can_attach, | 1877 | .can_attach = cpuset_can_attach, |
| 1922 | .attach = cpuset_attach, | 1878 | .attach = cpuset_attach, |
| 1923 | .populate = cpuset_populate, | ||
| 1924 | .post_clone = cpuset_post_clone, | 1879 | .post_clone = cpuset_post_clone, |
| 1925 | .subsys_id = cpuset_subsys_id, | 1880 | .subsys_id = cpuset_subsys_id, |
| 1881 | .base_cftypes = files, | ||
| 1926 | .early_init = 1, | 1882 | .early_init = 1, |
| 1927 | }; | 1883 | }; |
| 1928 | 1884 | ||
| @@ -2195,10 +2151,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | |||
| 2195 | mutex_unlock(&callback_mutex); | 2151 | mutex_unlock(&callback_mutex); |
| 2196 | } | 2152 | } |
| 2197 | 2153 | ||
| 2198 | int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | 2154 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
| 2199 | { | 2155 | { |
| 2200 | const struct cpuset *cs; | 2156 | const struct cpuset *cs; |
| 2201 | int cpu; | ||
| 2202 | 2157 | ||
| 2203 | rcu_read_lock(); | 2158 | rcu_read_lock(); |
| 2204 | cs = task_cs(tsk); | 2159 | cs = task_cs(tsk); |
| @@ -2219,22 +2174,10 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |||
| 2219 | * changes in task_cs()->cpus_allowed. Otherwise we can temporarily | 2174 | * changes in task_cs()->cpus_allowed. Otherwise we can temporarily |
| 2220 | * set any mask even if it is not right from task_cs() pov, | 2175 | * set any mask even if it is not right from task_cs() pov, |
| 2221 | * the pending set_cpus_allowed_ptr() will fix things. | 2176 | * the pending set_cpus_allowed_ptr() will fix things. |
| 2177 | * | ||
| 2178 | * select_fallback_rq() will fix things up and set cpu_possible_mask | |
| 2179 | * if required. | ||
| 2222 | */ | 2180 | */ |
| 2223 | |||
| 2224 | cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask); | ||
| 2225 | if (cpu >= nr_cpu_ids) { | ||
| 2226 | /* | ||
| 2227 | * Either tsk->cpus_allowed is wrong (see above) or it | ||
| 2228 | * is actually empty. The latter case is only possible | ||
| 2229 | * if we are racing with remove_tasks_in_empty_cpuset(). | ||
| 2230 | * Like above we can temporary set any mask and rely on | ||
| 2231 | * set_cpus_allowed_ptr() as synchronization point. | ||
| 2232 | */ | ||
| 2233 | do_set_cpus_allowed(tsk, cpu_possible_mask); | ||
| 2234 | cpu = cpumask_any(cpu_active_mask); | ||
| 2235 | } | ||
| 2236 | |||
| 2237 | return cpu; | ||
| 2238 | } | 2181 | } |
| 2239 | 2182 | ||
| 2240 | void cpuset_init_current_mems_allowed(void) | 2183 | void cpuset_init_current_mems_allowed(void) |
