aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpuset.c 58
-rw-r--r-- kernel/exit.c 2
2 files changed, 52 insertions, 8 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index db0990ac3fac..61d6af7fa676 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
946 * In order to avoid seeing no nodes if the old and new nodes are disjoint, 946 * In order to avoid seeing no nodes if the old and new nodes are disjoint,
947 * we structure updates as setting all new allowed nodes, then clearing newly 947 * we structure updates as setting all new allowed nodes, then clearing newly
948 * disallowed ones. 948 * disallowed ones.
949 *
950 * Called with task's alloc_lock held
951 */ 949 */
952static void cpuset_change_task_nodemask(struct task_struct *tsk, 950static void cpuset_change_task_nodemask(struct task_struct *tsk,
953 nodemask_t *newmems) 951 nodemask_t *newmems)
954{ 952{
953repeat:
954 /*
955 * Allow tasks that have access to memory reserves because they have
956 * been OOM killed to get memory anywhere.
957 */
958 if (unlikely(test_thread_flag(TIF_MEMDIE)))
959 return;
960 if (current->flags & PF_EXITING) /* Let dying task have memory */
961 return;
962
963 task_lock(tsk);
955 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); 964 nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
956 mpol_rebind_task(tsk, &tsk->mems_allowed, MPOL_REBIND_ONCE); 965 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
957 mpol_rebind_task(tsk, newmems, MPOL_REBIND_ONCE); 966
967
968 /*
969 * Ensure that ->mems_allowed_change_disable is checked only after all
970 * newly allowed nodes have been set.
971 *
972 * The read-side task can then see a nodemask containing both the newly
973 * allowed nodes and the old allowed nodes, so if it allocates a page while
974 * cpuset goes on to clear the newly disallowed ones, it still sees the newly allowed bits.
975 *
976 * If instead the newly allowed nodes were set after the check, setting
977 * them and clearing the newly disallowed ones would happen back to back,
978 * and the read-side task might find no node to allocate a page from.
979 */
980 smp_mb();
981
982 /*
983 * Memory allocation is very fast, so we needn't sleep while waiting
984 * for the read-side.
985 */
986 while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) {
987 task_unlock(tsk);
988 if (!task_curr(tsk))
989 yield();
990 goto repeat;
991 }
992
993 /*
994 * Ensure that ->mems_allowed_change_disable is checked before clearing
995 * all newly disallowed nodes.
996 *
997 * If the newly disallowed bits were cleared before the check, the
998 * read-side task might find no node to allocate a page from.
999 */
1000 smp_mb();
1001
1002 mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
958 tsk->mems_allowed = *newmems; 1003 tsk->mems_allowed = *newmems;
1004 task_unlock(tsk);
959} 1005}
960 1006
961/* 1007/*
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p,
978 cs = cgroup_cs(scan->cg); 1024 cs = cgroup_cs(scan->cg);
979 guarantee_online_mems(cs, newmems); 1025 guarantee_online_mems(cs, newmems);
980 1026
981 task_lock(p);
982 cpuset_change_task_nodemask(p, newmems); 1027 cpuset_change_task_nodemask(p, newmems);
983 task_unlock(p);
984 1028
985 NODEMASK_FREE(newmems); 1029 NODEMASK_FREE(newmems);
986 1030
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
1383 err = set_cpus_allowed_ptr(tsk, cpus_attach); 1427 err = set_cpus_allowed_ptr(tsk, cpus_attach);
1384 WARN_ON_ONCE(err); 1428 WARN_ON_ONCE(err);
1385 1429
1386 task_lock(tsk);
1387 cpuset_change_task_nodemask(tsk, to); 1430 cpuset_change_task_nodemask(tsk, to);
1388 task_unlock(tsk);
1389 cpuset_update_task_spread_flag(cs, tsk); 1431 cpuset_update_task_spread_flag(cs, tsk);
1390 1432
1391} 1433}
diff --git a/kernel/exit.c b/kernel/exit.c
index eabca5a73a85..019a2843bf95 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code)
1002 1002
1003 exit_notify(tsk, group_dead); 1003 exit_notify(tsk, group_dead);
1004#ifdef CONFIG_NUMA 1004#ifdef CONFIG_NUMA
1005 task_lock(tsk);
1005 mpol_put(tsk->mempolicy); 1006 mpol_put(tsk->mempolicy);
1006 tsk->mempolicy = NULL; 1007 tsk->mempolicy = NULL;
1008 task_unlock(tsk);
1007#endif 1009#endif
1008#ifdef CONFIG_FUTEX 1010#ifdef CONFIG_FUTEX
1009 if (unlikely(current->pi_state_cache)) 1011 if (unlikely(current->pi_state_cache))