diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 58 | ||||
-rw-r--r-- | kernel/exit.c | 2 |
2 files changed, 52 insertions, 8 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index db0990ac3fac..61d6af7fa676 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
946 | * In order to avoid seeing no nodes if the old and new nodes are disjoint, | 946 | * In order to avoid seeing no nodes if the old and new nodes are disjoint, |
947 | * we structure updates as setting all new allowed nodes, then clearing newly | 947 | * we structure updates as setting all new allowed nodes, then clearing newly |
948 | * disallowed ones. | 948 | * disallowed ones. |
949 | * | ||
950 | * Called with task's alloc_lock held | ||
951 | */ | 949 | */ |
952 | static void cpuset_change_task_nodemask(struct task_struct *tsk, | 950 | static void cpuset_change_task_nodemask(struct task_struct *tsk, |
953 | nodemask_t *newmems) | 951 | nodemask_t *newmems) |
954 | { | 952 | { |
953 | repeat: | ||
954 | /* | ||
955 | * Allow tasks that have access to memory reserves because they have | ||
956 | * been OOM killed to get memory anywhere. | ||
957 | */ | ||
958 | if (unlikely(test_thread_flag(TIF_MEMDIE))) | ||
959 | return; | ||
960 | if (current->flags & PF_EXITING) /* Let dying task have memory */ | ||
961 | return; | ||
962 | |||
963 | task_lock(tsk); | ||
955 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | 964 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
956 | mpol_rebind_task(tsk, &tsk->mems_allowed, MPOL_REBIND_ONCE); | 965 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
957 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_ONCE); | 966 | |
967 | |||
968 | /* | ||
969 | * Ensure that ->mems_allowed_change_disable is checked only after all | ||
970 | * new allowed nodes have been set. | ||
971 | * | ||
972 | * The read-side task can then see a nodemask containing both the new | ||
973 | * and the old allowed nodes, so if it allocates a page while cpuset is | ||
974 | * clearing the newly disallowed ones, it still sees the new allowed bits. | ||
975 | * | ||
976 | * If the new allowed nodes were instead set after the check, setting | ||
977 | * the new allowed nodes and clearing the newly disallowed ones would be | ||
978 | * done back to back, and the read-side task might find no node to allocate a page from. | ||
979 | */ | ||
980 | smp_mb(); | ||
981 | |||
982 | /* | ||
983 | * Memory allocation is very fast, so we needn't sleep while waiting | ||
984 | * for the read-side. | ||
985 | */ | ||
986 | while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
987 | task_unlock(tsk); | ||
988 | if (!task_curr(tsk)) | ||
989 | yield(); | ||
990 | goto repeat; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * Ensure that ->mems_allowed_change_disable is checked before clearing | ||
995 | * all newly disallowed nodes. | ||
996 | * | ||
997 | * If the newly disallowed bits were cleared before the check, the | ||
998 | * read-side task might find no node to allocate a page from. | ||
999 | */ | ||
1000 | smp_mb(); | ||
1001 | |||
1002 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | ||
958 | tsk->mems_allowed = *newmems; | 1003 | tsk->mems_allowed = *newmems; |
1004 | task_unlock(tsk); | ||
959 | } | 1005 | } |
960 | 1006 | ||
961 | /* | 1007 | /* |
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
978 | cs = cgroup_cs(scan->cg); | 1024 | cs = cgroup_cs(scan->cg); |
979 | guarantee_online_mems(cs, newmems); | 1025 | guarantee_online_mems(cs, newmems); |
980 | 1026 | ||
981 | task_lock(p); | ||
982 | cpuset_change_task_nodemask(p, newmems); | 1027 | cpuset_change_task_nodemask(p, newmems); |
983 | task_unlock(p); | ||
984 | 1028 | ||
985 | NODEMASK_FREE(newmems); | 1029 | NODEMASK_FREE(newmems); |
986 | 1030 | ||
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | |||
1383 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | 1427 | err = set_cpus_allowed_ptr(tsk, cpus_attach); |
1384 | WARN_ON_ONCE(err); | 1428 | WARN_ON_ONCE(err); |
1385 | 1429 | ||
1386 | task_lock(tsk); | ||
1387 | cpuset_change_task_nodemask(tsk, to); | 1430 | cpuset_change_task_nodemask(tsk, to); |
1388 | task_unlock(tsk); | ||
1389 | cpuset_update_task_spread_flag(cs, tsk); | 1431 | cpuset_update_task_spread_flag(cs, tsk); |
1390 | 1432 | ||
1391 | } | 1433 | } |
diff --git a/kernel/exit.c b/kernel/exit.c index eabca5a73a85..019a2843bf95 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -1002,8 +1002,10 @@ NORET_TYPE void do_exit(long code) | |||
1002 | 1002 | ||
1003 | exit_notify(tsk, group_dead); | 1003 | exit_notify(tsk, group_dead); |
1004 | #ifdef CONFIG_NUMA | 1004 | #ifdef CONFIG_NUMA |
1005 | task_lock(tsk); | ||
1005 | mpol_put(tsk->mempolicy); | 1006 | mpol_put(tsk->mempolicy); |
1006 | tsk->mempolicy = NULL; | 1007 | tsk->mempolicy = NULL; |
1008 | task_unlock(tsk); | ||
1007 | #endif | 1009 | #endif |
1008 | #ifdef CONFIG_FUTEX | 1010 | #ifdef CONFIG_FUTEX |
1009 | if (unlikely(current->pi_state_cache)) | 1011 | if (unlikely(current->pi_state_cache)) |