diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 78 |
1 files changed, 66 insertions, 12 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1a109788592f..7cb37d86a005 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -946,16 +946,62 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
946 | * In order to avoid seeing no nodes if the old and new nodes are disjoint, | 946 | * In order to avoid seeing no nodes if the old and new nodes are disjoint, |
947 | * we structure updates as setting all new allowed nodes, then clearing newly | 947 | * we structure updates as setting all new allowed nodes, then clearing newly |
948 | * disallowed ones. | 948 | * disallowed ones. |
949 | * | ||
950 | * Called with task's alloc_lock held | ||
951 | */ | 949 | */ |
952 | static void cpuset_change_task_nodemask(struct task_struct *tsk, | 950 | static void cpuset_change_task_nodemask(struct task_struct *tsk, |
953 | nodemask_t *newmems) | 951 | nodemask_t *newmems) |
954 | { | 952 | { |
953 | repeat: | ||
954 | /* | ||
955 | * Allow tasks that have access to memory reserves because they have | ||
956 | * been OOM killed to get memory anywhere. | ||
957 | */ | ||
958 | if (unlikely(test_thread_flag(TIF_MEMDIE))) | ||
959 | return; | ||
960 | if (current->flags & PF_EXITING) /* Let dying task have memory */ | ||
961 | return; | ||
962 | |||
963 | task_lock(tsk); | ||
955 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | 964 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
956 | mpol_rebind_task(tsk, &tsk->mems_allowed); | 965 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
957 | mpol_rebind_task(tsk, newmems); | 966 | |
967 | |||
968 | /* | ||
969 | * ensure checking ->mems_allowed_change_disable after setting all new | ||
970 | * allowed nodes. | ||
971 | * | ||
972 | * The read-side task can see a nodemask containing both the new allowed | ||
973 | * nodes and the old allowed nodes. And if it allocates a page while cpuset | ||
974 | * is clearing the newly disallowed ones, it can still see the new allowed bits. | ||
975 | * | ||
976 | * And if setting all new allowed nodes happened after the check, setting | ||
977 | * all new allowed nodes and clearing newly disallowed ones would be done | ||
978 | * back to back, and the read-side task might find no node to allocate a page from. | ||
979 | */ | ||
980 | smp_mb(); | ||
981 | |||
982 | /* | ||
983 | * Allocation of memory is very fast, we needn't sleep when waiting | ||
984 | * for the read-side. | ||
985 | */ | ||
986 | while (ACCESS_ONCE(tsk->mems_allowed_change_disable)) { | ||
987 | task_unlock(tsk); | ||
988 | if (!task_curr(tsk)) | ||
989 | yield(); | ||
990 | goto repeat; | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * ensure checking ->mems_allowed_change_disable before clearing all new | ||
995 | * disallowed nodes. | ||
996 | * | ||
997 | * if clearing newly disallowed bits before the checking, the read-side | ||
998 | * task may find no node to alloc page. | ||
999 | */ | ||
1000 | smp_mb(); | ||
1001 | |||
1002 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | ||
958 | tsk->mems_allowed = *newmems; | 1003 | tsk->mems_allowed = *newmems; |
1004 | task_unlock(tsk); | ||
959 | } | 1005 | } |
960 | 1006 | ||
961 | /* | 1007 | /* |
@@ -978,9 +1024,7 @@ static void cpuset_change_nodemask(struct task_struct *p, | |||
978 | cs = cgroup_cs(scan->cg); | 1024 | cs = cgroup_cs(scan->cg); |
979 | guarantee_online_mems(cs, newmems); | 1025 | guarantee_online_mems(cs, newmems); |
980 | 1026 | ||
981 | task_lock(p); | ||
982 | cpuset_change_task_nodemask(p, newmems); | 1027 | cpuset_change_task_nodemask(p, newmems); |
983 | task_unlock(p); | ||
984 | 1028 | ||
985 | NODEMASK_FREE(newmems); | 1029 | NODEMASK_FREE(newmems); |
986 | 1030 | ||
@@ -1383,9 +1427,7 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to, | |||
1383 | err = set_cpus_allowed_ptr(tsk, cpus_attach); | 1427 | err = set_cpus_allowed_ptr(tsk, cpus_attach); |
1384 | WARN_ON_ONCE(err); | 1428 | WARN_ON_ONCE(err); |
1385 | 1429 | ||
1386 | task_lock(tsk); | ||
1387 | cpuset_change_task_nodemask(tsk, to); | 1430 | cpuset_change_task_nodemask(tsk, to); |
1388 | task_unlock(tsk); | ||
1389 | cpuset_update_task_spread_flag(cs, tsk); | 1431 | cpuset_update_task_spread_flag(cs, tsk); |
1390 | 1432 | ||
1391 | } | 1433 | } |
@@ -2427,7 +2469,8 @@ void cpuset_unlock(void) | |||
2427 | } | 2469 | } |
2428 | 2470 | ||
2429 | /** | 2471 | /** |
2430 | * cpuset_mem_spread_node() - On which node to begin search for a page | 2472 | * cpuset_mem_spread_node() - On which node to begin search for a file page |
2473 | * cpuset_slab_spread_node() - On which node to begin search for a slab page | ||
2431 | * | 2474 | * |
2432 | * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for | 2475 | * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for |
2433 | * tasks in a cpuset with is_spread_page or is_spread_slab set), | 2476 | * tasks in a cpuset with is_spread_page or is_spread_slab set), |
@@ -2452,16 +2495,27 @@ void cpuset_unlock(void) | |||
2452 | * See kmem_cache_alloc_node(). | 2495 | * See kmem_cache_alloc_node(). |
2453 | */ | 2496 | */ |
2454 | 2497 | ||
2455 | int cpuset_mem_spread_node(void) | 2498 | static int cpuset_spread_node(int *rotor) |
2456 | { | 2499 | { |
2457 | int node; | 2500 | int node; |
2458 | 2501 | ||
2459 | node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed); | 2502 | node = next_node(*rotor, current->mems_allowed); |
2460 | if (node == MAX_NUMNODES) | 2503 | if (node == MAX_NUMNODES) |
2461 | node = first_node(current->mems_allowed); | 2504 | node = first_node(current->mems_allowed); |
2462 | current->cpuset_mem_spread_rotor = node; | 2505 | *rotor = node; |
2463 | return node; | 2506 | return node; |
2464 | } | 2507 | } |
2508 | |||
2509 | int cpuset_mem_spread_node(void) | ||
2510 | { | ||
2511 | return cpuset_spread_node(&current->cpuset_mem_spread_rotor); | ||
2512 | } | ||
2513 | |||
2514 | int cpuset_slab_spread_node(void) | ||
2515 | { | ||
2516 | return cpuset_spread_node(&current->cpuset_slab_spread_rotor); | ||
2517 | } | ||
2518 | |||
2465 | EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); | 2519 | EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); |
2466 | 2520 | ||
2467 | /** | 2521 | /** |