Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  |  377
1 file changed, 191 insertions(+), 186 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 798b3ab054eb..91cf85b36dd5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
| @@ -227,10 +227,6 @@ static struct cpuset top_cpuset = { | |||
| 227 | * The task_struct fields mems_allowed and mems_generation may only | 227 | * The task_struct fields mems_allowed and mems_generation may only |
| 228 | * be accessed in the context of that task, so require no locks. | 228 | * be accessed in the context of that task, so require no locks. |
| 229 | * | 229 | * |
| 230 | * The cpuset_common_file_write handler for operations that modify | ||
| 231 | * the cpuset hierarchy holds cgroup_mutex across the entire operation, | ||
| 232 | * single threading all such cpuset modifications across the system. | ||
| 233 | * | ||
| 234 | * The cpuset_common_file_read() handlers only hold callback_mutex across | 230 | * The cpuset_common_file_read() handlers only hold callback_mutex across |
| 235 | * small pieces of code, such as when reading out possibly multi-word | 231 | * small pieces of code, such as when reading out possibly multi-word |
| 236 | * cpumasks and nodemasks. | 232 | * cpumasks and nodemasks. |
| @@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void) | |||
| 369 | my_cpusets_mem_gen = top_cpuset.mems_generation; | 365 | my_cpusets_mem_gen = top_cpuset.mems_generation; |
| 370 | } else { | 366 | } else { |
| 371 | rcu_read_lock(); | 367 | rcu_read_lock(); |
| 372 | my_cpusets_mem_gen = task_cs(current)->mems_generation; | 368 | my_cpusets_mem_gen = task_cs(tsk)->mems_generation; |
| 373 | rcu_read_unlock(); | 369 | rcu_read_unlock(); |
| 374 | } | 370 | } |
| 375 | 371 | ||
| @@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | |||
| 500 | /* | 496 | /* |
| 501 | * rebuild_sched_domains() | 497 | * rebuild_sched_domains() |
| 502 | * | 498 | * |
| 503 | * If the flag 'sched_load_balance' of any cpuset with non-empty | 499 | * This routine will be called to rebuild the scheduler's dynamic |
| 504 | * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset | 500 | * sched domains: |
| 505 | * which has that flag enabled, or if any cpuset with a non-empty | 501 | * - if the flag 'sched_load_balance' of any cpuset with non-empty |
| 506 | * 'cpus' is removed, then call this routine to rebuild the | 502 | * 'cpus' changes, |
| 507 | * scheduler's dynamic sched domains. | 503 | * - or if the 'cpus' allowed changes in any cpuset which has that |
| 504 | * flag enabled, | ||
| 505 | * - or if the 'sched_relax_domain_level' of any cpuset which has | ||
| 506 | * that flag enabled and with non-empty 'cpus' changes, | ||
| 507 | * - or if any cpuset with non-empty 'cpus' is removed, | ||
| 508 | * - or if a cpu gets offlined. | ||
| 508 | * | 509 | * |
| 509 | * This routine builds a partial partition of the systems CPUs | 510 | * This routine builds a partial partition of the systems CPUs |
| 510 | * (the set of non-overlappping cpumask_t's in the array 'part' | 511 | * (the set of non-overlappping cpumask_t's in the array 'part' |
| @@ -564,7 +565,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | |||
| 564 | * partition_sched_domains(). | 565 | * partition_sched_domains(). |
| 565 | */ | 566 | */ |
| 566 | 567 | ||
| 567 | static void rebuild_sched_domains(void) | 568 | void rebuild_sched_domains(void) |
| 568 | { | 569 | { |
| 569 | struct kfifo *q; /* queue of cpusets to be scanned */ | 570 | struct kfifo *q; /* queue of cpusets to be scanned */ |
| 570 | struct cpuset *cp; /* scans q */ | 571 | struct cpuset *cp; /* scans q */ |
| @@ -609,8 +610,13 @@ static void rebuild_sched_domains(void) | |||
| 609 | while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { | 610 | while (__kfifo_get(q, (void *)&cp, sizeof(cp))) { |
| 610 | struct cgroup *cont; | 611 | struct cgroup *cont; |
| 611 | struct cpuset *child; /* scans child cpusets of cp */ | 612 | struct cpuset *child; /* scans child cpusets of cp */ |
| 613 | |||
| 614 | if (cpus_empty(cp->cpus_allowed)) | ||
| 615 | continue; | ||
| 616 | |||
| 612 | if (is_sched_load_balance(cp)) | 617 | if (is_sched_load_balance(cp)) |
| 613 | csa[csn++] = cp; | 618 | csa[csn++] = cp; |
| 619 | |||
| 614 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { | 620 | list_for_each_entry(cont, &cp->css.cgroup->children, sibling) { |
| 615 | child = cgroup_cs(cont); | 621 | child = cgroup_cs(cont); |
| 616 | __kfifo_put(q, (void *)&child, sizeof(cp)); | 622 | __kfifo_put(q, (void *)&child, sizeof(cp)); |
| @@ -679,7 +685,9 @@ restart: | |||
| 679 | if (apn == b->pn) { | 685 | if (apn == b->pn) { |
| 680 | cpus_or(*dp, *dp, b->cpus_allowed); | 686 | cpus_or(*dp, *dp, b->cpus_allowed); |
| 681 | b->pn = -1; | 687 | b->pn = -1; |
| 682 | update_domain_attr(dattr, b); | 688 | if (dattr) |
| 689 | update_domain_attr(dattr | ||
| 690 | + nslot, b); | ||
| 683 | } | 691 | } |
| 684 | } | 692 | } |
| 685 | nslot++; | 693 | nslot++; |
| @@ -701,36 +709,6 @@ done: | |||
| 701 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ | 709 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ |
| 702 | } | 710 | } |
| 703 | 711 | ||
| 704 | static inline int started_after_time(struct task_struct *t1, | ||
| 705 | struct timespec *time, | ||
| 706 | struct task_struct *t2) | ||
| 707 | { | ||
| 708 | int start_diff = timespec_compare(&t1->start_time, time); | ||
| 709 | if (start_diff > 0) { | ||
| 710 | return 1; | ||
| 711 | } else if (start_diff < 0) { | ||
| 712 | return 0; | ||
| 713 | } else { | ||
| 714 | /* | ||
| 715 | * Arbitrarily, if two processes started at the same | ||
| 716 | * time, we'll say that the lower pointer value | ||
| 717 | * started first. Note that t2 may have exited by now | ||
| 718 | * so this may not be a valid pointer any longer, but | ||
| 719 | * that's fine - it still serves to distinguish | ||
| 720 | * between two tasks started (effectively) | ||
| 721 | * simultaneously. | ||
| 722 | */ | ||
| 723 | return t1 > t2; | ||
| 724 | } | ||
| 725 | } | ||
| 726 | |||
| 727 | static inline int started_after(void *p1, void *p2) | ||
| 728 | { | ||
| 729 | struct task_struct *t1 = p1; | ||
| 730 | struct task_struct *t2 = p2; | ||
| 731 | return started_after_time(t1, &t2->start_time, t2); | ||
| 732 | } | ||
| 733 | |||
| 734 | /** | 712 | /** |
| 735 | * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's | 713 | * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's |
| 736 | * @tsk: task to test | 714 | * @tsk: task to test |
| @@ -766,15 +744,49 @@ static void cpuset_change_cpumask(struct task_struct *tsk, | |||
| 766 | } | 744 | } |
| 767 | 745 | ||
| 768 | /** | 746 | /** |
| 747 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. | ||
| 748 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed | ||
| 749 | * | ||
| 750 | * Called with cgroup_mutex held | ||
| 751 | * | ||
| 752 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | ||
| 753 | * calling callback functions for each. | ||
| 754 | * | ||
| 755 | * Return 0 if successful, -errno if not. | ||
| 756 | */ | ||
| 757 | static int update_tasks_cpumask(struct cpuset *cs) | ||
| 758 | { | ||
| 759 | struct cgroup_scanner scan; | ||
| 760 | struct ptr_heap heap; | ||
| 761 | int retval; | ||
| 762 | |||
| 763 | /* | ||
| 764 | * cgroup_scan_tasks() will initialize heap->gt for us. | ||
| 765 | * heap_init() is still needed here for we should not change | ||
| 766 | * cs->cpus_allowed when heap_init() fails. | ||
| 767 | */ | ||
| 768 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL); | ||
| 769 | if (retval) | ||
| 770 | return retval; | ||
| 771 | |||
| 772 | scan.cg = cs->css.cgroup; | ||
| 773 | scan.test_task = cpuset_test_cpumask; | ||
| 774 | scan.process_task = cpuset_change_cpumask; | ||
| 775 | scan.heap = &heap; | ||
| 776 | retval = cgroup_scan_tasks(&scan); | ||
| 777 | |||
| 778 | heap_free(&heap); | ||
| 779 | return retval; | ||
| 780 | } | ||
| 781 | |||
| 782 | /** | ||
| 769 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it | 783 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it |
| 770 | * @cs: the cpuset to consider | 784 | * @cs: the cpuset to consider |
| 771 | * @buf: buffer of cpu numbers written to this cpuset | 785 | * @buf: buffer of cpu numbers written to this cpuset |
| 772 | */ | 786 | */ |
| 773 | static int update_cpumask(struct cpuset *cs, char *buf) | 787 | static int update_cpumask(struct cpuset *cs, const char *buf) |
| 774 | { | 788 | { |
| 775 | struct cpuset trialcs; | 789 | struct cpuset trialcs; |
| 776 | struct cgroup_scanner scan; | ||
| 777 | struct ptr_heap heap; | ||
| 778 | int retval; | 790 | int retval; |
| 779 | int is_load_balanced; | 791 | int is_load_balanced; |
| 780 | 792 | ||
| @@ -790,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 790 | * that parsing. The validate_change() call ensures that cpusets | 802 | * that parsing. The validate_change() call ensures that cpusets |
| 791 | * with tasks have cpus. | 803 | * with tasks have cpus. |
| 792 | */ | 804 | */ |
| 793 | buf = strstrip(buf); | ||
| 794 | if (!*buf) { | 805 | if (!*buf) { |
| 795 | cpus_clear(trialcs.cpus_allowed); | 806 | cpus_clear(trialcs.cpus_allowed); |
| 796 | } else { | 807 | } else { |
| @@ -809,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 809 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 820 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) |
| 810 | return 0; | 821 | return 0; |
| 811 | 822 | ||
| 812 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); | ||
| 813 | if (retval) | ||
| 814 | return retval; | ||
| 815 | |||
| 816 | is_load_balanced = is_sched_load_balance(&trialcs); | 823 | is_load_balanced = is_sched_load_balance(&trialcs); |
| 817 | 824 | ||
| 818 | mutex_lock(&callback_mutex); | 825 | mutex_lock(&callback_mutex); |
| @@ -823,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 823 | * Scan tasks in the cpuset, and update the cpumasks of any | 830 | * Scan tasks in the cpuset, and update the cpumasks of any |
| 824 | * that need an update. | 831 | * that need an update. |
| 825 | */ | 832 | */ |
| 826 | scan.cg = cs->css.cgroup; | 833 | retval = update_tasks_cpumask(cs); |
| 827 | scan.test_task = cpuset_test_cpumask; | 834 | if (retval < 0) |
| 828 | scan.process_task = cpuset_change_cpumask; | 835 | return retval; |
| 829 | scan.heap = &heap; | ||
| 830 | cgroup_scan_tasks(&scan); | ||
| 831 | heap_free(&heap); | ||
| 832 | 836 | ||
| 833 | if (is_load_balanced) | 837 | if (is_load_balanced) |
| 834 | rebuild_sched_domains(); | 838 | rebuild_sched_domains(); |
| @@ -884,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
| 884 | mutex_unlock(&callback_mutex); | 888 | mutex_unlock(&callback_mutex); |
| 885 | } | 889 | } |
| 886 | 890 | ||
| 887 | /* | ||
| 888 | * Handle user request to change the 'mems' memory placement | ||
| 889 | * of a cpuset. Needs to validate the request, update the | ||
| 890 | * cpusets mems_allowed and mems_generation, and for each | ||
| 891 | * task in the cpuset, rebind any vma mempolicies and if | ||
| 892 | * the cpuset is marked 'memory_migrate', migrate the tasks | ||
| 893 | * pages to the new memory. | ||
| 894 | * | ||
| 895 | * Call with cgroup_mutex held. May take callback_mutex during call. | ||
| 896 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | ||
| 897 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | ||
| 898 | * their mempolicies to the cpusets new mems_allowed. | ||
| 899 | */ | ||
| 900 | |||
| 901 | static void *cpuset_being_rebound; | 891 | static void *cpuset_being_rebound; |
| 902 | 892 | ||
| 903 | static int update_nodemask(struct cpuset *cs, char *buf) | 893 | /** |
| 894 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. | ||
| 895 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed | ||
| 896 | * @oldmem: old mems_allowed of cpuset cs | ||
| 897 | * | ||
| 898 | * Called with cgroup_mutex held | ||
| 899 | * Return 0 if successful, -errno if not. | ||
| 900 | */ | ||
| 901 | static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) | ||
| 904 | { | 902 | { |
| 905 | struct cpuset trialcs; | ||
| 906 | nodemask_t oldmem; | ||
| 907 | struct task_struct *p; | 903 | struct task_struct *p; |
| 908 | struct mm_struct **mmarray; | 904 | struct mm_struct **mmarray; |
| 909 | int i, n, ntasks; | 905 | int i, n, ntasks; |
| 910 | int migrate; | 906 | int migrate; |
| 911 | int fudge; | 907 | int fudge; |
| 912 | int retval; | ||
| 913 | struct cgroup_iter it; | 908 | struct cgroup_iter it; |
| 914 | 909 | int retval; | |
| 915 | /* | ||
| 916 | * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; | ||
| 917 | * it's read-only | ||
| 918 | */ | ||
| 919 | if (cs == &top_cpuset) | ||
| 920 | return -EACCES; | ||
| 921 | |||
| 922 | trialcs = *cs; | ||
| 923 | |||
| 924 | /* | ||
| 925 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | ||
| 926 | * Since nodelist_parse() fails on an empty mask, we special case | ||
| 927 | * that parsing. The validate_change() call ensures that cpusets | ||
| 928 | * with tasks have memory. | ||
| 929 | */ | ||
| 930 | buf = strstrip(buf); | ||
| 931 | if (!*buf) { | ||
| 932 | nodes_clear(trialcs.mems_allowed); | ||
| 933 | } else { | ||
| 934 | retval = nodelist_parse(buf, trialcs.mems_allowed); | ||
| 935 | if (retval < 0) | ||
| 936 | goto done; | ||
| 937 | |||
| 938 | if (!nodes_subset(trialcs.mems_allowed, | ||
| 939 | node_states[N_HIGH_MEMORY])) | ||
| 940 | return -EINVAL; | ||
| 941 | } | ||
| 942 | oldmem = cs->mems_allowed; | ||
| 943 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | ||
| 944 | retval = 0; /* Too easy - nothing to do */ | ||
| 945 | goto done; | ||
| 946 | } | ||
| 947 | retval = validate_change(cs, &trialcs); | ||
| 948 | if (retval < 0) | ||
| 949 | goto done; | ||
| 950 | |||
| 951 | mutex_lock(&callback_mutex); | ||
| 952 | cs->mems_allowed = trialcs.mems_allowed; | ||
| 953 | cs->mems_generation = cpuset_mems_generation++; | ||
| 954 | mutex_unlock(&callback_mutex); | ||
| 955 | 910 | ||
| 956 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 911 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
| 957 | 912 | ||
| @@ -1018,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
| 1018 | 973 | ||
| 1019 | mpol_rebind_mm(mm, &cs->mems_allowed); | 974 | mpol_rebind_mm(mm, &cs->mems_allowed); |
| 1020 | if (migrate) | 975 | if (migrate) |
| 1021 | cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); | 976 | cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); |
| 1022 | mmput(mm); | 977 | mmput(mm); |
| 1023 | } | 978 | } |
| 1024 | 979 | ||
| @@ -1030,6 +985,70 @@ done: | |||
| 1030 | return retval; | 985 | return retval; |
| 1031 | } | 986 | } |
| 1032 | 987 | ||
| 988 | /* | ||
| 989 | * Handle user request to change the 'mems' memory placement | ||
| 990 | * of a cpuset. Needs to validate the request, update the | ||
| 991 | * cpusets mems_allowed and mems_generation, and for each | ||
| 992 | * task in the cpuset, rebind any vma mempolicies and if | ||
| 993 | * the cpuset is marked 'memory_migrate', migrate the tasks | ||
| 994 | * pages to the new memory. | ||
| 995 | * | ||
| 996 | * Call with cgroup_mutex held. May take callback_mutex during call. | ||
| 997 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | ||
| 998 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | ||
| 999 | * their mempolicies to the cpusets new mems_allowed. | ||
| 1000 | */ | ||
| 1001 | static int update_nodemask(struct cpuset *cs, const char *buf) | ||
| 1002 | { | ||
| 1003 | struct cpuset trialcs; | ||
| 1004 | nodemask_t oldmem; | ||
| 1005 | int retval; | ||
| 1006 | |||
| 1007 | /* | ||
| 1008 | * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; | ||
| 1009 | * it's read-only | ||
| 1010 | */ | ||
| 1011 | if (cs == &top_cpuset) | ||
| 1012 | return -EACCES; | ||
| 1013 | |||
| 1014 | trialcs = *cs; | ||
| 1015 | |||
| 1016 | /* | ||
| 1017 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | ||
| 1018 | * Since nodelist_parse() fails on an empty mask, we special case | ||
| 1019 | * that parsing. The validate_change() call ensures that cpusets | ||
| 1020 | * with tasks have memory. | ||
| 1021 | */ | ||
| 1022 | if (!*buf) { | ||
| 1023 | nodes_clear(trialcs.mems_allowed); | ||
| 1024 | } else { | ||
| 1025 | retval = nodelist_parse(buf, trialcs.mems_allowed); | ||
| 1026 | if (retval < 0) | ||
| 1027 | goto done; | ||
| 1028 | |||
| 1029 | if (!nodes_subset(trialcs.mems_allowed, | ||
| 1030 | node_states[N_HIGH_MEMORY])) | ||
| 1031 | return -EINVAL; | ||
| 1032 | } | ||
| 1033 | oldmem = cs->mems_allowed; | ||
| 1034 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | ||
| 1035 | retval = 0; /* Too easy - nothing to do */ | ||
| 1036 | goto done; | ||
| 1037 | } | ||
| 1038 | retval = validate_change(cs, &trialcs); | ||
| 1039 | if (retval < 0) | ||
| 1040 | goto done; | ||
| 1041 | |||
| 1042 | mutex_lock(&callback_mutex); | ||
| 1043 | cs->mems_allowed = trialcs.mems_allowed; | ||
| 1044 | cs->mems_generation = cpuset_mems_generation++; | ||
| 1045 | mutex_unlock(&callback_mutex); | ||
| 1046 | |||
| 1047 | retval = update_tasks_nodemask(cs, &oldmem); | ||
| 1048 | done: | ||
| 1049 | return retval; | ||
| 1050 | } | ||
| 1051 | |||
| 1033 | int current_cpuset_is_being_rebound(void) | 1052 | int current_cpuset_is_being_rebound(void) |
| 1034 | { | 1053 | { |
| 1035 | return task_cs(current) == cpuset_being_rebound; | 1054 | return task_cs(current) == cpuset_being_rebound; |
| @@ -1042,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) | |||
| 1042 | 1061 | ||
| 1043 | if (val != cs->relax_domain_level) { | 1062 | if (val != cs->relax_domain_level) { |
| 1044 | cs->relax_domain_level = val; | 1063 | cs->relax_domain_level = val; |
| 1045 | rebuild_sched_domains(); | 1064 | if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs)) |
| 1065 | rebuild_sched_domains(); | ||
| 1046 | } | 1066 | } |
| 1047 | 1067 | ||
| 1048 | return 0; | 1068 | return 0; |
| @@ -1194,6 +1214,15 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, | |||
| 1194 | 1214 | ||
| 1195 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) | 1215 | if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) |
| 1196 | return -ENOSPC; | 1216 | return -ENOSPC; |
| 1217 | if (tsk->flags & PF_THREAD_BOUND) { | ||
| 1218 | cpumask_t mask; | ||
| 1219 | |||
| 1220 | mutex_lock(&callback_mutex); | ||
| 1221 | mask = cs->cpus_allowed; | ||
| 1222 | mutex_unlock(&callback_mutex); | ||
| 1223 | if (!cpus_equal(tsk->cpus_allowed, mask)) | ||
| 1224 | return -EINVAL; | ||
| 1225 | } | ||
| 1197 | 1226 | ||
| 1198 | return security_task_setscheduler(tsk, 0, NULL); | 1227 | return security_task_setscheduler(tsk, 0, NULL); |
| 1199 | } | 1228 | } |
| @@ -1207,11 +1236,14 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
| 1207 | struct mm_struct *mm; | 1236 | struct mm_struct *mm; |
| 1208 | struct cpuset *cs = cgroup_cs(cont); | 1237 | struct cpuset *cs = cgroup_cs(cont); |
| 1209 | struct cpuset *oldcs = cgroup_cs(oldcont); | 1238 | struct cpuset *oldcs = cgroup_cs(oldcont); |
| 1239 | int err; | ||
| 1210 | 1240 | ||
| 1211 | mutex_lock(&callback_mutex); | 1241 | mutex_lock(&callback_mutex); |
| 1212 | guarantee_online_cpus(cs, &cpus); | 1242 | guarantee_online_cpus(cs, &cpus); |
| 1213 | set_cpus_allowed_ptr(tsk, &cpus); | 1243 | err = set_cpus_allowed_ptr(tsk, &cpus); |
| 1214 | mutex_unlock(&callback_mutex); | 1244 | mutex_unlock(&callback_mutex); |
| 1245 | if (err) | ||
| 1246 | return; | ||
| 1215 | 1247 | ||
| 1216 | from = oldcs->mems_allowed; | 1248 | from = oldcs->mems_allowed; |
| 1217 | to = cs->mems_allowed; | 1249 | to = cs->mems_allowed; |
| @@ -1242,72 +1274,14 @@ typedef enum { | |||
| 1242 | FILE_SPREAD_SLAB, | 1274 | FILE_SPREAD_SLAB, |
| 1243 | } cpuset_filetype_t; | 1275 | } cpuset_filetype_t; |
| 1244 | 1276 | ||
| 1245 | static ssize_t cpuset_common_file_write(struct cgroup *cont, | ||
| 1246 | struct cftype *cft, | ||
| 1247 | struct file *file, | ||
| 1248 | const char __user *userbuf, | ||
| 1249 | size_t nbytes, loff_t *unused_ppos) | ||
| 1250 | { | ||
| 1251 | struct cpuset *cs = cgroup_cs(cont); | ||
| 1252 | cpuset_filetype_t type = cft->private; | ||
| 1253 | char *buffer; | ||
| 1254 | int retval = 0; | ||
| 1255 | |||
| 1256 | /* Crude upper limit on largest legitimate cpulist user might write. */ | ||
| 1257 | if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES)) | ||
| 1258 | return -E2BIG; | ||
| 1259 | |||
| 1260 | /* +1 for nul-terminator */ | ||
| 1261 | buffer = kmalloc(nbytes + 1, GFP_KERNEL); | ||
| 1262 | if (!buffer) | ||
| 1263 | return -ENOMEM; | ||
| 1264 | |||
| 1265 | if (copy_from_user(buffer, userbuf, nbytes)) { | ||
| 1266 | retval = -EFAULT; | ||
| 1267 | goto out1; | ||
| 1268 | } | ||
| 1269 | buffer[nbytes] = 0; /* nul-terminate */ | ||
| 1270 | |||
| 1271 | cgroup_lock(); | ||
| 1272 | |||
| 1273 | if (cgroup_is_removed(cont)) { | ||
| 1274 | retval = -ENODEV; | ||
| 1275 | goto out2; | ||
| 1276 | } | ||
| 1277 | |||
| 1278 | switch (type) { | ||
| 1279 | case FILE_CPULIST: | ||
| 1280 | retval = update_cpumask(cs, buffer); | ||
| 1281 | break; | ||
| 1282 | case FILE_MEMLIST: | ||
| 1283 | retval = update_nodemask(cs, buffer); | ||
| 1284 | break; | ||
| 1285 | default: | ||
| 1286 | retval = -EINVAL; | ||
| 1287 | goto out2; | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | if (retval == 0) | ||
| 1291 | retval = nbytes; | ||
| 1292 | out2: | ||
| 1293 | cgroup_unlock(); | ||
| 1294 | out1: | ||
| 1295 | kfree(buffer); | ||
| 1296 | return retval; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) | 1277 | static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) |
| 1300 | { | 1278 | { |
| 1301 | int retval = 0; | 1279 | int retval = 0; |
| 1302 | struct cpuset *cs = cgroup_cs(cgrp); | 1280 | struct cpuset *cs = cgroup_cs(cgrp); |
| 1303 | cpuset_filetype_t type = cft->private; | 1281 | cpuset_filetype_t type = cft->private; |
| 1304 | 1282 | ||
| 1305 | cgroup_lock(); | 1283 | if (!cgroup_lock_live_group(cgrp)) |
| 1306 | |||
| 1307 | if (cgroup_is_removed(cgrp)) { | ||
| 1308 | cgroup_unlock(); | ||
| 1309 | return -ENODEV; | 1284 | return -ENODEV; |
| 1310 | } | ||
| 1311 | 1285 | ||
| 1312 | switch (type) { | 1286 | switch (type) { |
| 1313 | case FILE_CPU_EXCLUSIVE: | 1287 | case FILE_CPU_EXCLUSIVE: |
| @@ -1353,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | |||
| 1353 | struct cpuset *cs = cgroup_cs(cgrp); | 1327 | struct cpuset *cs = cgroup_cs(cgrp); |
| 1354 | cpuset_filetype_t type = cft->private; | 1328 | cpuset_filetype_t type = cft->private; |
| 1355 | 1329 | ||
| 1356 | cgroup_lock(); | 1330 | if (!cgroup_lock_live_group(cgrp)) |
| 1357 | |||
| 1358 | if (cgroup_is_removed(cgrp)) { | ||
| 1359 | cgroup_unlock(); | ||
| 1360 | return -ENODEV; | 1331 | return -ENODEV; |
| 1361 | } | 1332 | |
| 1362 | switch (type) { | 1333 | switch (type) { |
| 1363 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | 1334 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: |
| 1364 | retval = update_relax_domain_level(cs, val); | 1335 | retval = update_relax_domain_level(cs, val); |
| @@ -1372,6 +1343,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) | |||
| 1372 | } | 1343 | } |
| 1373 | 1344 | ||
| 1374 | /* | 1345 | /* |
| 1346 | * Common handling for a write to a "cpus" or "mems" file. | ||
| 1347 | */ | ||
| 1348 | static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft, | ||
| 1349 | const char *buf) | ||
| 1350 | { | ||
| 1351 | int retval = 0; | ||
| 1352 | |||
| 1353 | if (!cgroup_lock_live_group(cgrp)) | ||
| 1354 | return -ENODEV; | ||
| 1355 | |||
| 1356 | switch (cft->private) { | ||
| 1357 | case FILE_CPULIST: | ||
| 1358 | retval = update_cpumask(cgroup_cs(cgrp), buf); | ||
| 1359 | break; | ||
| 1360 | case FILE_MEMLIST: | ||
| 1361 | retval = update_nodemask(cgroup_cs(cgrp), buf); | ||
| 1362 | break; | ||
| 1363 | default: | ||
| 1364 | retval = -EINVAL; | ||
| 1365 | break; | ||
| 1366 | } | ||
| 1367 | cgroup_unlock(); | ||
| 1368 | return retval; | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | /* | ||
| 1375 | * These ascii lists should be read in a single call, by using a user | 1372 | * These ascii lists should be read in a single call, by using a user |
| 1376 | * buffer large enough to hold the entire map. If read in smaller | 1373 | * buffer large enough to hold the entire map. If read in smaller |
| 1377 | * chunks, there is no guarantee of atomicity. Since the display format | 1374 | * chunks, there is no guarantee of atomicity. Since the display format |
| @@ -1490,14 +1487,16 @@ static struct cftype files[] = { | |||
| 1490 | { | 1487 | { |
| 1491 | .name = "cpus", | 1488 | .name = "cpus", |
| 1492 | .read = cpuset_common_file_read, | 1489 | .read = cpuset_common_file_read, |
| 1493 | .write = cpuset_common_file_write, | 1490 | .write_string = cpuset_write_resmask, |
| 1491 | .max_write_len = (100U + 6 * NR_CPUS), | ||
| 1494 | .private = FILE_CPULIST, | 1492 | .private = FILE_CPULIST, |
| 1495 | }, | 1493 | }, |
| 1496 | 1494 | ||
| 1497 | { | 1495 | { |
| 1498 | .name = "mems", | 1496 | .name = "mems", |
| 1499 | .read = cpuset_common_file_read, | 1497 | .read = cpuset_common_file_read, |
| 1500 | .write = cpuset_common_file_write, | 1498 | .write_string = cpuset_write_resmask, |
| 1499 | .max_write_len = (100U + 6 * MAX_NUMNODES), | ||
| 1501 | .private = FILE_MEMLIST, | 1500 | .private = FILE_MEMLIST, |
| 1502 | }, | 1501 | }, |
| 1503 | 1502 | ||
| @@ -1778,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to) | |||
| 1778 | scan.scan.heap = NULL; | 1777 | scan.scan.heap = NULL; |
| 1779 | scan.to = to->css.cgroup; | 1778 | scan.to = to->css.cgroup; |
| 1780 | 1779 | ||
| 1781 | if (cgroup_scan_tasks((struct cgroup_scanner *)&scan)) | 1780 | if (cgroup_scan_tasks(&scan.scan)) |
| 1782 | printk(KERN_ERR "move_member_tasks_to_cpuset: " | 1781 | printk(KERN_ERR "move_member_tasks_to_cpuset: " |
| 1783 | "cgroup_scan_tasks failed\n"); | 1782 | "cgroup_scan_tasks failed\n"); |
| 1784 | } | 1783 | } |
| @@ -1838,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root) | |||
| 1838 | struct cpuset *child; /* scans child cpusets of cp */ | 1837 | struct cpuset *child; /* scans child cpusets of cp */ |
| 1839 | struct list_head queue; | 1838 | struct list_head queue; |
| 1840 | struct cgroup *cont; | 1839 | struct cgroup *cont; |
| 1840 | nodemask_t oldmems; | ||
| 1841 | 1841 | ||
| 1842 | INIT_LIST_HEAD(&queue); | 1842 | INIT_LIST_HEAD(&queue); |
| 1843 | 1843 | ||
| @@ -1857,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root) | |||
| 1857 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) | 1857 | nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) |
| 1858 | continue; | 1858 | continue; |
| 1859 | 1859 | ||
| 1860 | oldmems = cp->mems_allowed; | ||
| 1861 | |||
| 1860 | /* Remove offline cpus and mems from this cpuset. */ | 1862 | /* Remove offline cpus and mems from this cpuset. */ |
| 1861 | mutex_lock(&callback_mutex); | 1863 | mutex_lock(&callback_mutex); |
| 1862 | cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); | 1864 | cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map); |
| @@ -1868,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root) | |||
| 1868 | if (cpus_empty(cp->cpus_allowed) || | 1870 | if (cpus_empty(cp->cpus_allowed) || |
| 1869 | nodes_empty(cp->mems_allowed)) | 1871 | nodes_empty(cp->mems_allowed)) |
| 1870 | remove_tasks_in_empty_cpuset(cp); | 1872 | remove_tasks_in_empty_cpuset(cp); |
| 1873 | else { | ||
| 1874 | update_tasks_cpumask(cp); | ||
| 1875 | update_tasks_nodemask(cp, &oldmems); | ||
| 1876 | } | ||
| 1871 | } | 1877 | } |
| 1872 | } | 1878 | } |
| 1873 | 1879 | ||
| @@ -1960,7 +1966,6 @@ void __init cpuset_init_smp(void) | |||
| 1960 | } | 1966 | } |
| 1961 | 1967 | ||
| 1962 | /** | 1968 | /** |
| 1963 | |||
| 1964 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. | 1969 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. |
| 1965 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. | 1970 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. |
| 1966 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. | 1971 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. |
