Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--   kernel/cpuset.c   363
1 file changed, 178 insertions, 185 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 459d601947a8..91cf85b36dd5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
  * The task_struct fields mems_allowed and mems_generation may only
  * be accessed in the context of that task, so require no locks.
  *
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
  * The cpuset_common_file_read() handlers only hold callback_mutex across
  * small pieces of code, such as when reading out possibly multi-word
  * cpumasks and nodemasks.
@@ -369,7 +365,7 @@ void cpuset_update_task_memory_state(void)
 		my_cpusets_mem_gen = top_cpuset.mems_generation;
 	} else {
 		rcu_read_lock();
-		my_cpusets_mem_gen = task_cs(current)->mems_generation;
+		my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
 		rcu_read_unlock();
 	}
 
@@ -500,11 +496,16 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
 /*
  * rebuild_sched_domains()
  *
- * If the flag 'sched_load_balance' of any cpuset with non-empty
- * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
- * which has that flag enabled, or if any cpuset with a non-empty
- * 'cpus' is removed, then call this routine to rebuild the
- * scheduler's dynamic sched domains.
+ * This routine will be called to rebuild the scheduler's dynamic
+ * sched domains:
+ * - if the flag 'sched_load_balance' of any cpuset with non-empty
+ *   'cpus' changes,
+ * - or if the 'cpus' allowed changes in any cpuset which has that
+ *   flag enabled,
+ * - or if the 'sched_relax_domain_level' of any cpuset which has
+ *   that flag enabled and with non-empty 'cpus' changes,
+ * - or if any cpuset with non-empty 'cpus' is removed,
+ * - or if a cpu gets offlined.
  *
  * This routine builds a partial partition of the systems CPUs
  * (the set of non-overlappping cpumask_t's in the array 'part'
@@ -564,7 +565,7 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
  * partition_sched_domains().
  */
 
-static void rebuild_sched_domains(void)
+void rebuild_sched_domains(void)
 {
 	struct kfifo *q;	/* queue of cpusets to be scanned */
 	struct cpuset *cp;	/* scans q */
@@ -609,8 +610,13 @@ static void rebuild_sched_domains(void)
 	while (__kfifo_get(q, (void *)&cp, sizeof(cp))) {
 		struct cgroup *cont;
 		struct cpuset *child;	/* scans child cpusets of cp */
+
+		if (cpus_empty(cp->cpus_allowed))
+			continue;
+
 		if (is_sched_load_balance(cp))
 			csa[csn++] = cp;
+
 		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
 			child = cgroup_cs(cont);
 			__kfifo_put(q, (void *)&child, sizeof(cp));
@@ -679,7 +685,9 @@ restart:
 				if (apn == b->pn) {
 					cpus_or(*dp, *dp, b->cpus_allowed);
 					b->pn = -1;
-					update_domain_attr(dattr, b);
+					if (dattr)
+						update_domain_attr(dattr
+								   + nslot, b);
 				}
 			}
 			nslot++;
@@ -701,36 +709,6 @@ done:
 	/* Don't kfree(dattr) -- partition_sched_domains() does that. */
 }
 
-static inline int started_after_time(struct task_struct *t1,
-				     struct timespec *time,
-				     struct task_struct *t2)
-{
-	int start_diff = timespec_compare(&t1->start_time, time);
-	if (start_diff > 0) {
-		return 1;
-	} else if (start_diff < 0) {
-		return 0;
-	} else {
-		/*
-		 * Arbitrarily, if two processes started at the same
-		 * time, we'll say that the lower pointer value
-		 * started first. Note that t2 may have exited by now
-		 * so this may not be a valid pointer any longer, but
-		 * that's fine - it still serves to distinguish
-		 * between two tasks started (effectively)
-		 * simultaneously.
-		 */
-		return t1 > t2;
-	}
-}
-
-static inline int started_after(void *p1, void *p2)
-{
-	struct task_struct *t1 = p1;
-	struct task_struct *t2 = p2;
-	return started_after_time(t1, &t2->start_time, t2);
-}
-
 /**
  * cpuset_test_cpumask - test a task's cpus_allowed versus its cpuset's
  * @tsk: task to test
@@ -766,15 +744,49 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
 }
 
 /**
+ * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
+ *
+ * Called with cgroup_mutex held
+ *
+ * The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
+ * calling callback functions for each.
+ *
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_cpumask(struct cpuset *cs)
+{
+	struct cgroup_scanner scan;
+	struct ptr_heap heap;
+	int retval;
+
+	/*
+	 * cgroup_scan_tasks() will initialize heap->gt for us.
+	 * heap_init() is still needed here for we should not change
+	 * cs->cpus_allowed when heap_init() fails.
+	 */
+	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
+	if (retval)
+		return retval;
+
+	scan.cg = cs->css.cgroup;
+	scan.test_task = cpuset_test_cpumask;
+	scan.process_task = cpuset_change_cpumask;
+	scan.heap = &heap;
+	retval = cgroup_scan_tasks(&scan);
+
+	heap_free(&heap);
+	return retval;
+}
+
+/**
  * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
  * @cs: the cpuset to consider
  * @buf: buffer of cpu numbers written to this cpuset
  */
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
 {
 	struct cpuset trialcs;
-	struct cgroup_scanner scan;
-	struct ptr_heap heap;
 	int retval;
 	int is_load_balanced;
 
@@ -790,7 +802,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	 * that parsing. The validate_change() call ensures that cpusets
 	 * with tasks have cpus.
 	 */
-	buf = strstrip(buf);
 	if (!*buf) {
 		cpus_clear(trialcs.cpus_allowed);
 	} else {
@@ -809,10 +820,6 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
 		return 0;
 
-	retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after);
-	if (retval)
-		return retval;
-
 	is_load_balanced = is_sched_load_balance(&trialcs);
 
 	mutex_lock(&callback_mutex);
@@ -823,12 +830,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	 * Scan tasks in the cpuset, and update the cpumasks of any
 	 * that need an update.
 	 */
-	scan.cg = cs->css.cgroup;
-	scan.test_task = cpuset_test_cpumask;
-	scan.process_task = cpuset_change_cpumask;
-	scan.heap = &heap;
-	cgroup_scan_tasks(&scan);
-	heap_free(&heap);
+	retval = update_tasks_cpumask(cs);
+	if (retval < 0)
+		return retval;
 
 	if (is_load_balanced)
 		rebuild_sched_domains();
@@ -884,74 +888,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	mutex_unlock(&callback_mutex);
 }
 
-/*
- * Handle user request to change the 'mems' memory placement
- * of a cpuset. Needs to validate the request, update the
- * cpusets mems_allowed and mems_generation, and for each
- * task in the cpuset, rebind any vma mempolicies and if
- * the cpuset is marked 'memory_migrate', migrate the tasks
- * pages to the new memory.
- *
- * Call with cgroup_mutex held. May take callback_mutex during call.
- * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
- * lock each such tasks mm->mmap_sem, scan its vma's and rebind
- * their mempolicies to the cpusets new mems_allowed.
- */
-
 static void *cpuset_being_rebound;
 
-static int update_nodemask(struct cpuset *cs, char *buf)
+/**
+ * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
+ * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
+ * @oldmem: old mems_allowed of cpuset cs
+ *
+ * Called with cgroup_mutex held
+ * Return 0 if successful, -errno if not.
+ */
+static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
 {
-	struct cpuset trialcs;
-	nodemask_t oldmem;
 	struct task_struct *p;
 	struct mm_struct **mmarray;
 	int i, n, ntasks;
 	int migrate;
 	int fudge;
-	int retval;
 	struct cgroup_iter it;
-
-	/*
-	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
-	 * it's read-only
-	 */
-	if (cs == &top_cpuset)
-		return -EACCES;
-
-	trialcs = *cs;
-
-	/*
-	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
-	 * Since nodelist_parse() fails on an empty mask, we special case
-	 * that parsing. The validate_change() call ensures that cpusets
-	 * with tasks have memory.
-	 */
-	buf = strstrip(buf);
-	if (!*buf) {
-		nodes_clear(trialcs.mems_allowed);
-	} else {
-		retval = nodelist_parse(buf, trialcs.mems_allowed);
-		if (retval < 0)
-			goto done;
-
-		if (!nodes_subset(trialcs.mems_allowed,
-				node_states[N_HIGH_MEMORY]))
-			return -EINVAL;
-	}
-	oldmem = cs->mems_allowed;
-	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
-		retval = 0;		/* Too easy - nothing to do */
-		goto done;
-	}
-	retval = validate_change(cs, &trialcs);
-	if (retval < 0)
-		goto done;
-
-	mutex_lock(&callback_mutex);
-	cs->mems_allowed = trialcs.mems_allowed;
-	cs->mems_generation = cpuset_mems_generation++;
-	mutex_unlock(&callback_mutex);
+	int retval;
 
 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
 
@@ -1018,7 +973,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 
 		mpol_rebind_mm(mm, &cs->mems_allowed);
 		if (migrate)
-			cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed);
+			cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
 		mmput(mm);
 	}
 
@@ -1030,6 +985,70 @@ done:
 	return retval;
 }
 
+/*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset. Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
+ * Call with cgroup_mutex held. May take callback_mutex during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
+ */
+static int update_nodemask(struct cpuset *cs, const char *buf)
+{
+	struct cpuset trialcs;
+	nodemask_t oldmem;
+	int retval;
+
+	/*
+	 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
+	 * it's read-only
+	 */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
+	trialcs = *cs;
+
+	/*
+	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
+	 * Since nodelist_parse() fails on an empty mask, we special case
+	 * that parsing. The validate_change() call ensures that cpusets
+	 * with tasks have memory.
+	 */
+	if (!*buf) {
+		nodes_clear(trialcs.mems_allowed);
+	} else {
+		retval = nodelist_parse(buf, trialcs.mems_allowed);
+		if (retval < 0)
+			goto done;
+
+		if (!nodes_subset(trialcs.mems_allowed,
+				node_states[N_HIGH_MEMORY]))
+			return -EINVAL;
+	}
+	oldmem = cs->mems_allowed;
+	if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+		retval = 0;		/* Too easy - nothing to do */
+		goto done;
+	}
+	retval = validate_change(cs, &trialcs);
+	if (retval < 0)
+		goto done;
+
+	mutex_lock(&callback_mutex);
+	cs->mems_allowed = trialcs.mems_allowed;
+	cs->mems_generation = cpuset_mems_generation++;
+	mutex_unlock(&callback_mutex);
+
+	retval = update_tasks_nodemask(cs, &oldmem);
+done:
+	return retval;
+}
+
 int current_cpuset_is_being_rebound(void)
 {
 	return task_cs(current) == cpuset_being_rebound;
@@ -1042,7 +1061,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
 
 	if (val != cs->relax_domain_level) {
 		cs->relax_domain_level = val;
-		rebuild_sched_domains();
+		if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
+			rebuild_sched_domains();
 	}
 
 	return 0;
@@ -1254,72 +1274,14 @@ typedef enum {
 	FILE_SPREAD_SLAB,
 } cpuset_filetype_t;
 
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
-					struct cftype *cft,
-					struct file *file,
-					const char __user *userbuf,
-					size_t nbytes, loff_t *unused_ppos)
-{
-	struct cpuset *cs = cgroup_cs(cont);
-	cpuset_filetype_t type = cft->private;
-	char *buffer;
-	int retval = 0;
-
-	/* Crude upper limit on largest legitimate cpulist user might write. */
-	if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
-		return -E2BIG;
-
-	/* +1 for nul-terminator */
-	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
-	if (!buffer)
-		return -ENOMEM;
-
-	if (copy_from_user(buffer, userbuf, nbytes)) {
-		retval = -EFAULT;
-		goto out1;
-	}
-	buffer[nbytes] = 0;	/* nul-terminate */
-
-	cgroup_lock();
-
-	if (cgroup_is_removed(cont)) {
-		retval = -ENODEV;
-		goto out2;
-	}
-
-	switch (type) {
-	case FILE_CPULIST:
-		retval = update_cpumask(cs, buffer);
-		break;
-	case FILE_MEMLIST:
-		retval = update_nodemask(cs, buffer);
-		break;
-	default:
-		retval = -EINVAL;
-		goto out2;
-	}
-
-	if (retval == 0)
-		retval = nbytes;
-out2:
-	cgroup_unlock();
-out1:
-	kfree(buffer);
-	return retval;
-}
-
 static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
 {
 	int retval = 0;
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	}
 
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
@@ -1365,12 +1327,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
 
-	cgroup_lock();
-
-	if (cgroup_is_removed(cgrp)) {
-		cgroup_unlock();
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
-	}
+
 	switch (type) {
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
 		retval = update_relax_domain_level(cs, val);
@@ -1384,6 +1343,32 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
 }
 
 /*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+				const char *buf)
+{
+	int retval = 0;
+
+	if (!cgroup_lock_live_group(cgrp))
+		return -ENODEV;
+
+	switch (cft->private) {
+	case FILE_CPULIST:
+		retval = update_cpumask(cgroup_cs(cgrp), buf);
+		break;
+	case FILE_MEMLIST:
+		retval = update_nodemask(cgroup_cs(cgrp), buf);
+		break;
+	default:
+		retval = -EINVAL;
+		break;
+	}
+	cgroup_unlock();
+	return retval;
+}
+
+/*
  * These ascii lists should be read in a single call, by using a user
  * buffer large enough to hold the entire map. If read in smaller
  * chunks, there is no guarantee of atomicity. Since the display format
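The comment in the hunk above recommends reading these files with a buffer large enough to take the whole map in one call. A minimal userspace sketch of that pattern (the /dev/cpuset mount point and the buffer size are assumptions for illustration):

/* Read a cpuset's "cpus" file in a single read() so the returned list
 * is an atomic snapshot; reading in smaller chunks gives no such
 * guarantee.  The mount point is an assumption; cpusets may be mounted
 * elsewhere on a given system. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];		/* assumed large enough for the whole cpu list */
	int fd = open("/dev/cpuset/cpus", O_RDONLY);
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, buf, sizeof(buf) - 1);	/* one call: atomic snapshot */
	if (n < 0) {
		perror("read");
		close(fd);
		return 1;
	}
	buf[n] = '\0';
	printf("cpus: %s", buf);
	close(fd);
	return 0;
}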
@@ -1502,14 +1487,16 @@ static struct cftype files[] = {
 	{
 		.name = "cpus",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
 
 	{
 		.name = "mems",
 		.read = cpuset_common_file_read,
-		.write = cpuset_common_file_write,
+		.write_string = cpuset_write_resmask,
+		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
 
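On the write side, the hunk above routes both files through cpuset_write_resmask() via .write_string, with .max_write_len playing the role of the "crude upper limit" check in the removed handler: an oversized single write is rejected rather than parsed. A minimal userspace sketch of updating a cpuset with one write() (the mount point, cpuset name, and cpu list are assumptions for illustration):

/* Write a cpu list to a cpuset's "cpus" file in a single write().
 * The kernel parses the whole buffer at once (update_cpumask()), so
 * partial writes are not meaningful.  Path and list are assumed. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *cpus = "0-3";	/* assumed: restrict the cpuset to CPUs 0-3 */
	int fd = open("/dev/cpuset/my_set/cpus", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, cpus, strlen(cpus)) < 0)
		perror("write");
	close(fd);
	return 0;
}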
@@ -1790,7 +1777,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
 	scan.scan.heap = NULL;
 	scan.to = to->css.cgroup;
 
-	if (cgroup_scan_tasks((struct cgroup_scanner *)&scan))
+	if (cgroup_scan_tasks(&scan.scan))
 		printk(KERN_ERR "move_member_tasks_to_cpuset: "
 				"cgroup_scan_tasks failed\n");
 }
@@ -1850,6 +1837,7 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 	struct cpuset *child;	/* scans child cpusets of cp */
 	struct list_head queue;
 	struct cgroup *cont;
+	nodemask_t oldmems;
 
 	INIT_LIST_HEAD(&queue);
 
@@ -1869,6 +1857,8 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
 			continue;
 
+		oldmems = cp->mems_allowed;
+
 		/* Remove offline cpus and mems from this cpuset. */
 		mutex_lock(&callback_mutex);
 		cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
@@ -1880,6 +1870,10 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
 		if (cpus_empty(cp->cpus_allowed) ||
 		    nodes_empty(cp->mems_allowed))
 			remove_tasks_in_empty_cpuset(cp);
+		else {
+			update_tasks_cpumask(cp);
+			update_tasks_nodemask(cp, &oldmems);
+		}
 	}
 }
 
@@ -1972,7 +1966,6 @@ void __init cpuset_init_smp(void)
 
 }
 
 /**
-
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.