diff options
-rw-r--r-- | kernel/cpuset.c | 181 |
1 files changed, 109 insertions, 72 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 276ce7e4f1ab..7326d51eefe1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -764,6 +764,37 @@ static void cpuset_change_cpumask(struct task_struct *tsk, | |||
764 | } | 764 | } |
765 | 765 | ||
766 | /** | 766 | /** |
767 | * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset. | ||
768 | * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed | ||
769 | * | ||
770 | * Called with cgroup_mutex held | ||
771 | * | ||
772 | * The cgroup_scan_tasks() function will scan all the tasks in a cgroup, | ||
773 | * calling callback functions for each. | ||
774 | * | ||
775 | * Return 0 if successful, -errno if not. | ||
776 | */ | ||
777 | static int update_tasks_cpumask(struct cpuset *cs) | ||
778 | { | ||
779 | struct cgroup_scanner scan; | ||
780 | struct ptr_heap heap; | ||
781 | int retval; | ||
782 | |||
783 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); | ||
784 | if (retval) | ||
785 | return retval; | ||
786 | |||
787 | scan.cg = cs->css.cgroup; | ||
788 | scan.test_task = cpuset_test_cpumask; | ||
789 | scan.process_task = cpuset_change_cpumask; | ||
790 | scan.heap = &heap; | ||
791 | retval = cgroup_scan_tasks(&scan); | ||
792 | |||
793 | heap_free(&heap); | ||
794 | return retval; | ||
795 | } | ||
796 | |||
797 | /** | ||
767 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it | 798 | * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it |
768 | * @cs: the cpuset to consider | 799 | * @cs: the cpuset to consider |
769 | * @buf: buffer of cpu numbers written to this cpuset | 800 | * @buf: buffer of cpu numbers written to this cpuset |
@@ -771,8 +802,6 @@ static void cpuset_change_cpumask(struct task_struct *tsk, | |||
771 | static int update_cpumask(struct cpuset *cs, const char *buf) | 802 | static int update_cpumask(struct cpuset *cs, const char *buf) |
772 | { | 803 | { |
773 | struct cpuset trialcs; | 804 | struct cpuset trialcs; |
774 | struct cgroup_scanner scan; | ||
775 | struct ptr_heap heap; | ||
776 | int retval; | 805 | int retval; |
777 | int is_load_balanced; | 806 | int is_load_balanced; |
778 | 807 | ||
@@ -806,10 +835,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
806 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) | 835 | if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed)) |
807 | return 0; | 836 | return 0; |
808 | 837 | ||
809 | retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, &started_after); | ||
810 | if (retval) | ||
811 | return retval; | ||
812 | |||
813 | is_load_balanced = is_sched_load_balance(&trialcs); | 838 | is_load_balanced = is_sched_load_balance(&trialcs); |
814 | 839 | ||
815 | mutex_lock(&callback_mutex); | 840 | mutex_lock(&callback_mutex); |
@@ -820,12 +845,9 @@ static int update_cpumask(struct cpuset *cs, const char *buf) | |||
820 | * Scan tasks in the cpuset, and update the cpumasks of any | 845 | * Scan tasks in the cpuset, and update the cpumasks of any |
821 | * that need an update. | 846 | * that need an update. |
822 | */ | 847 | */ |
823 | scan.cg = cs->css.cgroup; | 848 | retval = update_tasks_cpumask(cs); |
824 | scan.test_task = cpuset_test_cpumask; | 849 | if (retval < 0) |
825 | scan.process_task = cpuset_change_cpumask; | 850 | return retval; |
826 | scan.heap = &heap; | ||
827 | cgroup_scan_tasks(&scan); | ||
828 | heap_free(&heap); | ||
829 | 851 | ||
830 | if (is_load_balanced) | 852 | if (is_load_balanced) |
831 | rebuild_sched_domains(); | 853 | rebuild_sched_domains(); |
@@ -881,73 +903,25 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, | |||
881 | mutex_unlock(&callback_mutex); | 903 | mutex_unlock(&callback_mutex); |
882 | } | 904 | } |
883 | 905 | ||
884 | /* | ||
885 | * Handle user request to change the 'mems' memory placement | ||
886 | * of a cpuset. Needs to validate the request, update the | ||
887 | * cpusets mems_allowed and mems_generation, and for each | ||
888 | * task in the cpuset, rebind any vma mempolicies and if | ||
889 | * the cpuset is marked 'memory_migrate', migrate the tasks | ||
890 | * pages to the new memory. | ||
891 | * | ||
892 | * Call with cgroup_mutex held. May take callback_mutex during call. | ||
893 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | ||
894 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | ||
895 | * their mempolicies to the cpusets new mems_allowed. | ||
896 | */ | ||
897 | |||
898 | static void *cpuset_being_rebound; | 906 | static void *cpuset_being_rebound; |
899 | 907 | ||
900 | static int update_nodemask(struct cpuset *cs, const char *buf) | 908 | /** |
909 | * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset. | ||
910 | * @cs: the cpuset in which each task's mems_allowed mask needs to be changed | ||
911 | * @oldmem: old mems_allowed of cpuset cs | ||
912 | * | ||
913 | * Called with cgroup_mutex held | ||
914 | * Return 0 if successful, -errno if not. | ||
915 | */ | ||
916 | static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem) | ||
901 | { | 917 | { |
902 | struct cpuset trialcs; | ||
903 | nodemask_t oldmem; | ||
904 | struct task_struct *p; | 918 | struct task_struct *p; |
905 | struct mm_struct **mmarray; | 919 | struct mm_struct **mmarray; |
906 | int i, n, ntasks; | 920 | int i, n, ntasks; |
907 | int migrate; | 921 | int migrate; |
908 | int fudge; | 922 | int fudge; |
909 | int retval; | ||
910 | struct cgroup_iter it; | 923 | struct cgroup_iter it; |
911 | 924 | int retval; | |
912 | /* | ||
913 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; | ||
914 | * it's read-only | ||
915 | */ | ||
916 | if (cs == &top_cpuset) | ||
917 | return -EACCES; | ||
918 | |||
919 | trialcs = *cs; | ||
920 | |||
921 | /* | ||
922 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | ||
923 | * Since nodelist_parse() fails on an empty mask, we special case | ||
924 | * that parsing. The validate_change() call ensures that cpusets | ||
925 | * with tasks have memory. | ||
926 | */ | ||
927 | if (!*buf) { | ||
928 | nodes_clear(trialcs.mems_allowed); | ||
929 | } else { | ||
930 | retval = nodelist_parse(buf, trialcs.mems_allowed); | ||
931 | if (retval < 0) | ||
932 | goto done; | ||
933 | |||
934 | if (!nodes_subset(trialcs.mems_allowed, | ||
935 | node_states[N_HIGH_MEMORY])) | ||
936 | return -EINVAL; | ||
937 | } | ||
938 | oldmem = cs->mems_allowed; | ||
939 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | ||
940 | retval = 0; /* Too easy - nothing to do */ | ||
941 | goto done; | ||
942 | } | ||
943 | retval = validate_change(cs, &trialcs); | ||
944 | if (retval < 0) | ||
945 | goto done; | ||
946 | |||
947 | mutex_lock(&callback_mutex); | ||
948 | cs->mems_allowed = trialcs.mems_allowed; | ||
949 | cs->mems_generation = cpuset_mems_generation++; | ||
950 | mutex_unlock(&callback_mutex); | ||
951 | 925 | ||
952 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ | 926 | cpuset_being_rebound = cs; /* causes mpol_dup() rebind */ |
953 | 927 | ||
@@ -1014,7 +988,7 @@ static int update_nodemask(struct cpuset *cs, const char *buf) | |||
1014 | 988 | ||
1015 | mpol_rebind_mm(mm, &cs->mems_allowed); | 989 | mpol_rebind_mm(mm, &cs->mems_allowed); |
1016 | if (migrate) | 990 | if (migrate) |
1017 | cpuset_migrate_mm(mm, &oldmem, &cs->mems_allowed); | 991 | cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed); |
1018 | mmput(mm); | 992 | mmput(mm); |
1019 | } | 993 | } |
1020 | 994 | ||
@@ -1026,6 +1000,70 @@ done: | |||
1026 | return retval; | 1000 | return retval; |
1027 | } | 1001 | } |
1028 | 1002 | ||
1003 | /* | ||
1004 | * Handle user request to change the 'mems' memory placement | ||
1005 | * of a cpuset. Needs to validate the request, update the | ||
1006 | * cpusets mems_allowed and mems_generation, and for each | ||
1007 | * task in the cpuset, rebind any vma mempolicies and if | ||
1008 | * the cpuset is marked 'memory_migrate', migrate the tasks | ||
1009 | * pages to the new memory. | ||
1010 | * | ||
1011 | * Call with cgroup_mutex held. May take callback_mutex during call. | ||
1012 | * Will take tasklist_lock, scan tasklist for tasks in cpuset cs, | ||
1013 | * lock each such tasks mm->mmap_sem, scan its vma's and rebind | ||
1014 | * their mempolicies to the cpusets new mems_allowed. | ||
1015 | */ | ||
1016 | static int update_nodemask(struct cpuset *cs, const char *buf) | ||
1017 | { | ||
1018 | struct cpuset trialcs; | ||
1019 | nodemask_t oldmem; | ||
1020 | int retval; | ||
1021 | |||
1022 | /* | ||
1023 | * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY]; | ||
1024 | * it's read-only | ||
1025 | */ | ||
1026 | if (cs == &top_cpuset) | ||
1027 | return -EACCES; | ||
1028 | |||
1029 | trialcs = *cs; | ||
1030 | |||
1031 | /* | ||
1032 | * An empty mems_allowed is ok iff there are no tasks in the cpuset. | ||
1033 | * Since nodelist_parse() fails on an empty mask, we special case | ||
1034 | * that parsing. The validate_change() call ensures that cpusets | ||
1035 | * with tasks have memory. | ||
1036 | */ | ||
1037 | if (!*buf) { | ||
1038 | nodes_clear(trialcs.mems_allowed); | ||
1039 | } else { | ||
1040 | retval = nodelist_parse(buf, trialcs.mems_allowed); | ||
1041 | if (retval < 0) | ||
1042 | goto done; | ||
1043 | |||
1044 | if (!nodes_subset(trialcs.mems_allowed, | ||
1045 | node_states[N_HIGH_MEMORY])) | ||
1046 | return -EINVAL; | ||
1047 | } | ||
1048 | oldmem = cs->mems_allowed; | ||
1049 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | ||
1050 | retval = 0; /* Too easy - nothing to do */ | ||
1051 | goto done; | ||
1052 | } | ||
1053 | retval = validate_change(cs, &trialcs); | ||
1054 | if (retval < 0) | ||
1055 | goto done; | ||
1056 | |||
1057 | mutex_lock(&callback_mutex); | ||
1058 | cs->mems_allowed = trialcs.mems_allowed; | ||
1059 | cs->mems_generation = cpuset_mems_generation++; | ||
1060 | mutex_unlock(&callback_mutex); | ||
1061 | |||
1062 | retval = update_tasks_nodemask(cs, &oldmem); | ||
1063 | done: | ||
1064 | return retval; | ||
1065 | } | ||
1066 | |||
1029 | int current_cpuset_is_being_rebound(void) | 1067 | int current_cpuset_is_being_rebound(void) |
1030 | { | 1068 | { |
1031 | return task_cs(current) == cpuset_being_rebound; | 1069 | return task_cs(current) == cpuset_being_rebound; |
@@ -1935,7 +1973,6 @@ void __init cpuset_init_smp(void) | |||
1935 | } | 1973 | } |
1936 | 1974 | ||
1937 | /** | 1975 | /** |
1938 | |||
1939 | * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset. | 1976 | * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset. |
1940 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. | 1977 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. |
1941 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. | 1978 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. |