Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cpuset.c	90
1 file changed, 90 insertions, 0 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6004719f26ee..19f87565be17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -812,12 +812,24 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 }
 
 /*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset.  Needs to validate the request, update the
+ * cpuset's mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies.
+ *
  * Call with manage_sem held.  May take callback_sem during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such task's mm->mmap_sem, scan its vmas and rebind
+ * their mempolicies to the cpuset's new mems_allowed.
  */
 
 static int update_nodemask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
+	struct task_struct *g, *p;
+	struct mm_struct **mmarray;
+	int i, n, ntasks;
+	int fudge;
 	int retval;
 
 	trialcs = *cs;
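
The new header comment also pins down a strict lock ordering for the rebind: manage_sem outermost, then callback_sem for the short publish step, then tasklist_lock for the scan, and finally each task's mm->mmap_sem. Taking nested locks in one fixed global order is what keeps the protocol deadlock-free. A minimal userspace sketch of that discipline, with pthread mutexes standing in for the kernel primitives (all names below are illustrative stand-ins, not kernel APIs):

#include <pthread.h>

static pthread_mutex_t manage_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t callback_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t tasklist_lock = PTHREAD_MUTEX_INITIALIZER;

/* Every writer takes the locks in the same global order, so no two
 * paths can ever wait on each other in a cycle. */
static void rebind_mems(void)
{
	pthread_mutex_lock(&manage_lock);	/* held across the whole update */

	pthread_mutex_lock(&callback_lock);	/* short: publish the new mask */
	/* ... update mems_allowed and mems_generation ... */
	pthread_mutex_unlock(&callback_lock);

	pthread_mutex_lock(&tasklist_lock);	/* scan tasks, take mm references */
	/* ... per-mm locks are taken only after this one is dropped ... */
	pthread_mutex_unlock(&tasklist_lock);

	pthread_mutex_unlock(&manage_lock);
}
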
@@ -839,6 +851,76 @@ static int update_nodemask(struct cpuset *cs, char *buf)
839 cs->mems_generation = atomic_read(&cpuset_mems_generation); 851 cs->mems_generation = atomic_read(&cpuset_mems_generation);
840 up(&callback_sem); 852 up(&callback_sem);
841 853
+	set_cpuset_being_rebound(cs);		/* causes mpol_copy() rebind */
+
+	fudge = 10;				/* spare mmarray[] slots */
+	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
+	retval = -ENOMEM;
+
+	/*
+	 * Allocate mmarray[] to hold mm reference for each task
+	 * in cpuset cs.  Can't kmalloc GFP_KERNEL while holding
+	 * tasklist_lock.  We could use GFP_ATOMIC, but with a
+	 * few more lines of code, we can retry until we get a big
+	 * enough mmarray[] w/o using GFP_ATOMIC.
+	 */
+	while (1) {
+		ntasks = atomic_read(&cs->count);	/* guess */
+		ntasks += fudge;
+		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
+		if (!mmarray)
+			goto done;
+		write_lock_irq(&tasklist_lock);		/* block fork */
+		if (atomic_read(&cs->count) <= ntasks)
+			break;				/* got enough */
+		write_unlock_irq(&tasklist_lock);	/* try again */
+		kfree(mmarray);
+	}
+
+	n = 0;
+
+	/* Load up mmarray[] with mm reference for each task in cpuset. */
+	do_each_thread(g, p) {
+		struct mm_struct *mm;
+
+		if (n >= ntasks) {
+			printk(KERN_WARNING
+				"Cpuset mempolicy rebind incomplete.\n");
+			continue;
+		}
+		if (p->cpuset != cs)
+			continue;
+		mm = get_task_mm(p);
+		if (!mm)
+			continue;
+		mmarray[n++] = mm;
+	} while_each_thread(g, p);
+	write_unlock_irq(&tasklist_lock);
+
+	/*
+	 * Now that we've dropped the tasklist spinlock, we can
+	 * rebind the vma mempolicies of each mm in mmarray[] to their
+	 * new cpuset, and release that mm.  The mpol_rebind_mm()
+	 * call takes mmap_sem, which we couldn't take while holding
+	 * tasklist_lock.  Forks can happen again now - the mpol_copy()
+	 * cpuset_being_rebound check will catch such forks, and rebind
+	 * their vma mempolicies too.  Because we still hold the global
+	 * cpuset manage_sem, we know that no other rebind effort will
+	 * be contending for the global variable cpuset_being_rebound.
+	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
+	 * is idempotent.
+	 */
+	for (i = 0; i < n; i++) {
+		struct mm_struct *mm = mmarray[i];
+
+		mpol_rebind_mm(mm, &cs->mems_allowed);
+		mmput(mm);
+	}
+
+	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
+	kfree(mmarray);
+	set_cpuset_being_rebound(NULL);
+	retval = 0;
 done:
 	return retval;
 }
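
The allocate/lock/verify loop in this hunk is a reusable pattern: the array must be sized to the task count, but a sleeping GFP_KERNEL allocation cannot happen under tasklist_lock, so the code guesses the size outside the lock, re-checks the count once the lock is held (and forks are blocked), and retries with a bigger guess if it raced. A compilable userspace sketch of the same shape, with illustrative names (task_count, task_lock) rather than kernel APIs:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

static pthread_mutex_t task_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int task_count;		/* can only grow while task_lock is free */

/* Returns a buffer big enough for every current task, or NULL.  On
 * success task_lock is still held, mirroring how update_nodemask()
 * keeps tasklist_lock across the scan that follows. */
static int *alloc_task_snapshot(int *nslots)
{
	const int fudge = 10;		/* headroom for concurrent forks */
	int guess, *buf;

	for (;;) {
		guess = atomic_load(&task_count) + fudge;	/* just a guess */
		buf = malloc(guess * sizeof(*buf));	/* may block: no lock held */
		if (!buf)
			return NULL;
		pthread_mutex_lock(&task_lock);		/* blocks new forks */
		if (atomic_load(&task_count) <= guess)
			break;			/* guess held up; keep the lock */
		pthread_mutex_unlock(&task_lock);	/* raced; retry bigger */
		free(buf);
	}
	*nslots = guess;
	return buf;
}

The fudge headroom only makes retries unlikely; correctness comes entirely from the re-check under the lock.
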
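The long comment before the rebind loop leans on mpol_rebind_mm() being idempotent, since an mm can be rebound both by this scan and by the mpol_copy() hook in a concurrent fork. Idempotence falls out if the rebind short-circuits once a policy has already been moved to the new mask. A hedged sketch of that shape (the types and the remap rule here are invented for illustration; this is not mpol_rebind_mm() itself):

#include <string.h>

typedef struct { unsigned long bits; } nodemask_t;

struct mempolicy {
	nodemask_t cached;	/* mask this policy was last rebound to */
	nodemask_t nodes;	/* nodes the policy currently uses */
};

/* Rebinding twice with the same new mask is harmless: the second
 * call hits the early return and changes nothing. */
static void rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
{
	if (memcmp(&pol->cached, newmask, sizeof(*newmask)) == 0)
		return;
	pol->nodes.bits &= newmask->bits;	/* illustrative remap rule */
	pol->cached = *newmask;
}
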
@@ -1011,6 +1093,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	struct cpuset *oldcs;
 	cpumask_t cpus;
 	nodemask_t from, to;
+	struct mm_struct *mm;
 
 	if (sscanf(pidbuf, "%d", &pid) != 1)
 		return -EIO;
@@ -1060,6 +1143,13 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 	to = cs->mems_allowed;
 
 	up(&callback_sem);
+
+	mm = get_task_mm(tsk);
+	if (mm) {
+		mpol_rebind_mm(mm, &to);
+		mmput(mm);
+	}
+
 	if (is_memory_migrate(cs))
 		do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
 	put_task_struct(tsk);
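
Both hunks rely on the same reference discipline: get_task_mm() pins the mm so it can still be used after the locks protecting the task are dropped, and mmput() releases that pin. A minimal userspace sketch of such a counted reference, with illustrative names rather than the kernel's mm API:

#include <stdatomic.h>
#include <stdlib.h>

struct mm {
	atomic_int users;	/* reference count; freed when it hits zero */
	/* ... address-space state ... */
};

static struct mm *mm_alloc(void)
{
	struct mm *mm = calloc(1, sizeof(*mm));
	if (mm)
		atomic_store(&mm->users, 1);	/* creator holds one reference */
	return mm;
}

static struct mm *mm_get(struct mm *mm)
{
	if (mm)
		atomic_fetch_add(&mm->users, 1);	/* pin: safe to use unlocked */
	return mm;
}

static void mm_put(struct mm *mm)
{
	/* fetch_sub returns the old value: 1 means we dropped the last pin */
	if (mm && atomic_fetch_sub(&mm->users, 1) == 1)
		free(mm);
}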