aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/cpusets.txt25
-rw-r--r--include/linux/mempolicy.h7
-rw-r--r--kernel/cpuset.c38
3 files changed, 68 insertions, 2 deletions
diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index a09a8eb80665..e2d9afc30d2d 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -192,6 +192,7 @@ containing the following files describing that cpuset:
192 192
193 - cpus: list of CPUs in that cpuset 193 - cpus: list of CPUs in that cpuset
194 - mems: list of Memory Nodes in that cpuset 194 - mems: list of Memory Nodes in that cpuset
195 - memory_migrate flag: if set, move pages to the cpuset's nodes
195 - cpu_exclusive flag: is cpu placement exclusive? 196 - cpu_exclusive flag: is cpu placement exclusive?
196 - mem_exclusive flag: is memory placement exclusive? 197 - mem_exclusive flag: is memory placement exclusive?
197 - tasks: list of tasks (by pid) attached to that cpuset 198 - tasks: list of tasks (by pid) attached to that cpuset
@@ -277,6 +278,30 @@ rewritten to the 'tasks' file of its cpuset. This is done to avoid
277impacting the scheduler code in the kernel with a check for changes 278impacting the scheduler code in the kernel with a check for changes
278in a tasks processor placement. 279in a tasks processor placement.
279 280
281Normally, once a page is allocated (given a physical page
282of main memory) then that page stays on whatever node it
283was allocated on, so long as it remains allocated, even if the
284cpuset's memory placement policy 'mems' subsequently changes.
285If the cpuset flag file 'memory_migrate' is set true, then when
286a task is attached to that cpuset, any pages that task had
287allocated to it on nodes in its previous cpuset are migrated
288to the task's new cpuset. Depending on the implementation,
289this migration may either be done by swapping the page out,
290so that the next time the page is referenced, it will be paged
291into the task's new cpuset, usually on the node where it was
292referenced, or this migration may be done by directly copying
293the pages from the task's previous cpuset to the new cpuset,
294where possible to the same node, relative to the new cpuset,
295as the node that held the page, relative to the old cpuset.
296Also if 'memory_migrate' is set true, then if that cpuset's
297'mems' file is modified, pages allocated to tasks in that
298cpuset, that were on nodes in the previous setting of 'mems',
299will be moved to nodes in the new setting of 'mems'. Again,
300depending on the implementation, this might be done by swapping,
301or by direct copying. In either case, pages that were not in
302the task's prior cpuset, or in the cpuset's prior 'mems' setting,
303will not be moved.
304
280There is an exception to the above. If hotplug functionality is used 305There is an exception to the above. If hotplug functionality is used
281to remove all the CPUs that are currently assigned to a cpuset, 306to remove all the CPUs that are currently assigned to a cpuset,
282then the kernel will automatically update the cpus_allowed of all 307then the kernel will automatically update the cpus_allowed of all
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3e61e829681d..66247eff24a0 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -235,6 +235,13 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
235 return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); 235 return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
236} 236}
237 237
238static inline int do_migrate_pages(struct mm_struct *mm,
239 const nodemask_t *from_nodes,
240 const nodemask_t *to_nodes, int flags)
241{
242 return 0;
243}
244
238static inline void check_highest_zone(int k) 245static inline void check_highest_zone(int k)
239{ 246{
240} 247}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7430640f9816..f63383e01ec7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -87,6 +87,7 @@ struct cpuset {
87typedef enum { 87typedef enum {
88 CS_CPU_EXCLUSIVE, 88 CS_CPU_EXCLUSIVE,
89 CS_MEM_EXCLUSIVE, 89 CS_MEM_EXCLUSIVE,
90 CS_MEMORY_MIGRATE,
90 CS_REMOVED, 91 CS_REMOVED,
91 CS_NOTIFY_ON_RELEASE 92 CS_NOTIFY_ON_RELEASE
92} cpuset_flagbits_t; 93} cpuset_flagbits_t;
@@ -112,6 +113,11 @@ static inline int notify_on_release(const struct cpuset *cs)
112 return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 113 return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
113} 114}
114 115
116static inline int is_memory_migrate(const struct cpuset *cs)
117{
118 return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
119}
120
115/* 121/*
116 * Increment this atomic integer everytime any cpuset changes its 122 * Increment this atomic integer everytime any cpuset changes its
117 * mems_allowed value. Users of cpusets can track this generation 123 * mems_allowed value. Users of cpusets can track this generation
@@ -602,16 +608,24 @@ static void refresh_mems(void)
602 if (current->cpuset_mems_generation != my_cpusets_mem_gen) { 608 if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
603 struct cpuset *cs; 609 struct cpuset *cs;
604 nodemask_t oldmem = current->mems_allowed; 610 nodemask_t oldmem = current->mems_allowed;
611 int migrate;
605 612
606 down(&callback_sem); 613 down(&callback_sem);
607 task_lock(current); 614 task_lock(current);
608 cs = current->cpuset; 615 cs = current->cpuset;
616 migrate = is_memory_migrate(cs);
609 guarantee_online_mems(cs, &current->mems_allowed); 617 guarantee_online_mems(cs, &current->mems_allowed);
610 current->cpuset_mems_generation = cs->mems_generation; 618 current->cpuset_mems_generation = cs->mems_generation;
611 task_unlock(current); 619 task_unlock(current);
612 up(&callback_sem); 620 up(&callback_sem);
613 if (!nodes_equal(oldmem, current->mems_allowed)) 621 if (!nodes_equal(oldmem, current->mems_allowed)) {
614 numa_policy_rebind(&oldmem, &current->mems_allowed); 622 numa_policy_rebind(&oldmem, &current->mems_allowed);
623 if (migrate) {
624 do_migrate_pages(current->mm, &oldmem,
625 &current->mems_allowed,
626 MPOL_MF_MOVE_ALL);
627 }
628 }
615 } 629 }
616} 630}
617 631
@@ -795,7 +809,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
795/* 809/*
796 * update_flag - read a 0 or a 1 in a file and update associated flag 810 * update_flag - read a 0 or a 1 in a file and update associated flag
797 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 811 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
798 * CS_NOTIFY_ON_RELEASE) 812 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
799 * cs: the cpuset to update 813 * cs: the cpuset to update
800 * buf: the buffer where we read the 0 or 1 814 * buf: the buffer where we read the 0 or 1
801 * 815 *
@@ -848,6 +862,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
848 struct task_struct *tsk; 862 struct task_struct *tsk;
849 struct cpuset *oldcs; 863 struct cpuset *oldcs;
850 cpumask_t cpus; 864 cpumask_t cpus;
865 nodemask_t from, to;
851 866
852 if (sscanf(pidbuf, "%d", &pid) != 1) 867 if (sscanf(pidbuf, "%d", &pid) != 1)
853 return -EIO; 868 return -EIO;
@@ -893,7 +908,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
893 guarantee_online_cpus(cs, &cpus); 908 guarantee_online_cpus(cs, &cpus);
894 set_cpus_allowed(tsk, cpus); 909 set_cpus_allowed(tsk, cpus);
895 910
911 from = oldcs->mems_allowed;
912 to = cs->mems_allowed;
913
896 up(&callback_sem); 914 up(&callback_sem);
915 if (is_memory_migrate(cs))
916 do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
897 put_task_struct(tsk); 917 put_task_struct(tsk);
898 if (atomic_dec_and_test(&oldcs->count)) 918 if (atomic_dec_and_test(&oldcs->count))
899 check_for_release(oldcs, ppathbuf); 919 check_for_release(oldcs, ppathbuf);
@@ -905,6 +925,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
905typedef enum { 925typedef enum {
906 FILE_ROOT, 926 FILE_ROOT,
907 FILE_DIR, 927 FILE_DIR,
928 FILE_MEMORY_MIGRATE,
908 FILE_CPULIST, 929 FILE_CPULIST,
909 FILE_MEMLIST, 930 FILE_MEMLIST,
910 FILE_CPU_EXCLUSIVE, 931 FILE_CPU_EXCLUSIVE,
@@ -960,6 +981,9 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
960 case FILE_NOTIFY_ON_RELEASE: 981 case FILE_NOTIFY_ON_RELEASE:
961 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); 982 retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
962 break; 983 break;
984 case FILE_MEMORY_MIGRATE:
985 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
986 break;
963 case FILE_TASKLIST: 987 case FILE_TASKLIST:
964 retval = attach_task(cs, buffer, &pathbuf); 988 retval = attach_task(cs, buffer, &pathbuf);
965 break; 989 break;
@@ -1060,6 +1084,9 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1060 case FILE_NOTIFY_ON_RELEASE: 1084 case FILE_NOTIFY_ON_RELEASE:
1061 *s++ = notify_on_release(cs) ? '1' : '0'; 1085 *s++ = notify_on_release(cs) ? '1' : '0';
1062 break; 1086 break;
1087 case FILE_MEMORY_MIGRATE:
1088 *s++ = is_memory_migrate(cs) ? '1' : '0';
1089 break;
1063 default: 1090 default:
1064 retval = -EINVAL; 1091 retval = -EINVAL;
1065 goto out; 1092 goto out;
@@ -1408,6 +1435,11 @@ static struct cftype cft_notify_on_release = {
1408 .private = FILE_NOTIFY_ON_RELEASE, 1435 .private = FILE_NOTIFY_ON_RELEASE,
1409}; 1436};
1410 1437
1438static struct cftype cft_memory_migrate = {
1439 .name = "memory_migrate",
1440 .private = FILE_MEMORY_MIGRATE,
1441};
1442
1411static int cpuset_populate_dir(struct dentry *cs_dentry) 1443static int cpuset_populate_dir(struct dentry *cs_dentry)
1412{ 1444{
1413 int err; 1445 int err;
@@ -1422,6 +1454,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
1422 return err; 1454 return err;
1423 if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) 1455 if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
1424 return err; 1456 return err;
1457 if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
1458 return err;
1425 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) 1459 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
1426 return err; 1460 return err;
1427 return 0; 1461 return 0;