From 45b07ef31d1182d2cfde7711327e3afb268bb1ac Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sun, 8 Jan 2006 01:00:56 -0800 Subject: [PATCH] cpusets: swap migration interface Add a boolean "memory_migrate" to each cpuset, represented by a file containing "0" or "1" in each directory below /dev/cpuset. It defaults to false (file contains "0"). It can be set true by writing "1" to the file. If true, then anytime that a task is attached to the cpuset so marked, the pages of that task will be moved to that cpuset, preserving, to the extent practical, the cpuset-relative placement of the pages. Also anytime that a cpuset so marked has its memory placement changed (by writing to its "mems" file), the tasks in that cpuset will have their pages moved to the cpuset's new nodes, preserving, to the extent practical, the cpuset-relative placement of the moved pages. Signed-off-by: Paul Jackson Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7430640f9816..f63383e01ec7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -87,6 +87,7 @@ struct cpuset { typedef enum { CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, + CS_MEMORY_MIGRATE, CS_REMOVED, CS_NOTIFY_ON_RELEASE } cpuset_flagbits_t; @@ -112,6 +113,11 @@ static inline int notify_on_release(const struct cpuset *cs) return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); } +static inline int is_memory_migrate(const struct cpuset *cs) +{ + return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags); +} + /* * Increment this atomic integer everytime any cpuset changes its * mems_allowed value. 
Users of cpusets can track this generation @@ -602,16 +608,24 @@ static void refresh_mems(void) if (current->cpuset_mems_generation != my_cpusets_mem_gen) { struct cpuset *cs; nodemask_t oldmem = current->mems_allowed; + int migrate; down(&callback_sem); task_lock(current); cs = current->cpuset; + migrate = is_memory_migrate(cs); guarantee_online_mems(cs, &current->mems_allowed); current->cpuset_mems_generation = cs->mems_generation; task_unlock(current); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) + if (!nodes_equal(oldmem, current->mems_allowed)) { numa_policy_rebind(&oldmem, &current->mems_allowed); + if (migrate) { + do_migrate_pages(current->mm, &oldmem, + &current->mems_allowed, + MPOL_MF_MOVE_ALL); + } + } } } @@ -795,7 +809,7 @@ static int update_nodemask(struct cpuset *cs, char *buf) /* * update_flag - read a 0 or a 1 in a file and update associated flag * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, - * CS_NOTIFY_ON_RELEASE) + * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) * cs: the cpuset to update * buf: the buffer where we read the 0 or 1 * @@ -848,6 +862,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) struct task_struct *tsk; struct cpuset *oldcs; cpumask_t cpus; + nodemask_t from, to; if (sscanf(pidbuf, "%d", &pid) != 1) return -EIO; @@ -893,7 +908,12 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) guarantee_online_cpus(cs, &cpus); set_cpus_allowed(tsk, cpus); + from = oldcs->mems_allowed; + to = cs->mems_allowed; + up(&callback_sem); + if (is_memory_migrate(cs)) + do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL); put_task_struct(tsk); if (atomic_dec_and_test(&oldcs->count)) check_for_release(oldcs, ppathbuf); @@ -905,6 +925,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) typedef enum { FILE_ROOT, FILE_DIR, + FILE_MEMORY_MIGRATE, FILE_CPULIST, FILE_MEMLIST, FILE_CPU_EXCLUSIVE, @@ -960,6 +981,9 @@ static ssize_t 
cpuset_common_file_write(struct file *file, const char __user *us case FILE_NOTIFY_ON_RELEASE: retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); break; + case FILE_MEMORY_MIGRATE: + retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + break; case FILE_TASKLIST: retval = attach_task(cs, buffer, &pathbuf); break; @@ -1060,6 +1084,9 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, case FILE_NOTIFY_ON_RELEASE: *s++ = notify_on_release(cs) ? '1' : '0'; break; + case FILE_MEMORY_MIGRATE: + *s++ = is_memory_migrate(cs) ? '1' : '0'; + break; default: retval = -EINVAL; goto out; @@ -1408,6 +1435,11 @@ static struct cftype cft_notify_on_release = { .private = FILE_NOTIFY_ON_RELEASE, }; +static struct cftype cft_memory_migrate = { + .name = "memory_migrate", + .private = FILE_MEMORY_MIGRATE, +}; + static int cpuset_populate_dir(struct dentry *cs_dentry) { int err; @@ -1422,6 +1454,8 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) return err; if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) return err; + if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) + return err; if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) return err; return 0; -- cgit v1.2.2