about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  104
1 file changed, 98 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 44d13c246e5c..38f18b33de6c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,15 +4,14 @@
4 * Processor and Memory placement constraints for sets of tasks. 4 * Processor and Memory placement constraints for sets of tasks.
5 * 5 *
6 * Copyright (C) 2003 BULL SA. 6 * Copyright (C) 2003 BULL SA.
7 * Copyright (C) 2004 Silicon Graphics, Inc. 7 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
8 * 8 *
9 * Portions derived from Patrick Mochel's sysfs code. 9 * Portions derived from Patrick Mochel's sysfs code.
10 * sysfs is Copyright (c) 2001-3 Patrick Mochel 10 * sysfs is Copyright (c) 2001-3 Patrick Mochel
11 * Portions Copyright (c) 2004 Silicon Graphics, Inc.
12 * 11 *
13 * 2003-10-10 Written by Simon Derr <simon.derr@bull.net> 12 * 2003-10-10 Written by Simon Derr.
14 * 2003-10-22 Updates by Stephen Hemminger. 13 * 2003-10-22 Updates by Stephen Hemminger.
15 * 2004 May-July Rework by Paul Jackson <pj@sgi.com> 14 * 2004 May-July Rework by Paul Jackson.
16 * 15 *
17 * This file is subject to the terms and conditions of the GNU General Public 16 * This file is subject to the terms and conditions of the GNU General Public
18 * License. See the file COPYING in the main directory of the Linux 17 * License. See the file COPYING in the main directory of the Linux
@@ -108,7 +107,9 @@ typedef enum {
108 CS_MEM_EXCLUSIVE, 107 CS_MEM_EXCLUSIVE,
109 CS_MEMORY_MIGRATE, 108 CS_MEMORY_MIGRATE,
110 CS_REMOVED, 109 CS_REMOVED,
111 CS_NOTIFY_ON_RELEASE 110 CS_NOTIFY_ON_RELEASE,
111 CS_SPREAD_PAGE,
112 CS_SPREAD_SLAB,
112} cpuset_flagbits_t; 113} cpuset_flagbits_t;
113 114
114/* convenient tests for these bits */ 115/* convenient tests for these bits */
@@ -137,6 +138,16 @@ static inline int is_memory_migrate(const struct cpuset *cs)
137 return test_bit(CS_MEMORY_MIGRATE, &cs->flags); 138 return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
138} 139}
139 140
141static inline int is_spread_page(const struct cpuset *cs)
142{
143 return test_bit(CS_SPREAD_PAGE, &cs->flags);
144}
145
146static inline int is_spread_slab(const struct cpuset *cs)
147{
148 return test_bit(CS_SPREAD_SLAB, &cs->flags);
149}
150
140/* 151/*
141 * Increment this atomic integer every time any cpuset changes its 152 * Increment this atomic integer every time any cpuset changes its
142 * mems_allowed value. Users of cpusets can track this generation 153 * mems_allowed value. Users of cpusets can track this generation
@@ -657,6 +668,14 @@ void cpuset_update_task_memory_state(void)
657 cs = tsk->cpuset; /* Maybe changed when task not locked */ 668 cs = tsk->cpuset; /* Maybe changed when task not locked */
658 guarantee_online_mems(cs, &tsk->mems_allowed); 669 guarantee_online_mems(cs, &tsk->mems_allowed);
659 tsk->cpuset_mems_generation = cs->mems_generation; 670 tsk->cpuset_mems_generation = cs->mems_generation;
671 if (is_spread_page(cs))
672 tsk->flags |= PF_SPREAD_PAGE;
673 else
674 tsk->flags &= ~PF_SPREAD_PAGE;
675 if (is_spread_slab(cs))
676 tsk->flags |= PF_SPREAD_SLAB;
677 else
678 tsk->flags &= ~PF_SPREAD_SLAB;
660 task_unlock(tsk); 679 task_unlock(tsk);
661 mutex_unlock(&callback_mutex); 680 mutex_unlock(&callback_mutex);
662 mpol_rebind_task(tsk, &tsk->mems_allowed); 681 mpol_rebind_task(tsk, &tsk->mems_allowed);
@@ -956,7 +975,8 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
956/* 975/*
957 * update_flag - read a 0 or a 1 in a file and update associated flag 976 * update_flag - read a 0 or a 1 in a file and update associated flag
958 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 977 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
959 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) 978 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
979 * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
960 * cs: the cpuset to update 980 * cs: the cpuset to update
961 * buf: the buffer where we read the 0 or 1 981 * buf: the buffer where we read the 0 or 1
962 * 982 *
@@ -1187,6 +1207,8 @@ typedef enum {
1187 FILE_NOTIFY_ON_RELEASE, 1207 FILE_NOTIFY_ON_RELEASE,
1188 FILE_MEMORY_PRESSURE_ENABLED, 1208 FILE_MEMORY_PRESSURE_ENABLED,
1189 FILE_MEMORY_PRESSURE, 1209 FILE_MEMORY_PRESSURE,
1210 FILE_SPREAD_PAGE,
1211 FILE_SPREAD_SLAB,
1190 FILE_TASKLIST, 1212 FILE_TASKLIST,
1191} cpuset_filetype_t; 1213} cpuset_filetype_t;
1192 1214
@@ -1246,6 +1268,14 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
1246 case FILE_MEMORY_PRESSURE: 1268 case FILE_MEMORY_PRESSURE:
1247 retval = -EACCES; 1269 retval = -EACCES;
1248 break; 1270 break;
1271 case FILE_SPREAD_PAGE:
1272 retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
1273 cs->mems_generation = atomic_inc_return(&cpuset_mems_generation);
1274 break;
1275 case FILE_SPREAD_SLAB:
1276 retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
1277 cs->mems_generation = atomic_inc_return(&cpuset_mems_generation);
1278 break;
1249 case FILE_TASKLIST: 1279 case FILE_TASKLIST:
1250 retval = attach_task(cs, buffer, &pathbuf); 1280 retval = attach_task(cs, buffer, &pathbuf);
1251 break; 1281 break;
@@ -1355,6 +1385,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1355 case FILE_MEMORY_PRESSURE: 1385 case FILE_MEMORY_PRESSURE:
1356 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); 1386 s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
1357 break; 1387 break;
1388 case FILE_SPREAD_PAGE:
1389 *s++ = is_spread_page(cs) ? '1' : '0';
1390 break;
1391 case FILE_SPREAD_SLAB:
1392 *s++ = is_spread_slab(cs) ? '1' : '0';
1393 break;
1358 default: 1394 default:
1359 retval = -EINVAL; 1395 retval = -EINVAL;
1360 goto out; 1396 goto out;
@@ -1718,6 +1754,16 @@ static struct cftype cft_memory_pressure = {
1718 .private = FILE_MEMORY_PRESSURE, 1754 .private = FILE_MEMORY_PRESSURE,
1719}; 1755};
1720 1756
1757static struct cftype cft_spread_page = {
1758 .name = "memory_spread_page",
1759 .private = FILE_SPREAD_PAGE,
1760};
1761
1762static struct cftype cft_spread_slab = {
1763 .name = "memory_spread_slab",
1764 .private = FILE_SPREAD_SLAB,
1765};
1766
1721static int cpuset_populate_dir(struct dentry *cs_dentry) 1767static int cpuset_populate_dir(struct dentry *cs_dentry)
1722{ 1768{
1723 int err; 1769 int err;
@@ -1736,6 +1782,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
1736 return err; 1782 return err;
1737 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) 1783 if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
1738 return err; 1784 return err;
1785 if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
1786 return err;
1787 if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
1788 return err;
1739 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) 1789 if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
1740 return err; 1790 return err;
1741 return 0; 1791 return 0;
@@ -1764,6 +1814,10 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
1764 cs->flags = 0; 1814 cs->flags = 0;
1765 if (notify_on_release(parent)) 1815 if (notify_on_release(parent))
1766 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); 1816 set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
1817 if (is_spread_page(parent))
1818 set_bit(CS_SPREAD_PAGE, &cs->flags);
1819 if (is_spread_slab(parent))
1820 set_bit(CS_SPREAD_SLAB, &cs->flags);
1767 cs->cpus_allowed = CPU_MASK_NONE; 1821 cs->cpus_allowed = CPU_MASK_NONE;
1768 cs->mems_allowed = NODE_MASK_NONE; 1822 cs->mems_allowed = NODE_MASK_NONE;
1769 atomic_set(&cs->count, 0); 1823 atomic_set(&cs->count, 0);
@@ -2201,6 +2255,44 @@ void cpuset_unlock(void)
2201} 2255}
2202 2256
2203/** 2257/**
2258 * cpuset_mem_spread_node() - On which node to begin search for a page
2259 *
2260 * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
2261 * tasks in a cpuset with is_spread_page or is_spread_slab set),
2262 * and if the memory allocation used cpuset_mem_spread_node()
2263 * to determine on which node to start looking, as it will for
2264 * certain page cache or slab cache pages such as used for file
2265 * system buffers and inode caches, then instead of starting on the
2266 * local node to look for a free page, rather spread the starting
2267 * node around the task's mems_allowed nodes.
2268 *
2269 * We don't have to worry about the returned node being offline
2270 * because "it can't happen", and even if it did, it would be ok.
2271 *
2272 * The routines calling guarantee_online_mems() are careful to
2273 * only set nodes in task->mems_allowed that are online. So it
2274 * should not be possible for the following code to return an
2275 * offline node. But if it did, that would be ok, as this routine
2276 * is not returning the node where the allocation must be, only
2277 * the node where the search should start. The zonelist passed to
2278 * __alloc_pages() will include all nodes. If the slab allocator
2279 * is passed an offline node, it will fall back to the local node.
2280 * See kmem_cache_alloc_node().
2281 */
2282
2283int cpuset_mem_spread_node(void)
2284{
2285 int node;
2286
2287 node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed);
2288 if (node == MAX_NUMNODES)
2289 node = first_node(current->mems_allowed);
2290 current->cpuset_mem_spread_rotor = node;
2291 return node;
2292}
2293EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2294
2295/**
2204 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? 2296 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
2205 * @p: pointer to task_struct of some other task. 2297 * @p: pointer to task_struct of some other task.
2206 * 2298 *