diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 104 |
1 file changed, 98 insertions, 6 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 44d13c246e5c..38f18b33de6c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -4,15 +4,14 @@ | |||
4 | * Processor and Memory placement constraints for sets of tasks. | 4 | * Processor and Memory placement constraints for sets of tasks. |
5 | * | 5 | * |
6 | * Copyright (C) 2003 BULL SA. | 6 | * Copyright (C) 2003 BULL SA. |
7 | * Copyright (C) 2004 Silicon Graphics, Inc. | 7 | * Copyright (C) 2004-2006 Silicon Graphics, Inc. |
8 | * | 8 | * |
9 | * Portions derived from Patrick Mochel's sysfs code. | 9 | * Portions derived from Patrick Mochel's sysfs code. |
10 | * sysfs is Copyright (c) 2001-3 Patrick Mochel | 10 | * sysfs is Copyright (c) 2001-3 Patrick Mochel |
11 | * Portions Copyright (c) 2004 Silicon Graphics, Inc. | ||
12 | * | 11 | * |
13 | * 2003-10-10 Written by Simon Derr <simon.derr@bull.net> | 12 | * 2003-10-10 Written by Simon Derr. |
14 | * 2003-10-22 Updates by Stephen Hemminger. | 13 | * 2003-10-22 Updates by Stephen Hemminger. |
15 | * 2004 May-July Rework by Paul Jackson <pj@sgi.com> | 14 | * 2004 May-July Rework by Paul Jackson. |
16 | * | 15 | * |
17 | * This file is subject to the terms and conditions of the GNU General Public | 16 | * This file is subject to the terms and conditions of the GNU General Public |
18 | * License. See the file COPYING in the main directory of the Linux | 17 | * License. See the file COPYING in the main directory of the Linux |
@@ -108,7 +107,9 @@ typedef enum { | |||
108 | CS_MEM_EXCLUSIVE, | 107 | CS_MEM_EXCLUSIVE, |
109 | CS_MEMORY_MIGRATE, | 108 | CS_MEMORY_MIGRATE, |
110 | CS_REMOVED, | 109 | CS_REMOVED, |
111 | CS_NOTIFY_ON_RELEASE | 110 | CS_NOTIFY_ON_RELEASE, |
111 | CS_SPREAD_PAGE, | ||
112 | CS_SPREAD_SLAB, | ||
112 | } cpuset_flagbits_t; | 113 | } cpuset_flagbits_t; |
113 | 114 | ||
114 | /* convenient tests for these bits */ | 115 | /* convenient tests for these bits */ |
@@ -137,6 +138,16 @@ static inline int is_memory_migrate(const struct cpuset *cs) | |||
137 | return test_bit(CS_MEMORY_MIGRATE, &cs->flags); | 138 | return test_bit(CS_MEMORY_MIGRATE, &cs->flags); |
138 | } | 139 | } |
139 | 140 | ||
141 | static inline int is_spread_page(const struct cpuset *cs) | ||
142 | { | ||
143 | return test_bit(CS_SPREAD_PAGE, &cs->flags); | ||
144 | } | ||
145 | |||
146 | static inline int is_spread_slab(const struct cpuset *cs) | ||
147 | { | ||
148 | return test_bit(CS_SPREAD_SLAB, &cs->flags); | ||
149 | } | ||
150 | |||
140 | /* | 151 | /* |
141 | * Increment this atomic integer everytime any cpuset changes its | 152 | * Increment this atomic integer everytime any cpuset changes its |
142 | * mems_allowed value. Users of cpusets can track this generation | 153 | * mems_allowed value. Users of cpusets can track this generation |
@@ -657,6 +668,14 @@ void cpuset_update_task_memory_state(void) | |||
657 | cs = tsk->cpuset; /* Maybe changed when task not locked */ | 668 | cs = tsk->cpuset; /* Maybe changed when task not locked */ |
658 | guarantee_online_mems(cs, &tsk->mems_allowed); | 669 | guarantee_online_mems(cs, &tsk->mems_allowed); |
659 | tsk->cpuset_mems_generation = cs->mems_generation; | 670 | tsk->cpuset_mems_generation = cs->mems_generation; |
671 | if (is_spread_page(cs)) | ||
672 | tsk->flags |= PF_SPREAD_PAGE; | ||
673 | else | ||
674 | tsk->flags &= ~PF_SPREAD_PAGE; | ||
675 | if (is_spread_slab(cs)) | ||
676 | tsk->flags |= PF_SPREAD_SLAB; | ||
677 | else | ||
678 | tsk->flags &= ~PF_SPREAD_SLAB; | ||
660 | task_unlock(tsk); | 679 | task_unlock(tsk); |
661 | mutex_unlock(&callback_mutex); | 680 | mutex_unlock(&callback_mutex); |
662 | mpol_rebind_task(tsk, &tsk->mems_allowed); | 681 | mpol_rebind_task(tsk, &tsk->mems_allowed); |
@@ -956,7 +975,8 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | |||
956 | /* | 975 | /* |
957 | * update_flag - read a 0 or a 1 in a file and update associated flag | 976 | * update_flag - read a 0 or a 1 in a file and update associated flag |
958 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 977 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, |
959 | * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE) | 978 | * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, |
979 | * CS_SPREAD_PAGE, CS_SPREAD_SLAB) | ||
960 | * cs: the cpuset to update | 980 | * cs: the cpuset to update |
961 | * buf: the buffer where we read the 0 or 1 | 981 | * buf: the buffer where we read the 0 or 1 |
962 | * | 982 | * |
@@ -1187,6 +1207,8 @@ typedef enum { | |||
1187 | FILE_NOTIFY_ON_RELEASE, | 1207 | FILE_NOTIFY_ON_RELEASE, |
1188 | FILE_MEMORY_PRESSURE_ENABLED, | 1208 | FILE_MEMORY_PRESSURE_ENABLED, |
1189 | FILE_MEMORY_PRESSURE, | 1209 | FILE_MEMORY_PRESSURE, |
1210 | FILE_SPREAD_PAGE, | ||
1211 | FILE_SPREAD_SLAB, | ||
1190 | FILE_TASKLIST, | 1212 | FILE_TASKLIST, |
1191 | } cpuset_filetype_t; | 1213 | } cpuset_filetype_t; |
1192 | 1214 | ||
@@ -1246,6 +1268,14 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us | |||
1246 | case FILE_MEMORY_PRESSURE: | 1268 | case FILE_MEMORY_PRESSURE: |
1247 | retval = -EACCES; | 1269 | retval = -EACCES; |
1248 | break; | 1270 | break; |
1271 | case FILE_SPREAD_PAGE: | ||
1272 | retval = update_flag(CS_SPREAD_PAGE, cs, buffer); | ||
1273 | cs->mems_generation = atomic_inc_return(&cpuset_mems_generation); | ||
1274 | break; | ||
1275 | case FILE_SPREAD_SLAB: | ||
1276 | retval = update_flag(CS_SPREAD_SLAB, cs, buffer); | ||
1277 | cs->mems_generation = atomic_inc_return(&cpuset_mems_generation); | ||
1278 | break; | ||
1249 | case FILE_TASKLIST: | 1279 | case FILE_TASKLIST: |
1250 | retval = attach_task(cs, buffer, &pathbuf); | 1280 | retval = attach_task(cs, buffer, &pathbuf); |
1251 | break; | 1281 | break; |
@@ -1355,6 +1385,12 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, | |||
1355 | case FILE_MEMORY_PRESSURE: | 1385 | case FILE_MEMORY_PRESSURE: |
1356 | s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); | 1386 | s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter)); |
1357 | break; | 1387 | break; |
1388 | case FILE_SPREAD_PAGE: | ||
1389 | *s++ = is_spread_page(cs) ? '1' : '0'; | ||
1390 | break; | ||
1391 | case FILE_SPREAD_SLAB: | ||
1392 | *s++ = is_spread_slab(cs) ? '1' : '0'; | ||
1393 | break; | ||
1358 | default: | 1394 | default: |
1359 | retval = -EINVAL; | 1395 | retval = -EINVAL; |
1360 | goto out; | 1396 | goto out; |
@@ -1718,6 +1754,16 @@ static struct cftype cft_memory_pressure = { | |||
1718 | .private = FILE_MEMORY_PRESSURE, | 1754 | .private = FILE_MEMORY_PRESSURE, |
1719 | }; | 1755 | }; |
1720 | 1756 | ||
1757 | static struct cftype cft_spread_page = { | ||
1758 | .name = "memory_spread_page", | ||
1759 | .private = FILE_SPREAD_PAGE, | ||
1760 | }; | ||
1761 | |||
1762 | static struct cftype cft_spread_slab = { | ||
1763 | .name = "memory_spread_slab", | ||
1764 | .private = FILE_SPREAD_SLAB, | ||
1765 | }; | ||
1766 | |||
1721 | static int cpuset_populate_dir(struct dentry *cs_dentry) | 1767 | static int cpuset_populate_dir(struct dentry *cs_dentry) |
1722 | { | 1768 | { |
1723 | int err; | 1769 | int err; |
@@ -1736,6 +1782,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry) | |||
1736 | return err; | 1782 | return err; |
1737 | if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) | 1783 | if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) |
1738 | return err; | 1784 | return err; |
1785 | if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0) | ||
1786 | return err; | ||
1787 | if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0) | ||
1788 | return err; | ||
1739 | if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) | 1789 | if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) |
1740 | return err; | 1790 | return err; |
1741 | return 0; | 1791 | return 0; |
@@ -1764,6 +1814,10 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) | |||
1764 | cs->flags = 0; | 1814 | cs->flags = 0; |
1765 | if (notify_on_release(parent)) | 1815 | if (notify_on_release(parent)) |
1766 | set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); | 1816 | set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); |
1817 | if (is_spread_page(parent)) | ||
1818 | set_bit(CS_SPREAD_PAGE, &cs->flags); | ||
1819 | if (is_spread_slab(parent)) | ||
1820 | set_bit(CS_SPREAD_SLAB, &cs->flags); | ||
1767 | cs->cpus_allowed = CPU_MASK_NONE; | 1821 | cs->cpus_allowed = CPU_MASK_NONE; |
1768 | cs->mems_allowed = NODE_MASK_NONE; | 1822 | cs->mems_allowed = NODE_MASK_NONE; |
1769 | atomic_set(&cs->count, 0); | 1823 | atomic_set(&cs->count, 0); |
@@ -2201,6 +2255,44 @@ void cpuset_unlock(void) | |||
2201 | } | 2255 | } |
2202 | 2256 | ||
2203 | /** | 2257 | /** |
2258 | * cpuset_mem_spread_node() - On which node to begin search for a page | ||
2259 | * | ||
2260 | * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for | ||
2261 | * tasks in a cpuset with is_spread_page or is_spread_slab set), | ||
2262 | * and if the memory allocation used cpuset_mem_spread_node() | ||
2263 | * to determine on which node to start looking, as it will for | ||
2264 | * certain page cache or slab cache pages such as used for file | ||
2265 | * system buffers and inode caches, then instead of starting on the | ||
2266 | * local node to look for a free page, rather spread the starting | ||
2267 | * node around the tasks mems_allowed nodes. | ||
2268 | * | ||
2269 | * We don't have to worry about the returned node being offline | ||
2270 | * because "it can't happen", and even if it did, it would be ok. | ||
2271 | * | ||
2272 | * The routines calling guarantee_online_mems() are careful to | ||
2273 | * only set nodes in task->mems_allowed that are online. So it | ||
2274 | * should not be possible for the following code to return an | ||
2275 | * offline node. But if it did, that would be ok, as this routine | ||
2276 | * is not returning the node where the allocation must be, only | ||
2277 | * the node where the search should start. The zonelist passed to | ||
2278 | * __alloc_pages() will include all nodes. If the slab allocator | ||
2279 | * is passed an offline node, it will fall back to the local node. | ||
2280 | * See kmem_cache_alloc_node(). | ||
2281 | */ | ||
2282 | |||
2283 | int cpuset_mem_spread_node(void) | ||
2284 | { | ||
2285 | int node; | ||
2286 | |||
2287 | node = next_node(current->cpuset_mem_spread_rotor, current->mems_allowed); | ||
2288 | if (node == MAX_NUMNODES) | ||
2289 | node = first_node(current->mems_allowed); | ||
2290 | current->cpuset_mem_spread_rotor = node; | ||
2291 | return node; | ||
2292 | } | ||
2293 | EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); | ||
2294 | |||
2295 | /** | ||
2204 | * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? | 2296 | * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? |
2205 | * @p: pointer to task_struct of some other task. | 2297 | * @p: pointer to task_struct of some other task. |
2206 | * | 2298 | * |