author	David Rientjes <rientjes@google.com>	2008-04-28 05:12:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-04-28 11:58:19 -0400
commit	f5b087b52f1710eb0bf15a2d2b030c51a6a1ca9e (patch)
tree	66336235822f59215707dfa501e1d2b66b38a015
parent	028fec414d803117eb4b2ed12acb4dd5da65b32d (diff)
mempolicy: add MPOL_F_STATIC_NODES flag
Add an optional mempolicy mode flag, MPOL_F_STATIC_NODES, that suppresses the node remap when the policy is rebound.

Adds another member to struct mempolicy, nodemask_t user_nodemask, as part of a union with cpuset_mems_allowed:

	struct mempolicy {
		...
		union {
			nodemask_t cpuset_mems_allowed;
			nodemask_t user_nodemask;
		} w;
	}

that stores the nodemask that the user passed when he or she created the mempolicy via set_mempolicy() or mbind(). When using MPOL_F_STATIC_NODES, which may be passed with any mempolicy mode, the user's passed nodemask intersected with the VMA or task's allowed nodes is always used when determining the preferred node, setting the MPOL_BIND zonelist, or creating the interleave nodemask. This happens whenever the policy is rebound, including when a task's cpuset assignment changes or the cpuset's mems are changed.

This creates an interesting side-effect in that it allows the mempolicy "intent" to lie dormant and unaffected until it has access to the node(s) that it desires. For example, if you currently ask for an interleaved policy over a set of nodes that you do not have access to, the mempolicy is not created and the task continues to use the previous policy. With this change, however, it is possible to create the same mempolicy; it only takes effect once access to nodes in the nodemask is acquired.

It is also possible to mount tmpfs with the static nodemask behavior when specifying a node or nodemask. To do this, simply add "=static" immediately following the mempolicy mode at mount time:

	mount -o remount mpol=interleave=static:1-3

Also removes mpol_check_policy() and folds its logic into mpol_new() since it is now obsoleted. The unused vma_mpol_equal() is also removed.

Cc: Paul Jackson <pj@sgi.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
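A minimal userspace sketch of how the new flag is passed (not part of this patch): the optional mode flag is OR'd into the mode argument of set_mempolicy(2). This assumes <numaif.h> from libnuma and a kernel carrying this patch; the fallback #define is only needed until updated headers are installed, and error handling is elided.

	/*
	 * Usage sketch: interleave over nodes 1-3 with the static
	 * nodemask behavior.
	 */
	#include <numaif.h>
	#include <stdio.h>

	#ifndef MPOL_F_STATIC_NODES
	#define MPOL_F_STATIC_NODES	(1 << 15)	/* added by this patch */
	#endif

	int main(void)
	{
		unsigned long nodemask = 0x0e;	/* nodes 1, 2 and 3 */

		/* the optional mode flag is OR'd into the mode argument */
		if (set_mempolicy(MPOL_INTERLEAVE | MPOL_F_STATIC_NODES,
				  &nodemask, sizeof(nodemask) * 8))
			perror("set_mempolicy");
		return 0;
	}

If the task's cpuset later loses and then regains access to nodes 1-3, the interleave set snaps back to exactly those nodes instead of being remapped.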
-rw-r--r--	include/linux/mempolicy.h	11
-rw-r--r--	mm/mempolicy.c	172
-rw-r--r--	mm/shmem.c	2
3 files changed, 97 insertions, 88 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index f2bab4d2fc40..07350d7b8d96 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -24,11 +24,13 @@ enum {
 };
 
 /* Flags for set_mempolicy */
+#define MPOL_F_STATIC_NODES	(1 << 15)
+
 /*
  * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
  * either set_mempolicy() or mbind().
  */
-#define MPOL_MODE_FLAGS	(0)
+#define MPOL_MODE_FLAGS	(MPOL_F_STATIC_NODES)
 
 /* Flags for get_mempolicy */
 #define MPOL_F_NODE	(1<<0)	/* return next IL mode instead of node mask */
@@ -85,7 +87,10 @@ struct mempolicy {
 		nodemask_t	 nodes;		/* interleave/bind */
 		/* undefined for default */
 	} v;
-	nodemask_t cpuset_mems_allowed;	/* mempolicy relative to these nodes */
+	union {
+		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
+		nodemask_t user_nodemask;	/* nodemask passed by user */
+	} w;
 };
 
 /*
@@ -124,7 +129,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 1;
 	return __mpol_equal(a, b);
 }
-#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b))
 
 /* Could later add inheritance of the process policy here. */
 
@@ -190,7 +194,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	return 1;
 }
-#define vma_mpol_equal(a,b) 1
 
 #define mpol_set_vma_default(vma) do {} while(0)
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1f6ff9c1bbc3..d59b1e766aee 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,58 +113,6 @@ struct mempolicy default_policy = {
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask);
 
-/* Do sanity checking on a policy */
-static int mpol_check_policy(unsigned short mode, nodemask_t *nodes)
-{
-	int was_empty, is_empty;
-
-	if (!nodes)
-		return 0;
-
-	/*
-	 * "Contextualize" the in-coming nodemast for cpusets:
-	 * Remember whether in-coming nodemask was empty, If not,
-	 * restrict the nodes to the allowed nodes in the cpuset.
-	 * This is guaranteed to be a subset of nodes with memory.
-	 */
-	cpuset_update_task_memory_state();
-	is_empty = was_empty = nodes_empty(*nodes);
-	if (!was_empty) {
-		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
-		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
-	}
-
-	switch (mode) {
-	case MPOL_DEFAULT:
-		/*
-		 * require caller to specify an empty nodemask
-		 * before "contextualization"
-		 */
-		if (!was_empty)
-			return -EINVAL;
-		break;
-	case MPOL_BIND:
-	case MPOL_INTERLEAVE:
-		/*
-		 * require at least 1 valid node after "contextualization"
-		 */
-		if (is_empty)
-			return -EINVAL;
-		break;
-	case MPOL_PREFERRED:
-		/*
-		 * Did caller specify invalid nodes?
-		 * Don't silently accept this as "local allocation".
-		 */
-		if (!was_empty && is_empty)
-			return -EINVAL;
-		break;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
 /* Check that the nodemask contains at least one populated zone */
 static int is_valid_nodemask(nodemask_t *nodemask)
 {
@@ -186,48 +134,60 @@ static int is_valid_nodemask(nodemask_t *nodemask)
 	return 0;
 }
 
+static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
+{
+	return pol->flags & MPOL_F_STATIC_NODES;
+}
+
 /* Create a new policy */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
+	nodemask_t cpuset_context_nmask;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
 
 	if (mode == MPOL_DEFAULT)
-		return NULL;
+		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
+							 NULL;
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
 	atomic_set(&policy->refcnt, 1);
+	cpuset_update_task_memory_state();
+	nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
 	switch (mode) {
 	case MPOL_INTERLEAVE:
-		policy->v.nodes = *nodes;
-		if (nodes_weight(policy->v.nodes) == 0) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
+		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	case MPOL_PREFERRED:
-		policy->v.preferred_node = first_node(*nodes);
+		policy->v.preferred_node = first_node(cpuset_context_nmask);
 		if (policy->v.preferred_node >= MAX_NUMNODES)
-			policy->v.preferred_node = -1;
+			goto free;
 		break;
 	case MPOL_BIND:
-		if (!is_valid_nodemask(nodes)) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
-		policy->v.nodes = *nodes;
+		if (!is_valid_nodemask(&cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	default:
 		BUG();
 	}
 	policy->policy = mode;
 	policy->flags = flags;
-	policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
+	if (mpol_store_user_nodemask(policy))
+		policy->w.user_nodemask = *nodes;
+	else
+		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
 	return policy;
+
+free:
+	kmem_cache_free(policy_cache, policy);
+	return ERR_PTR(-EINVAL);
 }
 
 static void gather_stats(struct page *, void *, int pte_dirty);
@@ -473,15 +433,14 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 {
 	struct mempolicy *new;
 
-	if (mpol_check_policy(mode, nodes))
-		return -EINVAL;
 	new = mpol_new(mode, flags, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
 	mpol_set_task_struct_flag();
-	if (new && new->policy == MPOL_INTERLEAVE)
+	if (new && new->policy == MPOL_INTERLEAVE &&
+	    nodes_weight(new->v.nodes))
 		current->il_next = first_node(new->v.nodes);
 	return 0;
 }
@@ -796,9 +755,6 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (end == start)
 		return 0;
 
-	if (mpol_check_policy(mode, nmask))
-		return -EINVAL;
-
 	new = mpol_new(mode, mode_flags, nmask);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
@@ -1206,7 +1162,8 @@ static unsigned interleave_nodes(struct mempolicy *policy)
 	next = next_node(nid, policy->v.nodes);
 	if (next >= MAX_NUMNODES)
 		next = first_node(policy->v.nodes);
-	me->il_next = next;
+	if (next < MAX_NUMNODES)
+		me->il_next = next;
 	return nid;
 }
 
@@ -1252,10 +1209,13 @@ static unsigned offset_il_node(struct mempolicy *pol,
 		struct vm_area_struct *vma, unsigned long off)
 {
 	unsigned nnodes = nodes_weight(pol->v.nodes);
-	unsigned target = (unsigned)off % nnodes;
+	unsigned target;
 	int c;
 	int nid = -1;
 
+	if (!nnodes)
+		return numa_node_id();
+	target = (unsigned int)off % nnodes;
 	c = 0;
 	do {
 		nid = next_node(nid, pol->v.nodes);
@@ -1465,6 +1425,16 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
 	return new;
 }
 
+static int mpol_match_intent(const struct mempolicy *a,
+			     const struct mempolicy *b)
+{
+	if (a->flags != b->flags)
+		return 0;
+	if (!mpol_store_user_nodemask(a))
+		return 1;
+	return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
+}
+
 /* Slow path of a mempolicy comparison */
 int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
@@ -1472,6 +1442,8 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 0;
 	if (a->policy != b->policy)
 		return 0;
+	if (a->policy != MPOL_DEFAULT && !mpol_match_intent(a, b))
+		return 0;
 	switch (a->policy) {
 	case MPOL_DEFAULT:
 		return 1;
@@ -1771,13 +1743,14 @@ void numa_default_policy(void)
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask)
 {
-	nodemask_t *mpolmask;
 	nodemask_t tmp;
+	int static_nodes;
 
 	if (!pol)
 		return;
-	mpolmask = &pol->cpuset_mems_allowed;
-	if (nodes_equal(*mpolmask, *newmask))
+	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+	if (!mpol_store_user_nodemask(pol) &&
+	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
 
 	switch (pol->policy) {
@@ -1786,16 +1759,35 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
+		if (static_nodes)
+			nodes_and(tmp, pol->w.user_nodemask, *newmask);
+		else {
+			nodes_remap(tmp, pol->v.nodes,
+				    pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		pol->v.nodes = tmp;
-		*mpolmask = *newmask;
-		current->il_next = node_remap(current->il_next,
-						*mpolmask, *newmask);
+		if (!node_isset(current->il_next, tmp)) {
+			current->il_next = next_node(current->il_next, tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = first_node(tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = numa_node_id();
+		}
 		break;
 	case MPOL_PREFERRED:
-		pol->v.preferred_node = node_remap(pol->v.preferred_node,
-						*mpolmask, *newmask);
-		*mpolmask = *newmask;
+		if (static_nodes) {
+			int node = first_node(pol->w.user_nodemask);
+
+			if (node_isset(node, *newmask))
+				pol->v.preferred_node = node;
+			else
+				pol->v.preferred_node = -1;
+		} else {
+			pol->v.preferred_node = node_remap(pol->v.preferred_node,
+					pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		break;
 	default:
 		BUG();
@@ -1847,6 +1839,7 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	int l;
 	nodemask_t nodes;
 	unsigned short mode = pol ? pol->policy : MPOL_DEFAULT;
+	unsigned short flags = pol ? pol->flags : 0;
 
 	switch (mode) {
 	case MPOL_DEFAULT:
@@ -1876,6 +1869,17 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	strcpy(p, policy_types[mode]);
 	p += l;
 
+	if (flags) {
+		int need_bar = 0;
+
+		if (buffer + maxlen < p + 2)
+			return -ENOSPC;
+		*p++ = '=';
+
+		if (flags & MPOL_F_STATIC_NODES)
+			p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+	}
+
 	if (!nodes_empty(nodes)) {
 		if (buffer + maxlen < p + 2)
 			return -ENOSPC;
diff --git a/mm/shmem.c b/mm/shmem.c
index 1ccf794fbe61..3e9fda0ca470 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1126,6 +1126,8 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
 		err = 0;
 	}
 	if (flags) {
+		if (!strcmp(flags, "static"))
+			*mode_flags |= MPOL_F_STATIC_NODES;
 	}
 out:
 	/* Restore string for error message */
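To make the rebind semantics concrete, here is a hedged userspace illustration (not kernel code): plain bitmasks stand in for nodemask_t, remap() mimics what nodes_remap() does on the default rebind path, and the node numbers and masks are invented for the example.

	#include <stdio.h>

	/* ordinal of bit 'n' within 'mask' (how many set bits precede it) */
	static int ordinal(unsigned int mask, int n)
	{
		int i, ord = 0;

		for (i = 0; i < n; i++)
			if (mask & (1u << i))
				ord++;
		return ord;
	}

	/* the bit number holding ordinal 'ord' in 'mask' */
	static int node_at(unsigned int mask, int ord)
	{
		int i;

		for (i = 0; i < 32; i++)
			if ((mask & (1u << i)) && ord-- == 0)
				return i;
		return -1;
	}

	/* remap each node in 'src' from its position in 'old' to 'new' */
	static unsigned int remap(unsigned int src, unsigned int old,
				  unsigned int new)
	{
		unsigned int dst = 0;
		int n, w = __builtin_popcount(new);

		for (n = 0; n < 32; n++)
			if (src & (1u << n))
				dst |= 1u << node_at(new, ordinal(old, n) % w);
		return dst;
	}

	int main(void)
	{
		unsigned int user = 0x0e;	/* user asked for nodes 1-3  */
		unsigned int old  = 0x0f;	/* old cpuset mems: nodes 0-3 */
		unsigned int new  = 0xf0;	/* new cpuset mems: nodes 4-7 */

		/* default rebind follows the cpuset: nodes 1-3 become 5-7 */
		printf("remapped: 0x%02x\n", remap(user, old, new));
		/*
		 * MPOL_F_STATIC_NODES intersects instead: empty here, so
		 * the "intent" lies dormant until nodes 1-3 return.
		 */
		printf("static:   0x%02x\n", user & new);
		return 0;
	}

Running this prints "remapped: 0xe0" and "static: 0x00": the default rebind migrates the interleave set along with the cpuset, while the static variant keeps the user's original intent and simply has no eligible nodes until the cpuset regains them.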