 include/linux/mempolicy.h |  11
 mm/mempolicy.c            | 172
 mm/shmem.c                |   2
 3 files changed, 97 insertions, 88 deletions
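This patch introduces MPOL_F_STATIC_NODES, an optional mode flag for set_mempolicy() and mbind(). Without the flag, a policy's nodemask is remapped whenever the task's cpuset placement changes; with it, the kernel remembers the nodemask exactly as the user passed it and, on a cpuset change, simply intersects it with the newly allowed nodes. The sanity checking previously done by mpol_check_policy() is folded into mpol_new().

As a rough illustration of the user-visible API (a minimal sketch, not part of the patch; it assumes libnuma's <numaif.h> declares set_mempolicy() and MPOL_INTERLEAVE, and a machine with nodes 0 and 1 online):

#include <numaif.h>	/* set_mempolicy(), MPOL_INTERLEAVE; link with -lnuma */
#include <stdio.h>

#ifndef MPOL_F_STATIC_NODES
#define MPOL_F_STATIC_NODES	(1 << 15)	/* as defined by this patch */
#endif

int main(void)
{
	/* interleave over physical nodes 0 and 1 */
	unsigned long nodemask = (1UL << 0) | (1UL << 1);

	/* The flag is OR'd into the mode argument; it asks the kernel
	 * not to remap these nodes when the cpuset's mems change. */
	if (set_mempolicy(MPOL_INTERLEAVE | MPOL_F_STATIC_NODES,
			  &nodemask, sizeof(nodemask) * 8) < 0) {
		perror("set_mempolicy");
		return 1;
	}
	return 0;
}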
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index f2bab4d2fc40..07350d7b8d96 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -24,11 +24,13 @@ enum {
 };
 
 /* Flags for set_mempolicy */
+#define MPOL_F_STATIC_NODES	(1 << 15)
+
 /*
  * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
  * either set_mempolicy() or mbind().
  */
-#define MPOL_MODE_FLAGS	(0)
+#define MPOL_MODE_FLAGS	(MPOL_F_STATIC_NODES)
 
 /* Flags for get_mempolicy */
 #define MPOL_F_NODE	(1<<0)	/* return next IL mode instead of node mask */
@@ -85,7 +87,10 @@ struct mempolicy {
 		nodemask_t	 nodes;		/* interleave/bind */
 		/* undefined for default */
 	} v;
-	nodemask_t cpuset_mems_allowed;	/* mempolicy relative to these nodes */
+	union {
+		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
+		nodemask_t user_nodemask;	/* nodemask passed by user */
+	} w;
 };
 
 /*
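The two masks in the new 'w' union are mutually exclusive by construction: a policy either remembers the cpuset snapshot it was created under (for relative remapping) or the verbatim user nodemask (when MPOL_F_STATIC_NODES is set), never both. Which member is live is decided by the flag, via the mpol_store_user_nodemask() helper added later in mm/mempolicy.c.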
@@ -124,7 +129,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 1;
 	return __mpol_equal(a, b);
 }
-#define vma_mpol_equal(a,b) mpol_equal(vma_policy(a), vma_policy(b))
 
 /* Could later add inheritance of the process policy here. */
 
@@ -190,7 +194,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
 	return 1;
 }
-#define vma_mpol_equal(a,b) 1
 
 #define mpol_set_vma_default(vma) do {} while(0)
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1f6ff9c1bbc3..d59b1e766aee 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -113,58 +113,6 @@ struct mempolicy default_policy = {
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask);
 
-/* Do sanity checking on a policy */
-static int mpol_check_policy(unsigned short mode, nodemask_t *nodes)
-{
-	int was_empty, is_empty;
-
-	if (!nodes)
-		return 0;
-
-	/*
-	 * "Contextualize" the in-coming nodemast for cpusets:
-	 * Remember whether in-coming nodemask was empty, If not,
-	 * restrict the nodes to the allowed nodes in the cpuset.
-	 * This is guaranteed to be a subset of nodes with memory.
-	 */
-	cpuset_update_task_memory_state();
-	is_empty = was_empty = nodes_empty(*nodes);
-	if (!was_empty) {
-		nodes_and(*nodes, *nodes, cpuset_current_mems_allowed);
-		is_empty = nodes_empty(*nodes);	/* after "contextualization" */
-	}
-
-	switch (mode) {
-	case MPOL_DEFAULT:
-		/*
-		 * require caller to specify an empty nodemask
-		 * before "contextualization"
-		 */
-		if (!was_empty)
-			return -EINVAL;
-		break;
-	case MPOL_BIND:
-	case MPOL_INTERLEAVE:
-		/*
-		 * require at least 1 valid node after "contextualization"
-		 */
-		if (is_empty)
-			return -EINVAL;
-		break;
-	case MPOL_PREFERRED:
-		/*
-		 * Did caller specify invalid nodes?
-		 * Don't silently accept this as "local allocation".
-		 */
-		if (!was_empty && is_empty)
-			return -EINVAL;
-		break;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
 /* Check that the nodemask contains at least one populated zone */
 static int is_valid_nodemask(nodemask_t *nodemask)
 {
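With mpol_check_policy() gone, its two jobs move: the "contextualization" (intersecting the user's nodemask with cpuset_current_mems_allowed) and the per-mode validity checks are now both performed inside mpol_new(), so the callers in do_set_mempolicy() and do_mbind() no longer need a separate pre-check.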
@@ -186,48 +134,60 @@ static int is_valid_nodemask(nodemask_t *nodemask)
 	return 0;
 }
 
+static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
+{
+	return pol->flags & MPOL_F_STATIC_NODES;
+}
+
 /* Create a new policy */
 static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
 				  nodemask_t *nodes)
 {
 	struct mempolicy *policy;
+	nodemask_t cpuset_context_nmask;
 
 	pr_debug("setting mode %d flags %d nodes[0] %lx\n",
 		 mode, flags, nodes ? nodes_addr(*nodes)[0] : -1);
 
 	if (mode == MPOL_DEFAULT)
-		return NULL;
+		return (nodes && nodes_weight(*nodes)) ? ERR_PTR(-EINVAL) :
+							 NULL;
 	policy = kmem_cache_alloc(policy_cache, GFP_KERNEL);
 	if (!policy)
 		return ERR_PTR(-ENOMEM);
 	atomic_set(&policy->refcnt, 1);
+	cpuset_update_task_memory_state();
+	nodes_and(cpuset_context_nmask, *nodes, cpuset_current_mems_allowed);
 	switch (mode) {
 	case MPOL_INTERLEAVE:
-		policy->v.nodes = *nodes;
-		if (nodes_weight(policy->v.nodes) == 0) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
+		if (nodes_empty(*nodes) || nodes_empty(cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	case MPOL_PREFERRED:
-		policy->v.preferred_node = first_node(*nodes);
+		policy->v.preferred_node = first_node(cpuset_context_nmask);
 		if (policy->v.preferred_node >= MAX_NUMNODES)
-			policy->v.preferred_node = -1;
+			goto free;
 		break;
 	case MPOL_BIND:
-		if (!is_valid_nodemask(nodes)) {
-			kmem_cache_free(policy_cache, policy);
-			return ERR_PTR(-EINVAL);
-		}
-		policy->v.nodes = *nodes;
+		if (!is_valid_nodemask(&cpuset_context_nmask))
+			goto free;
+		policy->v.nodes = cpuset_context_nmask;
 		break;
 	default:
 		BUG();
 	}
 	policy->policy = mode;
 	policy->flags = flags;
-	policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
+	if (mpol_store_user_nodemask(policy))
+		policy->w.user_nodemask = *nodes;
+	else
+		policy->w.cpuset_mems_allowed = cpuset_mems_allowed(current);
 	return policy;
+
+free:
+	kmem_cache_free(policy_cache, policy);
+	return ERR_PTR(-EINVAL);
 }
 
 static void gather_stats(struct page *, void *, int pte_dirty);
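A minimal user-space sketch of the contextualization step now done in mpol_new(), using a plain unsigned long as a stand-in for nodemask_t (illustrative only; the mask values are made up):

#include <stdio.h>

int main(void)
{
	unsigned long requested = 0x36;	/* user asked for nodes 1,2,4,5 */
	unsigned long allowed   = 0x0f;	/* cpuset permits nodes 0-3 */
	unsigned long ctx = requested & allowed;	/* nodes_and() */

	printf("context mask: %#lx\n", ctx);	/* 0x6: nodes 1 and 2 */
	/* MPOL_PREFERRED takes first_node(ctx), node 1 here; were ctx
	 * empty, first_node() would return MAX_NUMNODES and mpol_new()
	 * would fail with -EINVAL via the new 'free' label. */
	return 0;
}

Note that even with MPOL_F_STATIC_NODES the policy still operates on the contextualized mask; the flag only changes what is remembered in 'w' for later rebinds.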
@@ -473,15 +433,14 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
 {
 	struct mempolicy *new;
 
-	if (mpol_check_policy(mode, nodes))
-		return -EINVAL;
 	new = mpol_new(mode, flags, nodes);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
 	mpol_set_task_struct_flag();
-	if (new && new->policy == MPOL_INTERLEAVE)
+	if (new && new->policy == MPOL_INTERLEAVE &&
+	    nodes_weight(new->v.nodes))
 		current->il_next = first_node(new->v.nodes);
 	return 0;
 }
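The extra nodes_weight() test looks redundant at set time, since mpol_new() rejects an empty interleave mask, but it keeps do_set_mempolicy() from ever calling first_node() on an empty mask; the same defensive pattern appears in the interleave helpers below, whose v.nodes can genuinely become empty after a static rebind.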
@@ -796,9 +755,6 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (end == start)
 		return 0;
 
-	if (mpol_check_policy(mode, nmask))
-		return -EINVAL;
-
 	new = mpol_new(mode, mode_flags, nmask);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
@@ -1206,7 +1162,8 @@ static unsigned interleave_nodes(struct mempolicy *policy)
 	next = next_node(nid, policy->v.nodes);
 	if (next >= MAX_NUMNODES)
 		next = first_node(policy->v.nodes);
-	me->il_next = next;
+	if (next < MAX_NUMNODES)
+		me->il_next = next;
 	return nid;
 }
 
@@ -1252,10 +1209,13 @@ static unsigned offset_il_node(struct mempolicy *pol,
 		struct vm_area_struct *vma, unsigned long off)
 {
 	unsigned nnodes = nodes_weight(pol->v.nodes);
-	unsigned target = (unsigned)off % nnodes;
+	unsigned target;
 	int c;
 	int nid = -1;
 
+	if (!nnodes)
+		return numa_node_id();
+	target = (unsigned int)off % nnodes;
 	c = 0;
 	do {
 		nid = next_node(nid, pol->v.nodes);
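Both interleave helpers now tolerate an empty v.nodes, which becomes reachable with MPOL_F_STATIC_NODES: a rebind intersects the user's mask with the new cpuset mems, and that intersection can be empty. interleave_nodes() then simply leaves il_next untouched, and offset_il_node() falls back to the local node.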
@@ -1465,6 +1425,16 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
 	return new;
 }
 
+static int mpol_match_intent(const struct mempolicy *a,
+			     const struct mempolicy *b)
+{
+	if (a->flags != b->flags)
+		return 0;
+	if (!mpol_store_user_nodemask(a))
+		return 1;
+	return nodes_equal(a->w.user_nodemask, b->w.user_nodemask);
+}
+
 /* Slow path of a mempolicy comparison */
 int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 {
@@ -1472,6 +1442,8 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 		return 0;
 	if (a->policy != b->policy)
 		return 0;
+	if (a->policy != MPOL_DEFAULT && !mpol_match_intent(a, b))
+		return 0;
 	switch (a->policy) {
 	case MPOL_DEFAULT:
 		return 1;
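mpol_match_intent() extends policy equality to the new state: identical modes and effective nodemasks are no longer sufficient if the flags differ, or if both policies are static but were created from different user nodemasks.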
@@ -1771,13 +1743,14 @@ void numa_default_policy(void)
 static void mpol_rebind_policy(struct mempolicy *pol,
 			       const nodemask_t *newmask)
 {
-	nodemask_t *mpolmask;
 	nodemask_t tmp;
+	int static_nodes;
 
 	if (!pol)
 		return;
-	mpolmask = &pol->cpuset_mems_allowed;
-	if (nodes_equal(*mpolmask, *newmask))
+	static_nodes = pol->flags & MPOL_F_STATIC_NODES;
+	if (!mpol_store_user_nodemask(pol) &&
+	    nodes_equal(pol->w.cpuset_mems_allowed, *newmask))
 		return;
 
 	switch (pol->policy) {
@@ -1786,16 +1759,35 @@ static void mpol_rebind_policy(struct mempolicy *pol,
 	case MPOL_BIND:
 		/* Fall through */
 	case MPOL_INTERLEAVE:
-		nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
+		if (static_nodes)
+			nodes_and(tmp, pol->w.user_nodemask, *newmask);
+		else {
+			nodes_remap(tmp, pol->v.nodes,
+				    pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		pol->v.nodes = tmp;
-		*mpolmask = *newmask;
-		current->il_next = node_remap(current->il_next,
-						*mpolmask, *newmask);
+		if (!node_isset(current->il_next, tmp)) {
+			current->il_next = next_node(current->il_next, tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = first_node(tmp);
+			if (current->il_next >= MAX_NUMNODES)
+				current->il_next = numa_node_id();
+		}
 		break;
 	case MPOL_PREFERRED:
-		pol->v.preferred_node = node_remap(pol->v.preferred_node,
-						*mpolmask, *newmask);
-		*mpolmask = *newmask;
+		if (static_nodes) {
+			int node = first_node(pol->w.user_nodemask);
+
+			if (node_isset(node, *newmask))
+				pol->v.preferred_node = node;
+			else
+				pol->v.preferred_node = -1;
+		} else {
+			pol->v.preferred_node = node_remap(pol->v.preferred_node,
+							pol->w.cpuset_mems_allowed, *newmask);
+			pol->w.cpuset_mems_allowed = *newmask;
+		}
 		break;
 	default:
 		BUG();
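The rebind logic above is the heart of the patch: relative policies are remapped node-for-node from the old cpuset placement to the new one, while static policies are simply re-intersected with the new allowed set. A toy user-space model of the two strategies (a sketch only: a plain unsigned long stands in for nodemask_t, and remap() only approximates the ordinal-position semantics of the kernel's nodes_remap()/bitmap_bitremap()):

#include <stdio.h>

/* ordinal position of set bit 'node' within 'mask' */
static int bit_ordinal(unsigned long mask, int node)
{
	int n, ord = 0;

	for (n = 0; n < node; n++)
		if (mask & (1UL << n))
			ord++;
	return ord;
}

/* n-th set bit of 'mask' (0-indexed), or -1 if there is none */
static int nth_bit(unsigned long mask, int n)
{
	int bit;

	for (bit = 0; bit < 64; bit++)
		if ((mask & (1UL << bit)) && n-- == 0)
			return bit;
	return -1;
}

/* rough analogue of nodes_remap(): preserve ordinal positions */
static unsigned long remap(unsigned long nodes, unsigned long old,
			   unsigned long new)
{
	unsigned long out = 0;
	int bit, w = __builtin_popcountl(new);

	if (!w)
		return 0;
	for (bit = 0; bit < 64; bit++)
		if (nodes & (1UL << bit))
			out |= 1UL << nth_bit(new, bit_ordinal(old, bit) % w);
	return out;
}

int main(void)
{
	unsigned long user = 0xf0;	/* policy over nodes 4-7 */
	unsigned long oldmems = 0xf0;	/* cpuset was 4-7 ... */
	unsigned long newmems = 0x0f;	/* ... and moves to 0-3 */

	/* default behaviour: the policy follows the cpuset */
	printf("relative: %#lx\n", remap(user, oldmems, newmems)); /* 0xf */
	/* MPOL_F_STATIC_NODES: intersect; here the result is empty,
	 * which is why the interleave helpers grew empty-mask guards */
	printf("static:   %#lx\n", user & newmems);                /* 0x0 */
	return 0;
}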
@@ -1847,6 +1839,7 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	int l;
 	nodemask_t nodes;
 	unsigned short mode = pol ? pol->policy : MPOL_DEFAULT;
+	unsigned short flags = pol ? pol->flags : 0;
 
 	switch (mode) {
 	case MPOL_DEFAULT:
@@ -1876,6 +1869,17 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	strcpy(p, policy_types[mode]);
 	p += l;
 
+	if (flags) {
+		int need_bar = 0;
+
+		if (buffer + maxlen < p + 2)
+			return -ENOSPC;
+		*p++ = '=';
+
+		if (flags & MPOL_F_STATIC_NODES)
+			p += sprintf(p, "%sstatic", need_bar++ ? "|" : "");
+	}
+
 	if (!nodes_empty(nodes)) {
 		if (buffer + maxlen < p + 2)
 			return -ENOSPC;
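With the flag recorded, the policy string gains an optional "=<flags>" segment between the mode and the nodelist, so a static interleave over nodes 0-3 would presumably render as "interleave=static" followed by the nodelist (the nodelist itself is emitted by the pre-existing code after this hunk). The need_bar bookkeeping leaves room for future flags to be '|'-separated.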
diff --git a/mm/shmem.c b/mm/shmem.c
index 1ccf794fbe61..3e9fda0ca470 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1126,6 +1126,8 @@ static int shmem_parse_mpol(char *value, unsigned short *policy,
 		err = 0;
 	}
 	if (flags) {
+		if (!strcmp(flags, "static"))
+			*mode_flags |= MPOL_F_STATIC_NODES;
 	}
 out:
 	/* Restore string for error message */
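On the tmpfs side only the flag recognition itself is new; the surrounding code (not shown) already splits the mount string into policy, an optional flags suffix, and a nodelist. Assuming the '=' separator mirrors what mpol_to_str() emits, a mount line such as "mount -t tmpfs -o mpol=interleave=static:0-3 tmpfs /mnt" would request a static interleave policy for the filesystem's pages. Within the visible context, an unrecognized flag string is silently ignored.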