aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLee Schermerhorn <lee.schermerhorn@hp.com>2008-04-28 05:13:18 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-28 11:58:24 -0400
commitbea904d54d6faa92400f10c8ea3d3828b8e1eb93 (patch)
tree24966dd4dabadb4bb32aa1e00fae2c2168661229 /mm
parent52cd3b074050dd664380b5e8cfc85d4a6ed8ad48 (diff)
mempolicy: use MPOL_PREFERRED for system-wide default policy
Currently, when one specifies MPOL_DEFAULT via a NUMA memory policy API [set_mempolicy(), mbind() and internal versions], the kernel simply installs a NULL struct mempolicy pointer in the appropriate context: task policy, vma policy, or shared policy. This causes any use of that policy to "fall back" to the next most specific policy scope. The only use of MPOL_DEFAULT to mean "local allocation" is in the system default policy. This requires extra checks/cases for MPOL_DEFAULT in many mempolicy.c functions. There is another, "preferred" way to specify local allocation via the APIs. That is using the MPOL_PREFERRED policy mode with an empty nodemask. Internally, the empty nodemask gets converted to a preferred_node id of '-1'. All internal usage of MPOL_PREFERRED will convert the '-1' to the id of the node local to the cpu where the allocation occurs. System default policy, except during boot, is hard-coded to "local allocation". By using the MPOL_PREFERRED mode with a negative value of preferred node for system default policy, MPOL_DEFAULT will never occur in the 'policy' member of a struct mempolicy. Thus, we can remove all checks for MPOL_DEFAULT when converting policy to a node id/zonelist in the allocation paths. In slab_node() return local node id when policy pointer is NULL. No need to set a pol value to take the switch default. Replace switch default with BUG()--i.e., shouldn't happen. With this patch MPOL_DEFAULT is only used in the APIs, including internal calls to do_set_mempolicy() and in the display of policy in /proc/<pid>/numa_maps. It always means "fall back" to the next most specific policy scope. This simplifies the description of memory policies quite a bit, with no visible change in behavior. get_mempolicy() continues to return MPOL_DEFAULT and an empty nodemask when the requested policy [task or vma/shared] is NULL. These are the values one would supply via set_mempolicy() or mbind() to achieve that condition--default behavior.
This patch updates Documentation to reflect this change. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: David Rientjes <rientjes@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Andi Kleen <ak@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/mempolicy.c68
1 files changed, 42 insertions, 26 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a237295f8190..fea4a5da6e44 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -104,9 +104,13 @@ static struct kmem_cache *sn_cache;
104 policied. */ 104 policied. */
105enum zone_type policy_zone = 0; 105enum zone_type policy_zone = 0;
106 106
107/*
108 * run-time system-wide default policy => local allocation
109 */
107struct mempolicy default_policy = { 110struct mempolicy default_policy = {
108 .refcnt = ATOMIC_INIT(1), /* never free it */ 111 .refcnt = ATOMIC_INIT(1), /* never free it */
109 .mode = MPOL_DEFAULT, 112 .mode = MPOL_PREFERRED,
113 .v = { .preferred_node = -1 },
110}; 114};
111 115
112static const struct mempolicy_operations { 116static const struct mempolicy_operations {
@@ -189,7 +193,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags,
189 if (mode == MPOL_DEFAULT) { 193 if (mode == MPOL_DEFAULT) {
190 if (nodes && !nodes_empty(*nodes)) 194 if (nodes && !nodes_empty(*nodes))
191 return ERR_PTR(-EINVAL); 195 return ERR_PTR(-EINVAL);
192 return NULL; 196 return NULL; /* simply delete any existing policy */
193 } 197 }
194 VM_BUG_ON(!nodes); 198 VM_BUG_ON(!nodes);
195 199
@@ -246,7 +250,6 @@ void __mpol_put(struct mempolicy *p)
246{ 250{
247 if (!atomic_dec_and_test(&p->refcnt)) 251 if (!atomic_dec_and_test(&p->refcnt))
248 return; 252 return;
249 p->mode = MPOL_DEFAULT;
250 kmem_cache_free(policy_cache, p); 253 kmem_cache_free(policy_cache, p);
251} 254}
252 255
@@ -626,13 +629,16 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
626 return 0; 629 return 0;
627} 630}
628 631
629/* Fill a zone bitmap for a policy */ 632/*
630static void get_zonemask(struct mempolicy *p, nodemask_t *nodes) 633 * Return nodemask for policy for get_mempolicy() query
634 */
635static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
631{ 636{
632 nodes_clear(*nodes); 637 nodes_clear(*nodes);
638 if (p == &default_policy)
639 return;
640
633 switch (p->mode) { 641 switch (p->mode) {
634 case MPOL_DEFAULT:
635 break;
636 case MPOL_BIND: 642 case MPOL_BIND:
637 /* Fall through */ 643 /* Fall through */
638 case MPOL_INTERLEAVE: 644 case MPOL_INTERLEAVE:
@@ -686,6 +692,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
686 } 692 }
687 693
688 if (flags & MPOL_F_ADDR) { 694 if (flags & MPOL_F_ADDR) {
695 /*
696 * Do NOT fall back to task policy if the
697 * vma/shared policy at addr is NULL. We
698 * want to return MPOL_DEFAULT in this case.
699 */
689 down_read(&mm->mmap_sem); 700 down_read(&mm->mmap_sem);
690 vma = find_vma_intersection(mm, addr, addr+1); 701 vma = find_vma_intersection(mm, addr, addr+1);
691 if (!vma) { 702 if (!vma) {
@@ -700,7 +711,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
700 return -EINVAL; 711 return -EINVAL;
701 712
702 if (!pol) 713 if (!pol)
703 pol = &default_policy; 714 pol = &default_policy; /* indicates default behavior */
704 715
705 if (flags & MPOL_F_NODE) { 716 if (flags & MPOL_F_NODE) {
706 if (flags & MPOL_F_ADDR) { 717 if (flags & MPOL_F_ADDR) {
@@ -715,8 +726,11 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
715 err = -EINVAL; 726 err = -EINVAL;
716 goto out; 727 goto out;
717 } 728 }
718 } else 729 } else {
719 *policy = pol->mode | pol->flags; 730 *policy = pol == &default_policy ? MPOL_DEFAULT :
731 pol->mode;
732 *policy |= pol->flags;
733 }
720 734
721 if (vma) { 735 if (vma) {
722 up_read(&current->mm->mmap_sem); 736 up_read(&current->mm->mmap_sem);
@@ -725,7 +739,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
725 739
726 err = 0; 740 err = 0;
727 if (nmask) 741 if (nmask)
728 get_zonemask(pol, nmask); 742 get_policy_nodemask(pol, nmask);
729 743
730 out: 744 out:
731 mpol_cond_put(pol); 745 mpol_cond_put(pol);
@@ -1286,8 +1300,7 @@ static struct mempolicy *get_vma_policy(struct task_struct *task,
1286 addr); 1300 addr);
1287 if (vpol) 1301 if (vpol)
1288 pol = vpol; 1302 pol = vpol;
1289 } else if (vma->vm_policy && 1303 } else if (vma->vm_policy)
1290 vma->vm_policy->mode != MPOL_DEFAULT)
1291 pol = vma->vm_policy; 1304 pol = vma->vm_policy;
1292 } 1305 }
1293 if (!pol) 1306 if (!pol)
@@ -1334,7 +1347,6 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
1334 nd = first_node(policy->v.nodes); 1347 nd = first_node(policy->v.nodes);
1335 break; 1348 break;
1336 case MPOL_INTERLEAVE: /* should not happen */ 1349 case MPOL_INTERLEAVE: /* should not happen */
1337 case MPOL_DEFAULT:
1338 nd = numa_node_id(); 1350 nd = numa_node_id();
1339 break; 1351 break;
1340 default: 1352 default:
@@ -1369,9 +1381,15 @@ static unsigned interleave_nodes(struct mempolicy *policy)
1369 */ 1381 */
1370unsigned slab_node(struct mempolicy *policy) 1382unsigned slab_node(struct mempolicy *policy)
1371{ 1383{
1372 unsigned short pol = policy ? policy->mode : MPOL_DEFAULT; 1384 if (!policy)
1385 return numa_node_id();
1386
1387 switch (policy->mode) {
1388 case MPOL_PREFERRED:
1389 if (unlikely(policy->v.preferred_node >= 0))
1390 return policy->v.preferred_node;
1391 return numa_node_id();
1373 1392
1374 switch (pol) {
1375 case MPOL_INTERLEAVE: 1393 case MPOL_INTERLEAVE:
1376 return interleave_nodes(policy); 1394 return interleave_nodes(policy);
1377 1395
@@ -1390,13 +1408,8 @@ unsigned slab_node(struct mempolicy *policy)
1390 return zone->node; 1408 return zone->node;
1391 } 1409 }
1392 1410
1393 case MPOL_PREFERRED:
1394 if (policy->v.preferred_node >= 0)
1395 return policy->v.preferred_node;
1396 /* Fall through */
1397
1398 default: 1411 default:
1399 return numa_node_id(); 1412 BUG();
1400 } 1413 }
1401} 1414}
1402 1415
@@ -1650,8 +1663,6 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
1650 if (a->mode != MPOL_DEFAULT && !mpol_match_intent(a, b)) 1663 if (a->mode != MPOL_DEFAULT && !mpol_match_intent(a, b))
1651 return 0; 1664 return 0;
1652 switch (a->mode) { 1665 switch (a->mode) {
1653 case MPOL_DEFAULT:
1654 return 1;
1655 case MPOL_BIND: 1666 case MPOL_BIND:
1656 /* Fall through */ 1667 /* Fall through */
1657 case MPOL_INTERLEAVE: 1668 case MPOL_INTERLEAVE:
@@ -1828,7 +1839,7 @@ void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy,
1828 if (policy != MPOL_DEFAULT) { 1839 if (policy != MPOL_DEFAULT) {
1829 struct mempolicy *newpol; 1840 struct mempolicy *newpol;
1830 1841
1831 /* Falls back to MPOL_DEFAULT on any error */ 1842 /* Falls back to NULL policy [MPOL_DEFAULT] on any error */
1832 newpol = mpol_new(policy, flags, policy_nodes); 1843 newpol = mpol_new(policy, flags, policy_nodes);
1833 if (!IS_ERR(newpol)) { 1844 if (!IS_ERR(newpol)) {
1834 /* Create pseudo-vma that contains just the policy */ 1845 /* Create pseudo-vma that contains just the policy */
@@ -1952,9 +1963,14 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
1952 char *p = buffer; 1963 char *p = buffer;
1953 int l; 1964 int l;
1954 nodemask_t nodes; 1965 nodemask_t nodes;
1955 unsigned short mode = pol ? pol->mode : MPOL_DEFAULT; 1966 unsigned short mode;
1956 unsigned short flags = pol ? pol->flags : 0; 1967 unsigned short flags = pol ? pol->flags : 0;
1957 1968
1969 if (!pol || pol == &default_policy)
1970 mode = MPOL_DEFAULT;
1971 else
1972 mode = pol->mode;
1973
1958 switch (mode) { 1974 switch (mode) {
1959 case MPOL_DEFAULT: 1975 case MPOL_DEFAULT:
1960 nodes_clear(nodes); 1976 nodes_clear(nodes);