author    Lee Schermerhorn <lee.schermerhorn@hp.com>    2008-04-28 05:13:21 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2008-04-28 11:58:24 -0400
commit    fc36b8d3d819047eb4d23ca079fb4d3af20ff076
tree      65ee215a6bdca1e8d4ac4b57525445d7d1829c1d
parent    53f2556b6792ed99fde965f5e061749edd455623
mempolicy: use MPOL_F_LOCAL to Indicate Preferred Local Policy
Now that we're using "preferred local" policy for the system default, we need to make it as fast as possible.  Because of the variable size of the mempolicy structure [based on the size of nodemasks], the preferred_node may be in a different cacheline from the mode.  This can result in accessing an extra cacheline in the normal case of system default policy.  I suspect this is the cause of an observed 2-3% slowdown in page fault testing relative to the kernel without this patch series.

To alleviate this, use an internal mode flag, MPOL_F_LOCAL, in the mempolicy flags member, which is guaranteed [?] to be in the same cacheline as the mode itself.

Verified that the reworked mempolicy now performs slightly better on 25-rc8-mm1 for both anon and shmem segments with system default and vma [preferred local] policy.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
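For illustration, here is a minimal userspace C sketch of the cacheline argument made in the log above. It is not the kernel's real struct mempolicy: struct fake_mempolicy, its field sizes, and its ordering are assumptions invented for this example, arranged so that flags shares a cacheline with mode while preferred_node is pushed past a large nodemask-sized array.

/*
 * Sketch only: the layout below is assumed, not the kernel's.  Field
 * names echo struct mempolicy; sizes are chosen so the fields spread
 * across cachelines, mirroring the situation the log describes.
 */
#include <stdio.h>
#include <stddef.h>

#define MPOL_F_SHARED	(1 << 0)	/* identify shared policies */
#define MPOL_F_LOCAL	(1 << 1)	/* preferred local allocation */

struct fake_mempolicy {
	int refcnt;
	unsigned short mode;		/* e.g. MPOL_PREFERRED */
	unsigned short flags;		/* adjacent to mode: same cacheline */
	unsigned long nodes[16];	/* stand-in for a large nodemask */
	int preferred_node;		/* may land in a later cacheline */
};

int main(void)
{
	struct fake_mempolicy pol = { .mode = 1, .flags = MPOL_F_LOCAL };

	printf("mode at %zu, flags at %zu, preferred_node at %zu\n",
	       offsetof(struct fake_mempolicy, mode),
	       offsetof(struct fake_mempolicy, flags),
	       offsetof(struct fake_mempolicy, preferred_node));

	/*
	 * The common "is this local allocation?" test reads only the
	 * mode/flags cacheline; the old sentinel test, preferred_node
	 * == -1, would have pulled in the far cacheline as well.
	 */
	if (pol.flags & MPOL_F_LOCAL)
		printf("local allocation: preferred_node never read\n");
	return 0;
}

With 64-byte cachelines, mode and flags land in the first line of this assumed layout while preferred_node lands two lines later, which is exactly the access the flag test avoids.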
-rw-r--r--	Documentation/vm/numa_memory_policy.txt	11
-rw-r--r--	include/linux/mempolicy.h	1
-rw-r--r--	mm/mempolicy.c	47
3 files changed, 28 insertions(+), 31 deletions(-)
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
index 13cca5a3cf17..bad16d3f6a47 100644
--- a/Documentation/vm/numa_memory_policy.txt
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -176,12 +176,11 @@ Components of Memory Policies
 	containing the cpu where the allocation takes place.
 
 	Internally, the Preferred policy uses a single node--the
-	preferred_node member of struct mempolicy.  A "distinguished
-	value" of this preferred_node, currently '-1', is interpreted
-	as "the node containing the cpu where the allocation takes
-	place"--local allocation.  "Local" allocation policy can be
-	viewed as a Preferred policy that starts at the node containing
-	the cpu where the allocation takes place.
+	preferred_node member of struct mempolicy.  When the internal
+	mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
+	the policy is interpreted as local allocation.  "Local" allocation
+	policy can be viewed as a Preferred policy that starts at the node
+	containing the cpu where the allocation takes place.
 
 	It is possible for the user to specify that local allocation is
 	always preferred by passing an empty nodemask with this mode.
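As the last paragraph of the hunk above notes, local allocation can be requested explicitly by passing an empty nodemask with MPOL_PREFERRED. A hedged usage sketch, assuming a NUMA-enabled kernel and libnuma's numaif.h (link with -lnuma):

#include <numaif.h>
#include <stdio.h>

int main(void)
{
	/*
	 * MPOL_PREFERRED with a NULL (empty) nodemask means "allocate
	 * on the node of the cpu that touches the memory" -- with this
	 * patch, recorded internally as MPOL_F_LOCAL.
	 */
	if (set_mempolicy(MPOL_PREFERRED, NULL, 0) != 0) {
		perror("set_mempolicy");
		return 1;
	}
	/* Subsequent page faults in this task use preferred-local policy. */
	return 0;
}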
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 172b9c6acb91..b0fab9e80655 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -50,6 +50,7 @@ enum {
  * are never OR'ed into the mode in mempolicy API arguments.
  */
 #define MPOL_F_SHARED	(1 << 0)	/* identify shared policies */
+#define MPOL_F_LOCAL	(1 << 1)	/* preferred local allocation */
 
 #ifdef __KERNEL__
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7b3ae977b158..143b019e9834 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -110,7 +110,7 @@ enum zone_type policy_zone = 0;
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
 	.mode = MPOL_PREFERRED,
-	.v = { .preferred_node = -1 },
+	.flags = MPOL_F_LOCAL,
 };
 
 static const struct mempolicy_operations {
@@ -163,7 +163,7 @@ static int mpol_new_interleave(struct mempolicy *pol, const nodemask_t *nodes)
 static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes)
 {
 	if (!nodes)
-		pol->v.preferred_node = -1;	/* local allocation */
+		pol->flags |= MPOL_F_LOCAL;	/* local allocation */
 	else if (nodes_empty(*nodes))
 		return -EINVAL;			/* no allowed nodes */
 	else
@@ -290,14 +290,15 @@ static void mpol_rebind_preferred(struct mempolicy *pol,
 	if (pol->flags & MPOL_F_STATIC_NODES) {
 		int node = first_node(pol->w.user_nodemask);
 
-		if (node_isset(node, *nodes))
+		if (node_isset(node, *nodes)) {
 			pol->v.preferred_node = node;
-		else
-			pol->v.preferred_node = -1;
+			pol->flags &= ~MPOL_F_LOCAL;
+		} else
+			pol->flags |= MPOL_F_LOCAL;
 	} else if (pol->flags & MPOL_F_RELATIVE_NODES) {
 		mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
 		pol->v.preferred_node = first_node(tmp);
-	} else if (pol->v.preferred_node != -1) {
+	} else if (!(pol->flags & MPOL_F_LOCAL)) {
 		pol->v.preferred_node = node_remap(pol->v.preferred_node,
 						pol->w.cpuset_mems_allowed,
 						*nodes);
@@ -645,7 +646,7 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 		*nodes = p->v.nodes;
 		break;
 	case MPOL_PREFERRED:
-		if (p->v.preferred_node >= 0)
+		if (!(p->flags & MPOL_F_LOCAL))
 			node_set(p->v.preferred_node, *nodes);
 		/* else return empty node mask for local allocation */
 		break;
@@ -1324,13 +1325,12 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
 /* Return a zonelist indicated by gfp for node representing a mempolicy */
 static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 {
-	int nd;
+	int nd = numa_node_id();
 
 	switch (policy->mode) {
 	case MPOL_PREFERRED:
-		nd = policy->v.preferred_node;
-		if (nd < 0)
-			nd = numa_node_id();
+		if (!(policy->flags & MPOL_F_LOCAL))
+			nd = policy->v.preferred_node;
 		break;
 	case MPOL_BIND:
 		/*
@@ -1339,16 +1339,13 @@ static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy)
 		 * current node is part of the mask, we use the zonelist for
 		 * the first node in the mask instead.
 		 */
-		nd = numa_node_id();
 		if (unlikely(gfp & __GFP_THISNODE) &&
 				unlikely(!node_isset(nd, policy->v.nodes)))
 			nd = first_node(policy->v.nodes);
 		break;
 	case MPOL_INTERLEAVE: /* should not happen */
-		nd = numa_node_id();
 		break;
 	default:
-		nd = 0;
 		BUG();
 	}
 	return node_zonelist(nd, gfp);
@@ -1379,14 +1376,15 @@ static unsigned interleave_nodes(struct mempolicy *policy)
  */
 unsigned slab_node(struct mempolicy *policy)
 {
-	if (!policy)
+	if (!policy || policy->flags & MPOL_F_LOCAL)
 		return numa_node_id();
 
 	switch (policy->mode) {
 	case MPOL_PREFERRED:
-		if (unlikely(policy->v.preferred_node >= 0))
-			return policy->v.preferred_node;
-		return numa_node_id();
+		/*
+		 * handled MPOL_F_LOCAL above
+		 */
+		return policy->v.preferred_node;
 
 	case MPOL_INTERLEAVE:
 		return interleave_nodes(policy);
@@ -1666,7 +1664,8 @@ int __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	case MPOL_INTERLEAVE:
 		return nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
-		return a->v.preferred_node == b->v.preferred_node;
+		return a->v.preferred_node == b->v.preferred_node &&
+			a->flags == b->flags;
 	default:
 		BUG();
 		return 0;
@@ -1946,7 +1945,7 @@ void numa_default_policy(void)
 }
 
 /*
- * "local" is pseudo-policy:  MPOL_PREFERRED with preferred_node == -1
+ * "local" is pseudo-policy:  MPOL_PREFERRED with MPOL_F_LOCAL flag
  * Used only for mpol_to_str()
  */
 #define MPOL_LOCAL (MPOL_INTERLEAVE + 1)
@@ -1962,7 +1961,6 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 {
 	char *p = buffer;
 	int l;
-	int nid;
 	nodemask_t nodes;
 	unsigned short mode;
 	unsigned short flags = pol ? pol->flags : 0;
@@ -1979,11 +1977,10 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 
 	case MPOL_PREFERRED:
 		nodes_clear(nodes);
-		nid = pol->v.preferred_node;
-		if (nid < 0)
+		if (flags & MPOL_F_LOCAL)
 			mode = MPOL_LOCAL;	/* pseudo-policy */
 		else
-			node_set(nid, nodes);
+			node_set(pol->v.preferred_node, nodes);
 		break;
 
 	case MPOL_BIND:
@@ -2004,7 +2001,7 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 	strcpy(p, policy_types[mode]);
 	p += l;
 
-	if (flags) {
+	if (flags & MPOL_MODE_FLAGS) {
 		int need_bar = 0;
 
 		if (buffer + maxlen < p + 2)