author     Paul Jackson <pj@sgi.com>              2006-03-24 06:16:08 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-03-24 10:33:23 -0500
commit     c61afb181c649754ea221f104e268cbacfc993e3 (patch)
tree       870917b3f9175cf1663a2620d989856913cfb5f8
parent     101a50019ae5e370d73984ee05d56dd3b08f330a (diff)
[PATCH] cpuset memory spread slab cache optimizations
The hooks in the slab cache allocator code path for support of NUMA mempolicies and cpuset memory spreading are in an important code path.  Many systems will use neither feature.

This patch optimizes those hooks down to a single check of some bits in the current task's task_struct flags.  For non-NUMA systems, this hook and related code is already ifdef'd out.

The optimization is done by using another task flag, set if the task is using a non-default NUMA mempolicy.  Taking this flag bit along with the PF_SPREAD_PAGE and PF_SPREAD_SLAB flag bits added earlier in this 'cpuset memory spreading' patch set, one can check for the combination of any of these special case memory placement mechanisms with a single test of the current task's task_struct flags.

This patch also tightens up the code, to save a few bytes of kernel text space, and moves some of it out of line.  Due to the nested inlines called from multiple places, we were ending up with three copies of this code, which once we get off the main code path (for local node allocation) seems a bit wasteful of instruction memory.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
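As a rough, standalone sketch of the "single test" idea described above (ordinary userspace C; the task_struct here is a stand-in for illustration, not the kernel's structure, and only the flag values mirror the patch below):

/*
 * Fold several special-case conditions into per-task flag bits so the
 * allocation fast path can rule them all out with one AND and one branch.
 */
#include <stdio.h>

#define PF_SPREAD_PAGE	0x04000000	/* Spread page cache over cpuset */
#define PF_SPREAD_SLAB	0x08000000	/* Spread some slab caches over cpuset */
#define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */

struct task_struct {			/* stand-in, not the kernel's */
	unsigned long flags;
};

/* Fast path: one test covers all three placement mechanisms. */
static int needs_alternate_node(const struct task_struct *tsk)
{
	return (tsk->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
			      PF_MEMPOLICY)) != 0;
}

int main(void)
{
	struct task_struct plain  = { .flags = 0 };
	struct task_struct spread = { .flags = PF_SPREAD_SLAB };

	printf("plain task:  %d\n", needs_alternate_node(&plain));	/* 0 */
	printf("spread task: %d\n", needs_alternate_node(&spread));	/* 1 */
	return 0;
}

Only tasks that actually use one of the special placement mechanisms pay for the slower out-of-line handling; everyone else takes the single not-taken branch.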
-rw-r--r--  include/linux/mempolicy.h   5
-rw-r--r--  include/linux/sched.h       1
-rw-r--r--  kernel/fork.c               1
-rw-r--r--  mm/mempolicy.c             32
-rw-r--r--  mm/slab.c                  41
5 files changed, 67 insertions, 13 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bbd2221923c3..6a7621b2b12b 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -147,6 +147,7 @@ extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
 extern void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+extern void mpol_fix_fork_child_flag(struct task_struct *p);
 #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
 
 #ifdef CONFIG_CPUSET
@@ -248,6 +249,10 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
+static inline void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+}
+
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0e37cfa09f5..2cda439ece43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -932,6 +932,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_SWAPWRITE	0x01000000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x04000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x08000000	/* Spread some slab caches over cpuset */
+#define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/fork.c b/kernel/fork.c
index c21bae8c93b9..a02063903aaa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup_cpuset;
 	}
+	mpol_fix_fork_child_flag(p);
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e93cc740c22b..4f71cfd29c6f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -422,6 +422,37 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
 	return mpol_check_policy(mode, nodes);
 }
 
+
+/*
+ * Update task->flags PF_MEMPOLICY bit: set iff non-default
+ * mempolicy.  Allows more rapid checking of this (combined perhaps
+ * with other PF_* flag bits) on memory allocation hot code paths.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+	if (p->mempolicy)
+		p->flags |= PF_MEMPOLICY;
+	else
+		p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+	mpol_fix_fork_child_flag(current);
+}
+
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
@@ -434,6 +465,7 @@ long do_set_mempolicy(int mode, nodemask_t *nodes)
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
+	mpol_set_task_struct_flag();
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = first_node(new->v.nodes);
 	return 0;
diff --git a/mm/slab.c b/mm/slab.c
index de516658d3d8..f80b52388a12 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -899,6 +899,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 
 #ifdef CONFIG_NUMA
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -2808,19 +2809,11 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	struct array_cache *ac;
 
 #ifdef CONFIG_NUMA
-	if (unlikely(current->mempolicy && !in_interrupt())) {
-		int nid = slab_node(current->mempolicy);
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
-	if (unlikely(cpuset_do_slab_mem_spread() &&
-			(cachep->flags & SLAB_MEM_SPREAD) &&
-			!in_interrupt())) {
-		int nid = cpuset_mem_spread_node();
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
+	if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
+					PF_MEMPOLICY))) {
+		objp = alternate_node_alloc(cachep, flags);
+		if (objp != NULL)
+			return objp;
+	}
 #endif
 
@@ -2856,6 +2849,28 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 
 #ifdef CONFIG_NUMA
 /*
+ * Try allocating on another node if PF_SPREAD_PAGE|PF_SPREAD_SLAB|PF_MEMPOLICY.
+ *
+ * If we are in_interrupt, then process context, including cpusets and
+ * mempolicy, may not apply and should not be used for allocation policy.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	int nid_alloc, nid_here;
+
+	if (in_interrupt())
+		return NULL;
+	nid_alloc = nid_here = numa_node_id();
+	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
+		nid_alloc = cpuset_mem_spread_node();
+	else if (current->mempolicy)
+		nid_alloc = slab_node(current->mempolicy);
+	if (nid_alloc != nid_here)
+		return __cache_alloc_node(cachep, flags, nid_alloc);
+	return NULL;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,