 include/linux/mempolicy.h |  5
 include/linux/sched.h     |  1
 kernel/fork.c             |  1
 mm/mempolicy.c            | 32
 mm/slab.c                 | 41
 5 files changed, 67 insertions(+), 13 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bbd2221923c3..6a7621b2b12b 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -147,6 +147,7 @@ extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
 extern void mpol_rebind_task(struct task_struct *tsk,
                                        const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+extern void mpol_fix_fork_child_flag(struct task_struct *p);
 #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
 
 #ifdef CONFIG_CPUSET
@@ -248,6 +249,10 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
+static inline void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+}
+
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0e37cfa09f5..2cda439ece43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -932,6 +932,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_SWAPWRITE    0x01000000      /* Allowed to write to swap */
 #define PF_SPREAD_PAGE  0x04000000      /* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB  0x08000000      /* Spread some slab caches over cpuset */
+#define PF_MEMPOLICY    0x10000000      /* Non-default NUMA mempolicy */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/fork.c b/kernel/fork.c
index c21bae8c93b9..a02063903aaa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup_cpuset;
 	}
+	mpol_fix_fork_child_flag(p);
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e93cc740c22b..4f71cfd29c6f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -422,6 +422,37 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
 	return mpol_check_policy(mode, nodes);
 }
 
+
+/*
+ * Update task->flags PF_MEMPOLICY bit: set iff non-default
+ * mempolicy.  Allows more rapid checking of this (combined perhaps
+ * with other PF_* flag bits) on memory allocation hot code paths.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+	if (p->mempolicy)
+		p->flags |= PF_MEMPOLICY;
+	else
+		p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+	mpol_fix_fork_child_flag(current);
+}
+
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
@@ -434,6 +465,7 @@ long do_set_mempolicy(int mode, nodemask_t *nodes)
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
+	mpol_set_task_struct_flag();
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = first_node(new->v.nodes);
 	return 0;
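
Editor's note: the two hunks above are what keep the PF_MEMPOLICY bit in sync with current->mempolicy, so the slab hot path can test a cached flags word instead of dereferencing the policy pointer. The stand-alone C sketch below is illustrative only; it uses simplified stand-ins for task_struct, the PF_* bits and the mempolicy pointer (not the kernel's definitions) to model the invariant the patch maintains (flag set iff the policy pointer is non-NULL) and the single masked test used later in mm/slab.c.

/*
 * Userspace model of the PF_MEMPOLICY bookkeeping. Hypothetical,
 * simplified types; compile with any C99 compiler.
 */
#include <stdio.h>
#include <stddef.h>

#define PF_SPREAD_PAGE 0x04000000u
#define PF_SPREAD_SLAB 0x08000000u
#define PF_MEMPOLICY   0x10000000u

struct task_model {
	unsigned int flags;
	void *mempolicy;            /* stand-in for struct mempolicy * */
};

/* Mirrors mpol_fix_fork_child_flag(): derive the flag from the pointer. */
static void fix_mempolicy_flag(struct task_model *p)
{
	if (p->mempolicy)
		p->flags |= PF_MEMPOLICY;
	else
		p->flags &= ~PF_MEMPOLICY;
}

/* One masked test covers all three "maybe allocate on another node" cases. */
static int needs_alternate_node(const struct task_model *p)
{
	return (p->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB | PF_MEMPOLICY)) != 0;
}

int main(void)
{
	struct task_model t = { 0, NULL };
	int dummy_policy = 1;

	fix_mempolicy_flag(&t);
	printf("no policy:  alternate path? %d\n", needs_alternate_node(&t));

	t.mempolicy = &dummy_policy;    /* task installs a non-default policy */
	fix_mempolicy_flag(&t);
	printf("has policy: alternate path? %d\n", needs_alternate_node(&t));
	return 0;
}
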
diff --git a/mm/slab.c b/mm/slab.c
index de516658d3d8..f80b52388a12 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -899,6 +899,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 
 #ifdef CONFIG_NUMA
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -2808,19 +2809,11 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	struct array_cache *ac;
 
 #ifdef CONFIG_NUMA
-	if (unlikely(current->mempolicy && !in_interrupt())) {
-		int nid = slab_node(current->mempolicy);
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
-	if (unlikely(cpuset_do_slab_mem_spread() &&
-			(cachep->flags & SLAB_MEM_SPREAD) &&
-			!in_interrupt())) {
-		int nid = cpuset_mem_spread_node();
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
+	if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
+					PF_MEMPOLICY))) {
+		objp = alternate_node_alloc(cachep, flags);
+		if (objp != NULL)
+			return objp;
 	}
 #endif
 
@@ -2856,6 +2849,28 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 
 #ifdef CONFIG_NUMA
 /*
+ * Try allocating on another node if PF_SPREAD_PAGE|PF_SPREAD_SLAB|PF_MEMPOLICY.
+ *
+ * If we are in_interrupt, then process context, including cpusets and
+ * mempolicy, may not apply and should not be used for allocation policy.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	int nid_alloc, nid_here;
+
+	if (in_interrupt())
+		return NULL;
+	nid_alloc = nid_here = numa_node_id();
+	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
+		nid_alloc = cpuset_mem_spread_node();
+	else if (current->mempolicy)
+		nid_alloc = slab_node(current->mempolicy);
+	if (nid_alloc != nid_here)
+		return __cache_alloc_node(cachep, flags, nid_alloc);
+	return NULL;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
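
Editor's note: for context on how a task ends up on the new PF_MEMPOLICY path, a non-default NUMA policy is installed with set_mempolicy(2); do_set_mempolicy() above then sets the flag, and later slab allocations made in that task's context may be redirected by alternate_node_alloc(). The minimal userspace sketch below assumes libnuma's <numaif.h> is available (link with -lnuma) and that NUMA nodes 0 and 1 are online; adjust the node mask for other topologies.

/*
 * Sketch only: install an interleave mempolicy, then allocate.
 * Error handling is minimal; EINVAL usually means the requested
 * nodes are not online on this machine.
 */
#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	unsigned long nodemask = 0x3;   /* interleave over nodes 0 and 1 */

	/* After this succeeds, the task has a non-default mempolicy, so
	 * the patched kernel sets PF_MEMPOLICY for it. */
	if (set_mempolicy(MPOL_INTERLEAVE, &nodemask, 8 * sizeof(nodemask)) != 0) {
		perror("set_mempolicy");
		return EXIT_FAILURE;
	}

	/* Work that allocates memory; kernel-side slab allocations done on
	 * this task's behalf now hit the single-flag test in ____cache_alloc. */
	void *buf = malloc(1 << 20);
	if (buf == NULL)
		return EXIT_FAILURE;
	memset(buf, 0, 1 << 20);
	puts("interleave mempolicy installed");
	free(buf);
	return EXIT_SUCCESS;
}
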