-rw-r--r--	include/linux/mempolicy.h |  5 +++++
-rw-r--r--	include/linux/sched.h     |  1 +
-rw-r--r--	kernel/fork.c             |  1 +
-rw-r--r--	mm/mempolicy.c            | 32 ++++++++++++++++++++++++++++++++
-rw-r--r--	mm/slab.c                 | 41 ++++++++++++++++++++++++++++-------------
5 files changed, 67 insertions(+), 13 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bbd2221923c3..6a7621b2b12b 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -147,6 +147,7 @@ extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
 extern void mpol_rebind_task(struct task_struct *tsk,
 					const nodemask_t *new);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+extern void mpol_fix_fork_child_flag(struct task_struct *p);
 #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
 
 #ifdef CONFIG_CPUSET
@@ -248,6 +249,10 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
+static inline void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+}
+
 #define set_cpuset_being_rebound(x) do {} while (0)
 
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
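Aside (not part of the patch): the !CONFIG_NUMA half of this header uses the kernel's usual config-stub idiom — an empty static inline with the same signature — so call sites such as copy_process() need no #ifdef and the call compiles to nothing. A minimal user-space sketch of that idiom; FEATURE_NUMA and the trivial task_struct here are illustrative stand-ins, not the kernel's definitions:

#include <stdio.h>

struct task_struct { unsigned int flags; };

#ifdef FEATURE_NUMA
/* Real version would live in a .c file when the feature is compiled in. */
void mpol_fix_fork_child_flag(struct task_struct *p);
#else
/* Compiled-out stub: same signature, empty body, optimized away. */
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
{
	(void)p;
}
#endif

int main(void)
{
	struct task_struct child = { 0 };

	mpol_fix_fork_child_flag(&child);	/* no #ifdef at the call site */
	printf("flags: %#x\n", child.flags);
	return 0;
}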
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0e37cfa09f5..2cda439ece43 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -932,6 +932,7 @@ static inline void put_task_struct(struct task_struct *t)
 #define PF_SWAPWRITE	0x01000000	/* Allowed to write to swap */
 #define PF_SPREAD_PAGE	0x04000000	/* Spread page cache over cpuset */
 #define PF_SPREAD_SLAB	0x08000000	/* Spread some slab caches over cpuset */
+#define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
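Aside (not part of the patch): the point of spending a task-flag bit on information already available via current->mempolicy is speed on the allocator hot path — several rarely-true conditions collapse into one AND plus one branch. A standalone sketch of the idiom; the flag values mirror the PF_* definitions above, while the struct and helper are illustrative:

#include <stdio.h>

#define PF_SPREAD_PAGE	0x04000000u	/* Spread page cache over cpuset */
#define PF_SPREAD_SLAB	0x08000000u	/* Spread some slab caches over cpuset */
#define PF_MEMPOLICY	0x10000000u	/* Non-default NUMA mempolicy */

struct task { unsigned int flags; };

/* One mask test replaces three separate pointer/predicate checks. */
static int needs_slow_path(const struct task *t)
{
	return (t->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB | PF_MEMPOLICY)) != 0;
}

int main(void)
{
	struct task plain = { .flags = 0 };
	struct task numa  = { .flags = PF_MEMPOLICY };

	printf("plain: %d, numa: %d\n",
	       needs_slow_path(&plain), needs_slow_path(&numa));	/* 0, 1 */
	return 0;
}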
diff --git a/kernel/fork.c b/kernel/fork.c
index c21bae8c93b9..a02063903aaa 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->mempolicy = NULL;
 		goto bad_fork_cleanup_cpuset;
 	}
+	mpol_fix_fork_child_flag(p);
 #endif
 
 #ifdef CONFIG_DEBUG_MUTEXES
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e93cc740c22b..4f71cfd29c6f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -422,6 +422,37 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
 	return mpol_check_policy(mode, nodes);
 }
 
+
+/*
+ * Update task->flags PF_MEMPOLICY bit: set iff non-default
+ * mempolicy.  Allows more rapid checking of this (combined perhaps
+ * with other PF_* flag bits) on memory allocation hot code paths.
+ *
+ * If called from outside this file, the task 'p' should -only- be
+ * a newly forked child not yet visible on the task list, because
+ * manipulating the task flags of a visible task is not safe.
+ *
+ * The above limitation is why this routine has the funny name
+ * mpol_fix_fork_child_flag().
+ *
+ * It is also safe to call this with a task pointer of current,
+ * which the static wrapper mpol_set_task_struct_flag() does,
+ * for use within this file.
+ */
+
+void mpol_fix_fork_child_flag(struct task_struct *p)
+{
+	if (p->mempolicy)
+		p->flags |= PF_MEMPOLICY;
+	else
+		p->flags &= ~PF_MEMPOLICY;
+}
+
+static void mpol_set_task_struct_flag(void)
+{
+	mpol_fix_fork_child_flag(current);
+}
+
 /* Set the process memory policy */
 long do_set_mempolicy(int mode, nodemask_t *nodes)
 {
@@ -434,6 +465,7 @@ long do_set_mempolicy(int mode, nodemask_t *nodes)
 		return PTR_ERR(new);
 	mpol_free(current->mempolicy);
 	current->mempolicy = new;
+	mpol_set_task_struct_flag();
 	if (new && new->policy == MPOL_INTERLEAVE)
 		current->il_next = first_node(new->v.nodes);
 	return 0;
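Aside (not part of the patch): the invariant being maintained is that every assignment to the task's mempolicy pointer is followed by a resync of the cached PF_MEMPOLICY bit, which is why do_set_mempolicy() calls the wrapper immediately after installing the new policy. A user-space sketch of that invariant, with simplified stand-ins for the kernel types:

#include <assert.h>
#include <stddef.h>

#define PF_MEMPOLICY 0x10000000u

struct mempolicy { int policy; };
struct task { unsigned int flags; struct mempolicy *mempolicy; };

/* Mirrors mpol_fix_fork_child_flag(): bit set iff a policy is installed. */
static void fix_flag(struct task *p)
{
	if (p->mempolicy)
		p->flags |= PF_MEMPOLICY;
	else
		p->flags &= ~PF_MEMPOLICY;
}

/* Mirrors the do_set_mempolicy() change: assign, then resync the bit. */
static void set_policy(struct task *p, struct mempolicy *new)
{
	p->mempolicy = new;
	fix_flag(p);
}

int main(void)
{
	struct mempolicy interleave = { .policy = 1 };
	struct task t = { 0, NULL };

	set_policy(&t, &interleave);
	assert(t.flags & PF_MEMPOLICY);
	set_policy(&t, NULL);
	assert(!(t.flags & PF_MEMPOLICY));
	return 0;
}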
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -899,6 +899,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 
 #ifdef CONFIG_NUMA
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
+static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
 static struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -2808,19 +2809,11 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	struct array_cache *ac;
 
 #ifdef CONFIG_NUMA
-	if (unlikely(current->mempolicy && !in_interrupt())) {
-		int nid = slab_node(current->mempolicy);
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
-	if (unlikely(cpuset_do_slab_mem_spread() &&
-			(cachep->flags & SLAB_MEM_SPREAD) &&
-			!in_interrupt())) {
-		int nid = cpuset_mem_spread_node();
-
-		if (nid != numa_node_id())
-			return __cache_alloc_node(cachep, flags, nid);
-	}
+	if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
+			PF_MEMPOLICY))) {
+		objp = alternate_node_alloc(cachep, flags);
+		if (objp != NULL)
+			return objp;
+	}
 #endif
 
@@ -2856,6 +2849,28 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 
 #ifdef CONFIG_NUMA
 /*
+ * Try allocating on another node if PF_SPREAD_PAGE|PF_SPREAD_SLAB|PF_MEMPOLICY.
+ *
+ * If we are in_interrupt, then process context, including cpusets and
+ * mempolicy, may not apply and should not be used for allocation policy.
+ */
+static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+	int nid_alloc, nid_here;
+
+	if (in_interrupt())
+		return NULL;
+	nid_alloc = nid_here = numa_node_id();
+	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
+		nid_alloc = cpuset_mem_spread_node();
+	else if (current->mempolicy)
+		nid_alloc = slab_node(current->mempolicy);
+	if (nid_alloc != nid_here)
+		return __cache_alloc_node(cachep, flags, nid_alloc);
+	return NULL;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
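Aside (not part of the patch): note the decision order in alternate_node_alloc() — interrupt context bails out first, cpuset memory spread takes precedence over a task mempolicy, and a NULL return means "no alternate node, fall through to the local fast path". A user-space sketch of that control flow with stubbed predicates; every name here is illustrative, standing in for in_interrupt(), cpuset_do_slab_mem_spread(), cpuset_mem_spread_node(), slab_node() and numa_node_id():

#include <stdio.h>

/* Stubbed-out state standing in for the kernel predicates. */
static int in_irq_ctx = 0;
static int spread_enabled = 0;
static int has_mempolicy = 1;

static int spread_node(void)    { return 2; }	/* round-robins in the kernel */
static int mempolicy_node(void) { return 1; }
static int local_node(void)     { return 0; }

/* Returns the alternate node to allocate from, or -1 to signal
 * "use the local fast path" (the kernel version returns NULL). */
static int pick_alternate_node(void)
{
	int nid_alloc, nid_here;

	if (in_irq_ctx)
		return -1;			/* process context may not apply */
	nid_alloc = nid_here = local_node();
	if (spread_enabled)
		nid_alloc = spread_node();	/* cpuset spread wins... */
	else if (has_mempolicy)
		nid_alloc = mempolicy_node();	/* ...else the task mempolicy */
	if (nid_alloc != nid_here)
		return nid_alloc;
	return -1;				/* alternate == local: fast path */
}

int main(void)
{
	printf("alternate node: %d\n", pick_alternate_node());	/* prints 1 */
	return 0;
}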