-rw-r--r--  include/linux/gfp.h |  4
-rw-r--r--  mm/page_alloc.c     | 17
-rw-r--r--  mm/slab.c           | 65
3 files changed, 72 insertions, 14 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 20f9148e38d9..7851e6b520cf 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -157,9 +157,9 @@ extern void FASTCALL(free_cold_page(struct page *page));
 
 void page_alloc_init(void);
 #ifdef CONFIG_NUMA
-void drain_remote_pages(void);
+void drain_node_pages(int node);
 #else
-static inline void drain_remote_pages(void) { };
+static inline void drain_node_pages(int node) { };
 #endif
 
 #endif /* __LINUX_GFP_H */
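
Not part of the patch: the gfp.h hunk keeps the usual config-stub pattern, with the real prototype under CONFIG_NUMA and an empty static inline otherwise, so call sites need no #ifdefs of their own. A rough, compilable userspace model of that pattern (CONFIG_NUMA_MODEL and drain_node_pages_model() are made-up names for illustration, not kernel code):

#include <stdio.h>

#define CONFIG_NUMA_MODEL 1             /* flip to 0 to model !CONFIG_NUMA */

#if CONFIG_NUMA_MODEL
static void drain_node_pages_model(int node)
{
        printf("draining pagesets on node %d\n", node);
}
#else
static inline void drain_node_pages_model(int node) { }        /* no-op stub */
#endif

int main(void)
{
        drain_node_pages_model(1);      /* the call site compiles either way */
        return 0;
}
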
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 791690d7d3fa..234bd4895d14 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -590,21 +590,20 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 }
 
 #ifdef CONFIG_NUMA
-/* Called from the slab reaper to drain remote pagesets */
-void drain_remote_pages(void)
+/*
+ * Called from the slab reaper to drain pagesets on a particular node that
+ * belong to the currently executing processor.
+ */
+void drain_node_pages(int nodeid)
 {
-        struct zone *zone;
-        int i;
+        int i, z;
         unsigned long flags;
 
         local_irq_save(flags);
-        for_each_zone(zone) {
+        for (z = 0; z < MAX_NR_ZONES; z++) {
+                struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
                 struct per_cpu_pageset *pset;
 
-                /* Do not drain local pagesets */
-                if (zone->zone_pgdat->node_id == numa_node_id())
-                        continue;
-
                 pset = zone_pcp(zone, smp_processor_id());
                 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                         struct per_cpu_pages *pcp;
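
Not part of the patch: a small userspace sketch of the structure walk the new drain_node_pages() performs, visiting only the zones of one node and flushing that CPU's per-CPU page lists there. All of the types, sizes, and names below are simplified stand-ins for illustration, not the kernel's definitions.

#include <stdio.h>

#define MAX_NR_ZONES 3                  /* e.g. DMA, NORMAL, HIGHMEM */
#define NR_CPUS      4

struct per_cpu_pages { int count; };                        /* pages cached per CPU */
struct per_cpu_pageset { struct per_cpu_pages pcp[2]; };    /* hot and cold lists */
struct zone { struct per_cpu_pageset pageset[NR_CPUS]; };
struct node { struct zone node_zones[MAX_NR_ZONES]; };

/* Walk only the zones belonging to one node and empty this CPU's pcp lists. */
static void drain_node_pages_model(struct node *nodes, int nodeid, int cpu)
{
        for (int z = 0; z < MAX_NR_ZONES; z++) {
                struct zone *zone = &nodes[nodeid].node_zones[z];
                struct per_cpu_pageset *pset = &zone->pageset[cpu];

                for (int i = 0; i < 2; i++) {
                        /* In the kernel this frees pcp->count pages back to the zone. */
                        pset->pcp[i].count = 0;
                }
        }
}

int main(void)
{
        struct node nodes[2] = { 0 };

        nodes[1].node_zones[0].pageset[0].pcp[0].count = 7;
        drain_node_pages_model(nodes, 1, 0);
        printf("%d\n", nodes[1].node_zones[0].pageset[0].pcp[0].count);  /* 0 */
        return 0;
}
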
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -789,6 +789,47 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, char *
         dump_stack();
 }
 
+#ifdef CONFIG_NUMA
+/*
+ * Special reaping functions for NUMA systems called from cache_reap().
+ * These take care of doing round robin flushing of alien caches (containing
+ * objects freed on different nodes from which they were allocated) and the
+ * flushing of remote pcps by calling drain_node_pages.
+ */
+static DEFINE_PER_CPU(unsigned long, reap_node);
+
+static void init_reap_node(int cpu)
+{
+        int node;
+
+        node = next_node(cpu_to_node(cpu), node_online_map);
+        if (node == MAX_NUMNODES)
+                node = 0;
+
+        __get_cpu_var(reap_node) = node;
+}
+
+static void next_reap_node(void)
+{
+        int node = __get_cpu_var(reap_node);
+
+        /*
+         * Also drain per cpu pages on remote zones
+         */
+        if (node != numa_node_id())
+                drain_node_pages(node);
+
+        node = next_node(node, node_online_map);
+        if (unlikely(node >= MAX_NUMNODES))
+                node = first_node(node_online_map);
+        __get_cpu_var(reap_node) = node;
+}
+
+#else
+#define init_reap_node(cpu) do { } while (0)
+#define next_reap_node(void) do { } while (0)
+#endif
+
 /*
  * Initiate the reap timer running on the target CPU. We run at around 1 to 2Hz
  * via the workqueue/eventd.
@@ -806,6 +847,7 @@ static void __devinit start_cpu_timer(int cpu)
          * at that time.
          */
         if (keventd_up() && reap_work->func == NULL) {
+                init_reap_node(cpu);
                 INIT_WORK(reap_work, cache_reap, NULL);
                 schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu);
         }
@@ -884,6 +926,23 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
         }
 }
 
+/*
+ * Called from cache_reap() to regularly drain alien caches round robin.
+ */
+static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+{
+        int node = __get_cpu_var(reap_node);
+
+        if (l3->alien) {
+                struct array_cache *ac = l3->alien[node];
+                if (ac && ac->avail) {
+                        spin_lock_irq(&ac->lock);
+                        __drain_alien_cache(cachep, ac, node);
+                        spin_unlock_irq(&ac->lock);
+                }
+        }
+}
+
 static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
         int i = 0;
@@ -902,6 +961,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **al
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
 
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
@@ -3497,8 +3557,7 @@ static void cache_reap(void *unused)
                 check_irq_on();
 
                 l3 = searchp->nodelists[numa_node_id()];
-                if (l3->alien)
-                        drain_alien_cache(searchp, l3->alien);
+                reap_alien(searchp, l3);
                 spin_lock_irq(&l3->list_lock);
 
                 drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3548,7 +3607,7 @@ static void cache_reap(void *unused)
         }
         check_irq_on();
         mutex_unlock(&cache_chain_mutex);
-        drain_remote_pages();
+        next_reap_node();
         /* Setup the next iteration */
         schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
 }
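
For reference, a compilable userspace sketch of the round-robin node advance that init_reap_node()/next_reap_node() implement in the patch: each reaper run handles one online node, then moves to the next and wraps around at the end. Only the advance/wrap step is modelled here, not the actual draining; the bitmap and helper names (node_online[], next_node_model(), first_node_model()) are simplified stand-ins, not the kernel APIs.

#include <stdio.h>

#define MAX_NUMNODES 8

/* Which NUMA nodes are "online" in this model: nodes 0, 1 and 3. */
static int node_online[MAX_NUMNODES] = { 1, 1, 0, 1, 0, 0, 0, 0 };

/* Return the next online node after 'node', or MAX_NUMNODES if none remain. */
static int next_node_model(int node)
{
        for (int n = node + 1; n < MAX_NUMNODES; n++)
                if (node_online[n])
                        return n;
        return MAX_NUMNODES;            /* past the end, caller wraps around */
}

static int first_node_model(void)
{
        return next_node_model(-1);
}

int main(void)
{
        int reap_node = first_node_model();

        for (int step = 0; step < 6; step++) {
                printf("reaping node %d\n", reap_node);
                reap_node = next_node_model(reap_node);
                if (reap_node >= MAX_NUMNODES)
                        reap_node = first_node_model();
        }
        return 0;                       /* prints nodes 0 1 3 0 1 3 */
}
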