path: root/mm/page_alloc.c
author     Wei Yang <richard.weiyang@gmail.com>            2018-12-28 03:38:58 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-12-28 15:11:51 -0500
commit     d9367bd06faa1beb2951899272f925bdf877d28b (patch)
tree       3b6bd6d9c6779ffd8ee020d4b01b033efbacc1e1 /mm/page_alloc.c
parent     d53ce042277a94eadf9a8a31fc41fac54c67dec5 (diff)
mm, page_alloc: enable pcpu_drain with zone capability
drain_all_pages is documented to drain per-cpu pages for a given zone (if non-NULL). The current implementation doesn't match the description, though: it drains all pcp pages for all zones that happen to have cached pages on the same cpu as the given zone. This leads to premature pcp cache draining for zones that are of no interest to the caller - e.g. compaction, hwpoison or memory offline - and forces the page allocator to take locks, with potential lock contention as a result.

There is no real reason for this sub-optimal implementation. Replace the per-cpu work item with a dedicated structure which contains a pointer to the zone and pass it over to the worker. This gets the zone information all the way down to the worker function so it can do the right job.

[akpm@linux-foundation.org: avoid 80-col tricks]
[mhocko@suse.com: refactor the whole changelog]
Link: http://lkml.kernel.org/r/20181212142550.61686-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--   mm/page_alloc.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2cd1f9bb1b52..75865e1325b5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -97,8 +97,12 @@ int _node_numa_mem_[MAX_NUMNODES];
 #endif

 /* work_structs for global per-cpu drains */
+struct pcpu_drain {
+        struct zone *zone;
+        struct work_struct work;
+};
 DEFINE_MUTEX(pcpu_drain_mutex);
-DEFINE_PER_CPU(struct work_struct, pcpu_drain);
+DEFINE_PER_CPU(struct pcpu_drain, pcpu_drain);

 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
@@ -2658,6 +2662,10 @@ void drain_local_pages(struct zone *zone)

 static void drain_local_pages_wq(struct work_struct *work)
 {
+        struct pcpu_drain *drain;
+
+        drain = container_of(work, struct pcpu_drain, work);
+
         /*
          * drain_all_pages doesn't use proper cpu hotplug protection so
          * we can race with cpu offline when the WQ can move this from
@@ -2666,7 +2674,7 @@ static void drain_local_pages_wq(struct work_struct *work)
          * a different one.
          */
         preempt_disable();
-        drain_local_pages(NULL);
+        drain_local_pages(drain->zone);
         preempt_enable();
 }

@@ -2737,12 +2745,14 @@ void drain_all_pages(struct zone *zone)
         }

         for_each_cpu(cpu, &cpus_with_pcps) {
-                struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu);
-                INIT_WORK(work, drain_local_pages_wq);
-                queue_work_on(cpu, mm_percpu_wq, work);
+                struct pcpu_drain *drain = per_cpu_ptr(&pcpu_drain, cpu);
+
+                drain->zone = zone;
+                INIT_WORK(&drain->work, drain_local_pages_wq);
+                queue_work_on(cpu, mm_percpu_wq, &drain->work);
         }
         for_each_cpu(cpu, &cpus_with_pcps)
-                flush_work(per_cpu_ptr(&pcpu_drain, cpu));
+                flush_work(&per_cpu_ptr(&pcpu_drain, cpu)->work);

         mutex_unlock(&pcpu_drain_mutex);
 }
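
The change above hinges on the standard container_of idiom: the extra context (the zone pointer) is carried by a wrapper structure (struct pcpu_drain) that embeds the generic work_struct, only the embedded member is handed to the workqueue, and the worker recovers the wrapper from it. As a rough illustration of that idiom outside the kernel tree, here is a minimal userspace C sketch; the names fake_work, fake_drain, fake_worker and zone_name are hypothetical stand-ins, not kernel APIs.

/*
 * Minimal userspace sketch (not kernel code) of the container_of pattern
 * used by struct pcpu_drain: the worker receives a pointer to the embedded
 * work item and recovers the enclosing structure, and with it the extra
 * context (here a zone name standing in for struct zone *zone).
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_work {
        void (*func)(struct fake_work *work);
};

struct fake_drain {
        const char *zone_name;          /* stands in for struct zone *zone */
        struct fake_work work;          /* embedded work item, as in struct pcpu_drain */
};

static void fake_worker(struct fake_work *work)
{
        /* Recover the wrapper from the embedded member, as drain_local_pages_wq now does. */
        struct fake_drain *drain = container_of(work, struct fake_drain, work);

        printf("draining pcp pages for zone: %s\n", drain->zone_name);
}

int main(void)
{
        struct fake_drain drain = { .zone_name = "Normal" };

        /* Fill in the context before handing over the work item, mirroring
         * drain->zone = zone followed by INIT_WORK()/queue_work_on() above. */
        drain.work.func = fake_worker;
        drain.work.func(&drain.work);   /* a workqueue would normally make this call */
        return 0;
}

In the patch itself, setting drain->zone before queueing each per-cpu work item is what lets drain_local_pages_wq() pass the requested zone to drain_local_pages() instead of NULL, so only the caller's zone is drained.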