author    Thomas Gleixner <tglx@linutronix.de>  2017-07-10 18:50:09 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-07-10 19:32:33 -0400
commit    3f906ba23689a3f824424c50f3ae937c2c70f676 (patch)
tree      dbf8731ebb49d4cca6dd227e6ff5aaf828f6abf0 /mm
parent    a47fed5b5b014f5a13878b90ef2c3a7dc294189f (diff)
mm/memory-hotplug: switch locking to a percpu rwsem
Andrey reported a potential deadlock between the memory hotplug lock and
the cpu hotplug lock.  The reason is that memory hotplug takes the memory
hotplug lock and then calls stop_machine(), which calls get_online_cpus().
That's the reverse of the lock order used by

    get_online_cpus();
    get_online_mems();

in mm/slab_common.c.

The problem has existed forever.  The reason it was never reported is that
the cpu hotplug locking had a homebrewed recursive reader-writer semaphore
construct which, due to the recursion, evaded full lockdep coverage.  The
memory hotplug code copied that construct verbatim and therefore has
similar issues.

Three steps fix this:

1) Convert the memory hotplug locking to a percpu rwsem so the potential
   issues get reported properly by lockdep.

2) Lock the online cpus in mem_hotplug_begin() before taking the memory
   hotplug rwsem, and use stop_machine_cpuslocked() in the page_alloc
   code to avoid recursive locking.

3) The cpu hotplug locking in #2 causes a recursive acquisition of the
   cpu hotplug lock via __offline_pages() -> lru_add_drain_all().  Solve
   this by invoking lru_add_drain_all_cpuslocked() instead.

Link: http://lkml.kernel.org/r/20170704093421.506836322@linutronix.de
Reported-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
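The inversion is easiest to see as the two call chains side by side.
The following is a condensed, illustrative sketch only (kernel context
assumed, function bodies elided, so it is not compilable on its own):

    /*
     * Path A: a memory hotplug operation, e.g. onlining a memory block.
     * The memory hotplug lock is taken first:
     */
            mem_hotplug_begin();            /* mem hotplug lock (write)   */
            build_all_zonelists(...);
                    stop_machine(...);      /* -> get_online_cpus():      */
                                            /*    cpu hotplug lock SECOND */
            mem_hotplug_done();

    /*
     * Path B: mm/slab_common.c takes the same two locks the other way
     * around -- the classic AB-BA inversion lockdep is meant to catch:
     */
            get_online_cpus();              /* cpu hotplug lock FIRST     */
            get_online_mems();              /* mem hotplug lock SECOND    */
            /* ... */
            put_online_mems();
            put_online_cpus();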
Diffstat (limited to 'mm')
-rw-r--r--  mm/memory_hotplug.c | 89
-rw-r--r--  mm/page_alloc.c     |  2
2 files changed, 16 insertions(+), 75 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7cd4377ac83e..8dccc317aac2 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -52,32 +52,17 @@ static void generic_online_page(struct page *page);
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
 
-/* The same as the cpu_hotplug lock, but for memory hotplug. */
-static struct {
-        struct task_struct *active_writer;
-        struct mutex lock; /* Synchronizes accesses to refcount, */
-        /*
-         * Also blocks the new readers during
-         * an ongoing mem hotplug operation.
-         */
-        int refcount;
+DEFINE_STATIC_PERCPU_RWSEM(mem_hotplug_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-        struct lockdep_map dep_map;
-#endif
-} mem_hotplug = {
-        .active_writer = NULL,
-        .lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
-        .refcount = 0,
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-        .dep_map = {.name = "mem_hotplug.lock" },
-#endif
-};
+void get_online_mems(void)
+{
+        percpu_down_read(&mem_hotplug_lock);
+}
 
-/* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
-#define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
-#define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
-#define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
+void put_online_mems(void)
+{
+        percpu_up_read(&mem_hotplug_lock);
+}
 
 bool movable_node_enabled = false;
 
@@ -99,60 +84,16 @@ static int __init setup_memhp_default_state(char *str)
 }
 __setup("memhp_default_state=", setup_memhp_default_state);
 
-void get_online_mems(void)
-{
-        might_sleep();
-        if (mem_hotplug.active_writer == current)
-                return;
-        memhp_lock_acquire_read();
-        mutex_lock(&mem_hotplug.lock);
-        mem_hotplug.refcount++;
-        mutex_unlock(&mem_hotplug.lock);
-
-}
-
-void put_online_mems(void)
-{
-        if (mem_hotplug.active_writer == current)
-                return;
-        mutex_lock(&mem_hotplug.lock);
-
-        if (WARN_ON(!mem_hotplug.refcount))
-                mem_hotplug.refcount++; /* try to fix things up */
-
-        if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
-                wake_up_process(mem_hotplug.active_writer);
-        mutex_unlock(&mem_hotplug.lock);
-        memhp_lock_release();
-
-}
-
-/* Serializes write accesses to mem_hotplug.active_writer. */
-static DEFINE_MUTEX(memory_add_remove_lock);
-
 void mem_hotplug_begin(void)
 {
-        mutex_lock(&memory_add_remove_lock);
-
-        mem_hotplug.active_writer = current;
-
-        memhp_lock_acquire();
-        for (;;) {
-                mutex_lock(&mem_hotplug.lock);
-                if (likely(!mem_hotplug.refcount))
-                        break;
-                __set_current_state(TASK_UNINTERRUPTIBLE);
-                mutex_unlock(&mem_hotplug.lock);
-                schedule();
-        }
+        cpus_read_lock();
+        percpu_down_write(&mem_hotplug_lock);
 }
 
 void mem_hotplug_done(void)
 {
-        mem_hotplug.active_writer = NULL;
-        mutex_unlock(&mem_hotplug.lock);
-        memhp_lock_release();
-        mutex_unlock(&memory_add_remove_lock);
+        percpu_up_write(&mem_hotplug_lock);
+        cpus_read_unlock();
 }
 
 /* add this memory to iomem resource */
@@ -1725,7 +1666,7 @@ repeat:
                 goto failed_removal;
         ret = 0;
         if (drain) {
-                lru_add_drain_all();
+                lru_add_drain_all_cpuslocked();
                 cond_resched();
                 drain_all_pages(zone);
         }
@@ -1746,7 +1687,7 @@ repeat:
                 }
         }
         /* drain all zone's lru pagevec, this is asynchronous... */
-        lru_add_drain_all();
+        lru_add_drain_all_cpuslocked();
         yield();
         /* drain pcp pages, this is synchronous. */
         drain_all_pages(zone);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d90c31951b90..64b7d82a9b1a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5278,7 +5278,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 #endif
         /* we have to stop all cpus to guarantee there is no user
            of zonelist */
-        stop_machine(__build_all_zonelists, pgdat, NULL);
+        stop_machine_cpuslocked(__build_all_zonelists, pgdat, NULL);
         /* cpuset refresh routine should be here */
         }
         vm_total_pages = nr_free_pagecache_pages();
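Note the design choice the conversion bakes in: mem_hotplug_begin() now
acquires the cpu hotplug lock before the memory hotplug rwsem, the same
order the readers in mm/slab_common.c use, so both sides agree on one
ordering that lockdep can verify.  A minimal sketch of a reader under the
new scheme (hypothetical caller, shown for the pattern only; not a
literal excerpt from the tree):

    /* Hypothetical reader that must not race with memory hotplug.
     * The reader side of a percpu rwsem is a per-cpu counter bump, so
     * this is cheap; note the same order as mem_hotplug_begin():
     * cpus first, then mems. */
            get_online_cpus();      /* cpus_read_lock() under the hood     */
            get_online_mems();      /* percpu_down_read(&mem_hotplug_lock) */

            /* ... walk memory blocks / caches safely ... */

            put_online_mems();      /* percpu_up_read(&mem_hotplug_lock)   */
            put_online_cpus();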