diff options
author | David Hildenbrand <david@redhat.com> | 2018-10-30 18:10:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-31 11:54:17 -0400 |
commit | d15e59260f62bd5e0f625cf5f5240f6ffac78ab6 (patch) | |
tree | 8263d73a0a1d853bbcdc77ade12fa7e5c499220f | |
parent | 2f770806fd2c3db9616965e57ba60d80f43c827d (diff) |
mm/memory_hotplug: make remove_memory() take the device_hotplug_lock
Patch series "mm: online/offline_pages called w.o. mem_hotplug_lock", v3.
Reading through the code and studying how mem_hotplug_lock is to be used,
I noticed that there are two places where we can end up calling
device_online()/device_offline() - online_pages()/offline_pages() without
the mem_hotplug_lock. And there are other places where we call
device_online()/device_offline() without the device_hotplug_lock.
While e.g.
echo "online" > /sys/devices/system/memory/memory9/state
is fine, e.g.
echo 1 > /sys/devices/system/memory/memory9/online
Will not take the mem_hotplug_lock. However the device_lock() and
device_hotplug_lock.
E.g. via memory_probe_store(), we can end up calling
add_memory()->online_pages() without the device_hotplug_lock. So we can
have concurrent callers in online_pages(). We e.g. touch in
online_pages() basically unprotected zone->present_pages then.
Looks like there is a longer history to that (see Patch #2 for details),
and fixing it to work the way it was intended is not really possible. We
would e.g. have to take the mem_hotplug_lock in device/base/core.c, which
sounds wrong.
Summary: We had a lock inversion on mem_hotplug_lock and device_lock().
More details can be found in patch 3 and patch 6.
I propose the general rules (documentation added in patch 6):
1. add_memory/add_memory_resource() must only be called with
device_hotplug_lock.
2. remove_memory() must only be called with device_hotplug_lock. This is
already documented and holds for all callers.
3. device_online()/device_offline() must only be called with
device_hotplug_lock. This is already documented and true for now in core
code. Other callers (related to memory hotplug) have to be fixed up.
4. mem_hotplug_lock is taken inside of add_memory/remove_memory/
online_pages/offline_pages.
To me, this looks way cleaner than what we have right now (and easier to
verify). And looking at the documentation of remove_memory, using
lock_device_hotplug also for add_memory() feels natural.
This patch (of 6):
remove_memory() is exported right now but requires the
device_hotplug_lock, which is not exported. So let's provide a variant
that takes the lock and only export that one.
The lock is already held in
arch/powerpc/platforms/pseries/hotplug-memory.c
drivers/acpi/acpi_memhotplug.c
arch/powerpc/platforms/powernv/memtrace.c
Apart from that, there are not other users in the tree.
Link: http://lkml.kernel.org/r/20180925091457.28651-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Rashmica Gupta <rashmica.g@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: John Allen <jallen@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | arch/powerpc/platforms/powernv/memtrace.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-memory.c | 6 | ||||
-rw-r--r-- | drivers/acpi/acpi_memhotplug.c | 2 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 3 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 9 |
5 files changed, 15 insertions, 7 deletions
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index a29fdf8a2e56..773623f6bfb1 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c | |||
@@ -121,7 +121,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) | |||
121 | lock_device_hotplug(); | 121 | lock_device_hotplug(); |
122 | end_pfn = base_pfn + nr_pages; | 122 | end_pfn = base_pfn + nr_pages; |
123 | for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { | 123 | for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { |
124 | remove_memory(nid, pfn << PAGE_SHIFT, bytes); | 124 | __remove_memory(nid, pfn << PAGE_SHIFT, bytes); |
125 | } | 125 | } |
126 | unlock_device_hotplug(); | 126 | unlock_device_hotplug(); |
127 | return base_pfn << PAGE_SHIFT; | 127 | return base_pfn << PAGE_SHIFT; |
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 2b796da822c2..d79b31e7a6cf 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c | |||
@@ -300,7 +300,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz | |||
300 | nid = memory_add_physaddr_to_nid(base); | 300 | nid = memory_add_physaddr_to_nid(base); |
301 | 301 | ||
302 | for (i = 0; i < sections_per_block; i++) { | 302 | for (i = 0; i < sections_per_block; i++) { |
303 | remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); | 303 | __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); |
304 | base += MIN_MEMORY_BLOCK_SIZE; | 304 | base += MIN_MEMORY_BLOCK_SIZE; |
305 | } | 305 | } |
306 | 306 | ||
@@ -389,7 +389,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) | |||
389 | block_sz = pseries_memory_block_size(); | 389 | block_sz = pseries_memory_block_size(); |
390 | nid = memory_add_physaddr_to_nid(lmb->base_addr); | 390 | nid = memory_add_physaddr_to_nid(lmb->base_addr); |
391 | 391 | ||
392 | remove_memory(nid, lmb->base_addr, block_sz); | 392 | __remove_memory(nid, lmb->base_addr, block_sz); |
393 | 393 | ||
394 | /* Update memory regions for memory remove */ | 394 | /* Update memory regions for memory remove */ |
395 | memblock_remove(lmb->base_addr, block_sz); | 395 | memblock_remove(lmb->base_addr, block_sz); |
@@ -676,7 +676,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) | |||
676 | 676 | ||
677 | rc = dlpar_online_lmb(lmb); | 677 | rc = dlpar_online_lmb(lmb); |
678 | if (rc) { | 678 | if (rc) { |
679 | remove_memory(nid, lmb->base_addr, block_sz); | 679 | __remove_memory(nid, lmb->base_addr, block_sz); |
680 | invalidate_lmb_associativity_index(lmb); | 680 | invalidate_lmb_associativity_index(lmb); |
681 | } else { | 681 | } else { |
682 | lmb->flags |= DRCONF_MEM_ASSIGNED; | 682 | lmb->flags |= DRCONF_MEM_ASSIGNED; |
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 6b0d3ef7309c..811148415993 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c | |||
@@ -282,7 +282,7 @@ static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device) | |||
282 | nid = memory_add_physaddr_to_nid(info->start_addr); | 282 | nid = memory_add_physaddr_to_nid(info->start_addr); |
283 | 283 | ||
284 | acpi_unbind_memory_blocks(info); | 284 | acpi_unbind_memory_blocks(info); |
285 | remove_memory(nid, info->start_addr, info->length); | 285 | __remove_memory(nid, info->start_addr, info->length); |
286 | list_del(&info->list); | 286 | list_del(&info->list); |
287 | kfree(info); | 287 | kfree(info); |
288 | } | 288 | } |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 34a28227068d..1f096852f479 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -301,6 +301,7 @@ extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages); | |||
301 | extern void try_offline_node(int nid); | 301 | extern void try_offline_node(int nid); |
302 | extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); | 302 | extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); |
303 | extern void remove_memory(int nid, u64 start, u64 size); | 303 | extern void remove_memory(int nid, u64 start, u64 size); |
304 | extern void __remove_memory(int nid, u64 start, u64 size); | ||
304 | 305 | ||
305 | #else | 306 | #else |
306 | static inline bool is_mem_section_removable(unsigned long pfn, | 307 | static inline bool is_mem_section_removable(unsigned long pfn, |
@@ -317,6 +318,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages) | |||
317 | } | 318 | } |
318 | 319 | ||
319 | static inline void remove_memory(int nid, u64 start, u64 size) {} | 320 | static inline void remove_memory(int nid, u64 start, u64 size) {} |
321 | static inline void __remove_memory(int nid, u64 start, u64 size) {} | ||
320 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 322 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
321 | 323 | ||
322 | extern void __ref free_area_init_core_hotplug(int nid); | 324 | extern void __ref free_area_init_core_hotplug(int nid); |
@@ -330,7 +332,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, | |||
330 | unsigned long nr_pages, struct vmem_altmap *altmap); | 332 | unsigned long nr_pages, struct vmem_altmap *altmap); |
331 | extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); | 333 | extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); |
332 | extern bool is_memblock_offlined(struct memory_block *mem); | 334 | extern bool is_memblock_offlined(struct memory_block *mem); |
333 | extern void remove_memory(int nid, u64 start, u64 size); | ||
334 | extern int sparse_add_one_section(struct pglist_data *pgdat, | 335 | extern int sparse_add_one_section(struct pglist_data *pgdat, |
335 | unsigned long start_pfn, struct vmem_altmap *altmap); | 336 | unsigned long start_pfn, struct vmem_altmap *altmap); |
336 | extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, | 337 | extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 41e326472ef9..8f38e689da25 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -1806,7 +1806,7 @@ EXPORT_SYMBOL(try_offline_node); | |||
1806 | * and online/offline operations before this call, as required by | 1806 | * and online/offline operations before this call, as required by |
1807 | * try_offline_node(). | 1807 | * try_offline_node(). |
1808 | */ | 1808 | */ |
1809 | void __ref remove_memory(int nid, u64 start, u64 size) | 1809 | void __ref __remove_memory(int nid, u64 start, u64 size) |
1810 | { | 1810 | { |
1811 | int ret; | 1811 | int ret; |
1812 | 1812 | ||
@@ -1835,5 +1835,12 @@ void __ref remove_memory(int nid, u64 start, u64 size) | |||
1835 | 1835 | ||
1836 | mem_hotplug_done(); | 1836 | mem_hotplug_done(); |
1837 | } | 1837 | } |
1838 | |||
1839 | void remove_memory(int nid, u64 start, u64 size) | ||
1840 | { | ||
1841 | lock_device_hotplug(); | ||
1842 | __remove_memory(nid, start, size); | ||
1843 | unlock_device_hotplug(); | ||
1844 | } | ||
1838 | EXPORT_SYMBOL_GPL(remove_memory); | 1845 | EXPORT_SYMBOL_GPL(remove_memory); |
1839 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 1846 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |