diff options
author | Li Zhong <zhong@linux.vnet.ibm.com> | 2014-04-10 04:25:31 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2014-04-28 02:32:14 -0400 |
commit | 42dbfc8649737cb622b2a7e02045401c4c09561c (patch) | |
tree | 1cf8737c7ff7992f152f861ef9c2cbc98c156cb1 /arch/powerpc | |
parent | 0c930692107be4b91c3eeecfd9c9d600039f966f (diff) |
powerpc/pseries: Protect remove_memory() with device hotplug lock
While testing memory hot-remove, I found following dead lock:
Process #1141 is drmgr, trying to remove some memory, i.e. memory499.
It holds the memory_hotplug_mutex, and blocks when trying to remove file
"online" under dir memory499, in kernfs_drain(), at
wait_event(root->deactivate_waitq,
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
Process #1120 is trying to online memory499 by
echo 1 > memory499/online
In .kernfs_fop_write, it uses kernfs_get_active() to increase
&kn->active, thus blocking process #1141. While itself is blocked later
when trying to acquire memory_hotplug_mutex, which is held by process
The backtrace of both processes are shown below:
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c000000000263ca4>] .online_pages+0x74/0x7b0
[<c00000000055b40c>] .memory_subsys_online+0x9c/0x150
[<c00000000053cbe8>] .device_online+0xb8/0x120
[<c00000000053cd04>] .online_store+0xb4/0xc0
[<c000000000538ce4>] .dev_attr_store+0x64/0xa0
[<c00000000030f4ec>] .sysfs_kf_write+0x7c/0xb0
[<c00000000030e574>] .kernfs_fop_write+0x154/0x1e0
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
[<c000000001b18600>] 0xc000000001b18600
[<c000000000015044>] .__switch_to+0x144/0x200
[<c00000000030be14>] .__kernfs_remove+0x204/0x300
[<c00000000030d428>] .kernfs_remove_by_name_ns+0x68/0xf0
[<c00000000030fb38>] .sysfs_remove_file_ns+0x38/0x60
[<c000000000539354>] .device_remove_attrs+0x54/0xc0
[<c000000000539fd8>] .device_del+0x158/0x250
[<c00000000053a104>] .device_unregister+0x34/0xa0
[<c00000000055bc14>] .unregister_memory_section+0x164/0x170
[<c00000000024ee18>] .__remove_pages+0x108/0x4c0
[<c00000000004b590>] .arch_remove_memory+0x60/0xc0
[<c00000000026446c>] .remove_memory+0x8c/0xe0
[<c00000000007f9f4>] .pseries_remove_memblock+0xd4/0x160
[<c00000000007fcfc>] .pseries_memory_notifier+0x27c/0x290
[<c0000000008ae6cc>] .notifier_call_chain+0x8c/0x100
[<c0000000000d858c>] .__blocking_notifier_call_chain+0x6c/0xe0
[<c00000000071ddec>] .of_property_notify+0x7c/0xc0
[<c00000000071ed3c>] .of_update_property+0x3c/0x1b0
[<c0000000000756cc>] .ofdt_write+0x3dc/0x740
[<c0000000002f60fc>] .proc_reg_write+0xac/0x110
[<c000000000268450>] .vfs_write+0xe0/0x260
[<c000000000269144>] .SyS_write+0x64/0x110
[<c000000000009ffc>] syscall_exit+0x0/0x7c
This patch uses lock_device_hotplug() to protect remove_memory() called
in pseries_remove_memblock(), which is also stated before function
remove_memory():
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations before this call, as required by
* try_offline_node().
*/
void __ref remove_memory(int nid, u64 start, u64 size)
With this lock held, the other process(#1120 above) trying to online the
memory block will retry the system call when calling
lock_device_hotplug_sysfs(), and finally find No such device error.
Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/platforms/pseries/hotplug-memory.c | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 573b488fc48b..7f75c94af822 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c | |||
@@ -100,10 +100,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz | |||
100 | 100 | ||
101 | start_pfn = base >> PAGE_SHIFT; | 101 | start_pfn = base >> PAGE_SHIFT; |
102 | 102 | ||
103 | if (!pfn_valid(start_pfn)) { | 103 | lock_device_hotplug(); |
104 | memblock_remove(base, memblock_size); | 104 | |
105 | return 0; | 105 | if (!pfn_valid(start_pfn)) |
106 | } | 106 | goto out; |
107 | 107 | ||
108 | block_sz = memory_block_size_bytes(); | 108 | block_sz = memory_block_size_bytes(); |
109 | sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; | 109 | sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; |
@@ -114,8 +114,10 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz | |||
114 | base += MIN_MEMORY_BLOCK_SIZE; | 114 | base += MIN_MEMORY_BLOCK_SIZE; |
115 | } | 115 | } |
116 | 116 | ||
117 | out: | ||
117 | /* Update memory regions for memory remove */ | 118 | /* Update memory regions for memory remove */ |
118 | memblock_remove(base, memblock_size); | 119 | memblock_remove(base, memblock_size); |
120 | unlock_device_hotplug(); | ||
119 | return 0; | 121 | return 0; |
120 | } | 122 | } |
121 | 123 | ||