aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory_hotplug.c
diff options
context:
space:
mode:
authorDavid Rientjes <rientjes@google.com>2015-04-14 18:45:11 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-14 19:49:00 -0400
commit30467e0b3be83c286d60039f8267dd421128ca74 (patch)
treeabda19f050d5dd049a31728d005fbef5c7987e41 /mm/memory_hotplug.c
parent17e0db822b00cff96c1b662ac0dc0449cb70e0ec (diff)
mm, hotplug: fix concurrent memory hot-add deadlock
There's a deadlock when concurrently hot-adding memory through the probe interface and switching a memory block from offline to online. When hot-adding memory via the probe interface, add_memory() first takes mem_hotplug_begin() and then device_lock() is later taken when registering the newly initialized memory block. This creates a lock dependency of (1) mem_hotplug.lock (2) dev->mutex. When switching a memory block from offline to online, dev->mutex is first grabbed in device_online() when the write(2) transitions an existing memory block from offline to online, and then online_pages() will take mem_hotplug_begin(). This creates a lock inversion between mem_hotplug.lock and dev->mutex. Vitaly reports that this deadlock can happen when kworker handling a probe event races with systemd-udevd switching a memory block's state. This patch requires the state transition to take mem_hotplug_begin() before dev->mutex. Hot-adding memory via the probe interface creates a memory block while holding mem_hotplug_begin(), there is no way to take dev->mutex first in this case. online_pages() and offline_pages() are only called when transitioning memory block state. We now require that mem_hotplug_begin() is taken before calling them -- this requires exporting the mem_hotplug_begin() and mem_hotplug_done() to generic code. In all hot-add and hot-remove cases, mem_hotplug_begin() is done prior to device_online(). This is all that is needed to avoid the deadlock. Signed-off-by: David Rientjes <rientjes@google.com> Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com> Tested-by: Vitaly Kuznetsov <vkuznets@redhat.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: "K. Y. Srinivasan" <kys@microsoft.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Zhang Zhen <zhenzhang.zhang@huawei.com> Cc: Vladimir Davydov <vdavydov@parallels.com> Cc: Wang Nan <wangnan0@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory_hotplug.c')
-rw-r--r--mm/memory_hotplug.c33
1 files changed, 12 insertions, 21 deletions
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index aaec7758eec3..e2e8014fb755 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -104,7 +104,7 @@ void put_online_mems(void)
104 104
105} 105}
106 106
107static void mem_hotplug_begin(void) 107void mem_hotplug_begin(void)
108{ 108{
109 mem_hotplug.active_writer = current; 109 mem_hotplug.active_writer = current;
110 110
@@ -119,7 +119,7 @@ static void mem_hotplug_begin(void)
119 } 119 }
120} 120}
121 121
122static void mem_hotplug_done(void) 122void mem_hotplug_done(void)
123{ 123{
124 mem_hotplug.active_writer = NULL; 124 mem_hotplug.active_writer = NULL;
125 mutex_unlock(&mem_hotplug.lock); 125 mutex_unlock(&mem_hotplug.lock);
@@ -959,6 +959,7 @@ static void node_states_set_node(int node, struct memory_notify *arg)
959} 959}
960 960
961 961
962/* Must be protected by mem_hotplug_begin() */
962int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type) 963int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_type)
963{ 964{
964 unsigned long flags; 965 unsigned long flags;
@@ -969,7 +970,6 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
969 int ret; 970 int ret;
970 struct memory_notify arg; 971 struct memory_notify arg;
971 972
972 mem_hotplug_begin();
973 /* 973 /*
974 * This doesn't need a lock to do pfn_to_page(). 974 * This doesn't need a lock to do pfn_to_page().
975 * The section can't be removed here because of the 975 * The section can't be removed here because of the
@@ -977,21 +977,20 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
977 */ 977 */
978 zone = page_zone(pfn_to_page(pfn)); 978 zone = page_zone(pfn_to_page(pfn));
979 979
980 ret = -EINVAL;
981 if ((zone_idx(zone) > ZONE_NORMAL || 980 if ((zone_idx(zone) > ZONE_NORMAL ||
982 online_type == MMOP_ONLINE_MOVABLE) && 981 online_type == MMOP_ONLINE_MOVABLE) &&
983 !can_online_high_movable(zone)) 982 !can_online_high_movable(zone))
984 goto out; 983 return -EINVAL;
985 984
986 if (online_type == MMOP_ONLINE_KERNEL && 985 if (online_type == MMOP_ONLINE_KERNEL &&
987 zone_idx(zone) == ZONE_MOVABLE) { 986 zone_idx(zone) == ZONE_MOVABLE) {
988 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) 987 if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
989 goto out; 988 return -EINVAL;
990 } 989 }
991 if (online_type == MMOP_ONLINE_MOVABLE && 990 if (online_type == MMOP_ONLINE_MOVABLE &&
992 zone_idx(zone) == ZONE_MOVABLE - 1) { 991 zone_idx(zone) == ZONE_MOVABLE - 1) {
993 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) 992 if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
994 goto out; 993 return -EINVAL;
995 } 994 }
996 995
997 /* Previous code may changed the zone of the pfn range */ 996 /* Previous code may changed the zone of the pfn range */
@@ -1007,7 +1006,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
1007 ret = notifier_to_errno(ret); 1006 ret = notifier_to_errno(ret);
1008 if (ret) { 1007 if (ret) {
1009 memory_notify(MEM_CANCEL_ONLINE, &arg); 1008 memory_notify(MEM_CANCEL_ONLINE, &arg);
1010 goto out; 1009 return ret;
1011 } 1010 }
1012 /* 1011 /*
1013 * If this zone is not populated, then it is not in zonelist. 1012 * If this zone is not populated, then it is not in zonelist.
@@ -1031,7 +1030,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
1031 (((unsigned long long) pfn + nr_pages) 1030 (((unsigned long long) pfn + nr_pages)
1032 << PAGE_SHIFT) - 1); 1031 << PAGE_SHIFT) - 1);
1033 memory_notify(MEM_CANCEL_ONLINE, &arg); 1032 memory_notify(MEM_CANCEL_ONLINE, &arg);
1034 goto out; 1033 return ret;
1035 } 1034 }
1036 1035
1037 zone->present_pages += onlined_pages; 1036 zone->present_pages += onlined_pages;
@@ -1061,9 +1060,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
1061 1060
1062 if (onlined_pages) 1061 if (onlined_pages)
1063 memory_notify(MEM_ONLINE, &arg); 1062 memory_notify(MEM_ONLINE, &arg);
1064out: 1063 return 0;
1065 mem_hotplug_done();
1066 return ret;
1067} 1064}
1068#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ 1065#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
1069 1066
@@ -1688,21 +1685,18 @@ static int __ref __offline_pages(unsigned long start_pfn,
1688 if (!test_pages_in_a_zone(start_pfn, end_pfn)) 1685 if (!test_pages_in_a_zone(start_pfn, end_pfn))
1689 return -EINVAL; 1686 return -EINVAL;
1690 1687
1691 mem_hotplug_begin();
1692
1693 zone = page_zone(pfn_to_page(start_pfn)); 1688 zone = page_zone(pfn_to_page(start_pfn));
1694 node = zone_to_nid(zone); 1689 node = zone_to_nid(zone);
1695 nr_pages = end_pfn - start_pfn; 1690 nr_pages = end_pfn - start_pfn;
1696 1691
1697 ret = -EINVAL;
1698 if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages)) 1692 if (zone_idx(zone) <= ZONE_NORMAL && !can_offline_normal(zone, nr_pages))
1699 goto out; 1693 return -EINVAL;
1700 1694
1701 /* set above range as isolated */ 1695 /* set above range as isolated */
1702 ret = start_isolate_page_range(start_pfn, end_pfn, 1696 ret = start_isolate_page_range(start_pfn, end_pfn,
1703 MIGRATE_MOVABLE, true); 1697 MIGRATE_MOVABLE, true);
1704 if (ret) 1698 if (ret)
1705 goto out; 1699 return ret;
1706 1700
1707 arg.start_pfn = start_pfn; 1701 arg.start_pfn = start_pfn;
1708 arg.nr_pages = nr_pages; 1702 arg.nr_pages = nr_pages;
@@ -1795,7 +1789,6 @@ repeat:
1795 writeback_set_ratelimit(); 1789 writeback_set_ratelimit();
1796 1790
1797 memory_notify(MEM_OFFLINE, &arg); 1791 memory_notify(MEM_OFFLINE, &arg);
1798 mem_hotplug_done();
1799 return 0; 1792 return 0;
1800 1793
1801failed_removal: 1794failed_removal:
@@ -1805,12 +1798,10 @@ failed_removal:
1805 memory_notify(MEM_CANCEL_OFFLINE, &arg); 1798 memory_notify(MEM_CANCEL_OFFLINE, &arg);
1806 /* pushback to free area */ 1799 /* pushback to free area */
1807 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); 1800 undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
1808
1809out:
1810 mem_hotplug_done();
1811 return ret; 1801 return ret;
1812} 1802}
1813 1803
1804/* Must be protected by mem_hotplug_begin() */
1814int offline_pages(unsigned long start_pfn, unsigned long nr_pages) 1805int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
1815{ 1806{
1816 return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ); 1807 return __offline_pages(start_pfn, start_pfn + nr_pages, 120 * HZ);