author	Hugh Dickins <hughd@google.com>	2013-02-22 19:35:16 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-23 20:50:20 -0500
commit	ef4d43a8075891137ebc064e473d2e904971e933 (patch)
tree	a32216419976dca72813a107bf2200d1683341c4
parent	9c620e2bc5aa4256c102ada34e6c76204ed5898b (diff)
ksm: stop hotremove lockdep warning
Complaints are rare, but lockdep still does not understand the way
ksm_memory_callback(MEM_GOING_OFFLINE) takes ksm_thread_mutex, and holds
it until the ksm_memory_callback(MEM_OFFLINE): that appears to be a
problem because notifier callbacks are made under down_read of
blocking_notifier_head->rwsem (so first the mutex is taken while holding
the rwsem, then later the rwsem is taken while still holding the mutex);
but is not in fact a problem because mem_hotplug_mutex is held
throughout the dance.

There was an attempt to fix this with mutex_lock_nested(); but if that
happened to fool lockdep two years ago, apparently it does so no longer.

I had hoped to eradicate this issue in extending KSM page migration not
to need the ksm_thread_mutex.  But then realized that although the page
migration itself is safe, we do still need to lock out ksmd and other
users of get_ksm_page() while offlining memory - at some point between
MEM_GOING_OFFLINE and MEM_OFFLINE, the struct pages themselves may
vanish, and get_ksm_page()'s accesses to them become a violation.

So, give up on holding ksm_thread_mutex itself from MEM_GOING_OFFLINE to
MEM_OFFLINE, and add a KSM_RUN_OFFLINE flag, and wait_while_offlining()
checks, to achieve the same lockout without being caught by lockdep.
This is less elegant for KSM, but it's more important to keep lockdep
useful to other users - and I apologize for how long it took to fix.

Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Tested-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
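To see the apparent inversion concretely, here is a minimal userspace sketch, with pthreads standing in for the kernel primitives (all names are illustrative, not from mm/ksm.c).  A lock-order checker records rwsem -> mutex in the first callback and mutex -> rwsem in the second, and cannot see that the outer hotplug mutex serializes both sequences:

#include <pthread.h>

/* Stand-ins for mem_hotplug_mutex, the notifier chain rwsem and
 * ksm_thread_mutex; purely illustrative. */
static pthread_mutex_t hotplug = PTHREAD_MUTEX_INITIALIZER;
static pthread_rwlock_t chain = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t ksm = PTHREAD_MUTEX_INITIALIZER;

static void going_offline(void)
{
	pthread_rwlock_rdlock(&chain);	/* notifier callback runs under rwsem */
	pthread_mutex_lock(&ksm);	/* order recorded: rwsem -> mutex */
	pthread_rwlock_unlock(&chain);	/* mutex stays held past the callback */
}

static void offline(void)
{
	pthread_rwlock_rdlock(&chain);	/* mutex still held: mutex -> rwsem */
	pthread_mutex_unlock(&ksm);
	pthread_rwlock_unlock(&chain);
}

int main(void)
{
	pthread_mutex_lock(&hotplug);	/* the serialization lockdep cannot see */
	going_offline();
	offline();
	pthread_mutex_unlock(&hotplug);
	return 0;
}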
 mm/ksm.c | 55 +++++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 14 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index 91f07d0e9ce7..5ea0f64b8714 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -226,7 +226,9 @@ static unsigned int ksm_merge_across_nodes = 1;
 #define KSM_RUN_STOP	0
 #define KSM_RUN_MERGE	1
 #define KSM_RUN_UNMERGE	2
-static unsigned int ksm_run = KSM_RUN_STOP;
+#define KSM_RUN_OFFLINE	4
+static unsigned long ksm_run = KSM_RUN_STOP;
+static void wait_while_offlining(void);
 
 static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait);
 static DEFINE_MUTEX(ksm_thread_mutex);
@@ -1700,6 +1702,7 @@ static int ksm_scan_thread(void *nothing)
 
 	while (!kthread_should_stop()) {
 		mutex_lock(&ksm_thread_mutex);
+		wait_while_offlining();
 		if (ksmd_should_run())
 			ksm_do_scan(ksm_thread_pages_to_scan);
 		mutex_unlock(&ksm_thread_mutex);
@@ -2056,6 +2059,22 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
+static int just_wait(void *word)
+{
+	schedule();
+	return 0;
+}
+
+static void wait_while_offlining(void)
+{
+	while (ksm_run & KSM_RUN_OFFLINE) {
+		mutex_unlock(&ksm_thread_mutex);
+		wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE),
+			    just_wait, TASK_UNINTERRUPTIBLE);
+		mutex_lock(&ksm_thread_mutex);
+	}
+}
+
 static void ksm_check_stable_tree(unsigned long start_pfn,
 				  unsigned long end_pfn)
 {
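One idiom worth noting in the hunk above: wait_on_bit() takes a bit number rather than a mask, so ilog2() converts the single-bit flag KSM_RUN_OFFLINE (value 4) to its bit index 2.  A tiny userspace check of that equivalence, using the GCC/Clang count-trailing-zeros builtin as a stand-in for the kernel's ilog2():

#include <assert.h>

#define KSM_RUN_OFFLINE 4	/* single-bit flag, as in the patch */

int main(void)
{
	/* For a power of two, ilog2() is the bit index; 4 is bit 2.
	 * __builtin_ctz() computes the same value for a one-bit mask. */
	assert(__builtin_ctz(KSM_RUN_OFFLINE) == 2);
	return 0;
}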
@@ -2098,15 +2117,15 @@ static int ksm_memory_callback(struct notifier_block *self,
 	switch (action) {
 	case MEM_GOING_OFFLINE:
 		/*
-		 * Keep it very simple for now: just lock out ksmd and
-		 * MADV_UNMERGEABLE while any memory is going offline.
-		 * mutex_lock_nested() is necessary because lockdep was alarmed
-		 * that here we take ksm_thread_mutex inside notifier chain
-		 * mutex, and later take notifier chain mutex inside
-		 * ksm_thread_mutex to unlock it.  But that's safe because both
-		 * are inside mem_hotplug_mutex.
+		 * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items()
+		 * and remove_all_stable_nodes() while memory is going offline:
+		 * it is unsafe for them to touch the stable tree at this time.
+		 * But unmerge_ksm_pages(), rmap lookups and other entry points
+		 * which do not need the ksm_thread_mutex are all safe.
 		 */
-		mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
+		mutex_lock(&ksm_thread_mutex);
+		ksm_run |= KSM_RUN_OFFLINE;
+		mutex_unlock(&ksm_thread_mutex);
 		break;
 
 	case MEM_OFFLINE:
@@ -2122,11 +2141,20 @@ static int ksm_memory_callback(struct notifier_block *self,
 		/* fallthrough */
 
 	case MEM_CANCEL_OFFLINE:
+		mutex_lock(&ksm_thread_mutex);
+		ksm_run &= ~KSM_RUN_OFFLINE;
 		mutex_unlock(&ksm_thread_mutex);
+
+		smp_mb();	/* wake_up_bit advises this */
+		wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE));
 		break;
 	}
 	return NOTIFY_OK;
 }
+#else
+static void wait_while_offlining(void)
+{
+}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 #ifdef CONFIG_SYSFS
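The waiter/waker handshake above maps onto a familiar condition-variable pattern.  A rough userspace sketch, assuming a pthread condvar stands in for the bit waitqueue (names here are illustrative): the condvar atomically drops and retakes the mutex while sleeping, which is what the explicit unlock/wait_on_bit()/lock dance does, and it supplies the ordering that the smp_mb() + wake_up_bit() pair provides in the kernel version.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* ksm_thread_mutex */
static pthread_cond_t offline_cleared = PTHREAD_COND_INITIALIZER;
static bool offline;	/* stands in for the KSM_RUN_OFFLINE bit */

/* Waiter: the shape of wait_while_offlining(). */
static void wait_while_offline(void)
{
	pthread_mutex_lock(&lock);
	while (offline)
		pthread_cond_wait(&offline_cleared, &lock);
	/* lock held and offline clear: safe to touch the stable tree */
	pthread_mutex_unlock(&lock);
}

/* Waker: the shape of the MEM_OFFLINE/MEM_CANCEL_OFFLINE path. */
static void clear_offline(void)
{
	pthread_mutex_lock(&lock);
	offline = false;
	pthread_mutex_unlock(&lock);
	pthread_cond_broadcast(&offline_cleared);	/* like wake_up_bit() */
}

int main(void)
{
	clear_offline();	/* nothing offline: waiter returns at once */
	wait_while_offline();
	return 0;
}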
@@ -2189,7 +2217,7 @@ KSM_ATTR(pages_to_scan);
 static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr,
 			char *buf)
 {
-	return sprintf(buf, "%u\n", ksm_run);
+	return sprintf(buf, "%lu\n", ksm_run);
 }
 
 static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -2212,6 +2240,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
 	 */
 
 	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
 	if (ksm_run != flags) {
 		ksm_run = flags;
 		if (flags & KSM_RUN_UNMERGE) {
@@ -2254,6 +2283,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj,
 		return -EINVAL;
 
 	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
 	if (ksm_merge_across_nodes != knob) {
 		if (ksm_pages_shared || remove_all_stable_nodes())
 			err = -EBUSY;
@@ -2366,10 +2396,7 @@ static int __init ksm_init(void)
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-	/*
-	 * Choose a high priority since the callback takes ksm_thread_mutex:
-	 * later callbacks could only be taking locks which nest within that.
-	 */
+	/* There is no significance to this priority 100 */
 	hotplug_memory_notifier(ksm_memory_callback, 100);
 #endif
 	return 0;