diff options
author | Hugh Dickins <hughd@google.com> | 2013-02-22 19:35:16 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-23 20:50:20 -0500 |
commit | ef4d43a8075891137ebc064e473d2e904971e933 (patch) | |
tree | a32216419976dca72813a107bf2200d1683341c4 | |
parent | 9c620e2bc5aa4256c102ada34e6c76204ed5898b (diff) |
ksm: stop hotremove lockdep warning
Complaints are rare, but lockdep still does not understand the way
ksm_memory_callback(MEM_GOING_OFFLINE) takes ksm_thread_mutex, and holds
it until the ksm_memory_callback(MEM_OFFLINE): that appears to be a
problem because notifier callbacks are made under down_read of
blocking_notifier_head->rwsem (so first the mutex is taken while holding
the rwsem, then later the rwsem is taken while still holding the mutex);
but is not in fact a problem because mem_hotplug_mutex is held
throughout the dance.
There was an attempt to fix this with mutex_lock_nested(); but if that
happened to fool lockdep two years ago, apparently it does so no longer.
I had hoped to eradicate this issue in extending KSM page migration not
to need the ksm_thread_mutex. But then realized that although the page
migration itself is safe, we do still need to lock out ksmd and other
users of get_ksm_page() while offlining memory - at some point between
MEM_GOING_OFFLINE and MEM_OFFLINE, the struct pages themselves may
vanish, and get_ksm_page()'s accesses to them become a violation.
So, give up on holding ksm_thread_mutex itself from MEM_GOING_OFFLINE to
MEM_OFFLINE, and add a KSM_RUN_OFFLINE flag, and wait_while_offlining()
checks, to achieve the same lockout without being caught by lockdep.
This is less elegant for KSM, but it's more important to keep lockdep
useful to other users - and I apologize for how long it took to fix.
Signed-off-by: Hugh Dickins <hughd@google.com>
Reported-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Tested-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Petr Holasek <pholasek@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Izik Eidus <izik.eidus@ravellosystems.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/ksm.c | 55 |
1 file changed, 41 insertions(+), 14 deletions(-)
@@ -226,7 +226,9 @@ static unsigned int ksm_merge_across_nodes = 1; | |||
226 | #define KSM_RUN_STOP 0 | 226 | #define KSM_RUN_STOP 0 |
227 | #define KSM_RUN_MERGE 1 | 227 | #define KSM_RUN_MERGE 1 |
228 | #define KSM_RUN_UNMERGE 2 | 228 | #define KSM_RUN_UNMERGE 2 |
229 | static unsigned int ksm_run = KSM_RUN_STOP; | 229 | #define KSM_RUN_OFFLINE 4 |
230 | static unsigned long ksm_run = KSM_RUN_STOP; | ||
231 | static void wait_while_offlining(void); | ||
230 | 232 | ||
231 | static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); | 233 | static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); |
232 | static DEFINE_MUTEX(ksm_thread_mutex); | 234 | static DEFINE_MUTEX(ksm_thread_mutex); |
@@ -1700,6 +1702,7 @@ static int ksm_scan_thread(void *nothing) | |||
1700 | 1702 | ||
1701 | while (!kthread_should_stop()) { | 1703 | while (!kthread_should_stop()) { |
1702 | mutex_lock(&ksm_thread_mutex); | 1704 | mutex_lock(&ksm_thread_mutex); |
1705 | wait_while_offlining(); | ||
1703 | if (ksmd_should_run()) | 1706 | if (ksmd_should_run()) |
1704 | ksm_do_scan(ksm_thread_pages_to_scan); | 1707 | ksm_do_scan(ksm_thread_pages_to_scan); |
1705 | mutex_unlock(&ksm_thread_mutex); | 1708 | mutex_unlock(&ksm_thread_mutex); |
@@ -2056,6 +2059,22 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) | |||
2056 | #endif /* CONFIG_MIGRATION */ | 2059 | #endif /* CONFIG_MIGRATION */ |
2057 | 2060 | ||
2058 | #ifdef CONFIG_MEMORY_HOTREMOVE | 2061 | #ifdef CONFIG_MEMORY_HOTREMOVE |
2062 | static int just_wait(void *word) | ||
2063 | { | ||
2064 | schedule(); | ||
2065 | return 0; | ||
2066 | } | ||
2067 | |||
2068 | static void wait_while_offlining(void) | ||
2069 | { | ||
2070 | while (ksm_run & KSM_RUN_OFFLINE) { | ||
2071 | mutex_unlock(&ksm_thread_mutex); | ||
2072 | wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), | ||
2073 | just_wait, TASK_UNINTERRUPTIBLE); | ||
2074 | mutex_lock(&ksm_thread_mutex); | ||
2075 | } | ||
2076 | } | ||
2077 | |||
2059 | static void ksm_check_stable_tree(unsigned long start_pfn, | 2078 | static void ksm_check_stable_tree(unsigned long start_pfn, |
2060 | unsigned long end_pfn) | 2079 | unsigned long end_pfn) |
2061 | { | 2080 | { |
@@ -2098,15 +2117,15 @@ static int ksm_memory_callback(struct notifier_block *self, | |||
2098 | switch (action) { | 2117 | switch (action) { |
2099 | case MEM_GOING_OFFLINE: | 2118 | case MEM_GOING_OFFLINE: |
2100 | /* | 2119 | /* |
2101 | * Keep it very simple for now: just lock out ksmd and | 2120 | * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items() |
2102 | * MADV_UNMERGEABLE while any memory is going offline. | 2121 | * and remove_all_stable_nodes() while memory is going offline: |
2103 | * mutex_lock_nested() is necessary because lockdep was alarmed | 2122 | * it is unsafe for them to touch the stable tree at this time. |
2104 | * that here we take ksm_thread_mutex inside notifier chain | 2123 | * But unmerge_ksm_pages(), rmap lookups and other entry points |
2105 | * mutex, and later take notifier chain mutex inside | 2124 | * which do not need the ksm_thread_mutex are all safe. |
2106 | * ksm_thread_mutex to unlock it. But that's safe because both | ||
2107 | * are inside mem_hotplug_mutex. | ||
2108 | */ | 2125 | */ |
2109 | mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING); | 2126 | mutex_lock(&ksm_thread_mutex); |
2127 | ksm_run |= KSM_RUN_OFFLINE; | ||
2128 | mutex_unlock(&ksm_thread_mutex); | ||
2110 | break; | 2129 | break; |
2111 | 2130 | ||
2112 | case MEM_OFFLINE: | 2131 | case MEM_OFFLINE: |
@@ -2122,11 +2141,20 @@ static int ksm_memory_callback(struct notifier_block *self, | |||
2122 | /* fallthrough */ | 2141 | /* fallthrough */ |
2123 | 2142 | ||
2124 | case MEM_CANCEL_OFFLINE: | 2143 | case MEM_CANCEL_OFFLINE: |
2144 | mutex_lock(&ksm_thread_mutex); | ||
2145 | ksm_run &= ~KSM_RUN_OFFLINE; | ||
2125 | mutex_unlock(&ksm_thread_mutex); | 2146 | mutex_unlock(&ksm_thread_mutex); |
2147 | |||
2148 | smp_mb(); /* wake_up_bit advises this */ | ||
2149 | wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE)); | ||
2126 | break; | 2150 | break; |
2127 | } | 2151 | } |
2128 | return NOTIFY_OK; | 2152 | return NOTIFY_OK; |
2129 | } | 2153 | } |
2154 | #else | ||
2155 | static void wait_while_offlining(void) | ||
2156 | { | ||
2157 | } | ||
2130 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 2158 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
2131 | 2159 | ||
2132 | #ifdef CONFIG_SYSFS | 2160 | #ifdef CONFIG_SYSFS |
@@ -2189,7 +2217,7 @@ KSM_ATTR(pages_to_scan); | |||
2189 | static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, | 2217 | static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, |
2190 | char *buf) | 2218 | char *buf) |
2191 | { | 2219 | { |
2192 | return sprintf(buf, "%u\n", ksm_run); | 2220 | return sprintf(buf, "%lu\n", ksm_run); |
2193 | } | 2221 | } |
2194 | 2222 | ||
2195 | static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, | 2223 | static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, |
@@ -2212,6 +2240,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, | |||
2212 | */ | 2240 | */ |
2213 | 2241 | ||
2214 | mutex_lock(&ksm_thread_mutex); | 2242 | mutex_lock(&ksm_thread_mutex); |
2243 | wait_while_offlining(); | ||
2215 | if (ksm_run != flags) { | 2244 | if (ksm_run != flags) { |
2216 | ksm_run = flags; | 2245 | ksm_run = flags; |
2217 | if (flags & KSM_RUN_UNMERGE) { | 2246 | if (flags & KSM_RUN_UNMERGE) { |
@@ -2254,6 +2283,7 @@ static ssize_t merge_across_nodes_store(struct kobject *kobj, | |||
2254 | return -EINVAL; | 2283 | return -EINVAL; |
2255 | 2284 | ||
2256 | mutex_lock(&ksm_thread_mutex); | 2285 | mutex_lock(&ksm_thread_mutex); |
2286 | wait_while_offlining(); | ||
2257 | if (ksm_merge_across_nodes != knob) { | 2287 | if (ksm_merge_across_nodes != knob) { |
2258 | if (ksm_pages_shared || remove_all_stable_nodes()) | 2288 | if (ksm_pages_shared || remove_all_stable_nodes()) |
2259 | err = -EBUSY; | 2289 | err = -EBUSY; |
@@ -2366,10 +2396,7 @@ static int __init ksm_init(void) | |||
2366 | #endif /* CONFIG_SYSFS */ | 2396 | #endif /* CONFIG_SYSFS */ |
2367 | 2397 | ||
2368 | #ifdef CONFIG_MEMORY_HOTREMOVE | 2398 | #ifdef CONFIG_MEMORY_HOTREMOVE |
2369 | /* | 2399 | /* There is no significance to this priority 100 */ |
2370 | * Choose a high priority since the callback takes ksm_thread_mutex: | ||
2371 | * later callbacks could only be taking locks which nest within that. | ||
2372 | */ | ||
2373 | hotplug_memory_notifier(ksm_memory_callback, 100); | 2400 | hotplug_memory_notifier(ksm_memory_callback, 100); |
2374 | #endif | 2401 | #endif |
2375 | return 0; | 2402 | return 0; |